[AUR-387, AUR-425] Add start-project to CLI (#29)
This commit is contained in:
parent
d83c22aa4e
commit
205955b8a3
|
@ -49,3 +49,4 @@ repos:
|
||||||
- id: mypy
|
- id: mypy
|
||||||
additional_dependencies: [types-PyYAML==6.0.12.11, "types-requests"]
|
additional_dependencies: [types-PyYAML==6.0.12.11, "types-requests"]
|
||||||
args: ["--check-untyped-defs", "--ignore-missing-imports"]
|
args: ["--check-untyped-defs", "--ignore-missing-imports"]
|
||||||
|
exclude: "^templates/"
|
||||||
|
|
|
@ -22,4 +22,4 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
__version__ = "0.0.2"
|
__version__ = "0.0.3"
|
||||||
|
|
|
@ -3,9 +3,6 @@ import os
|
||||||
import click
|
import click
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from kotaemon.contribs.promptui.config import export_pipeline_to_config
|
|
||||||
from kotaemon.contribs.promptui.ui import build_from_dict
|
|
||||||
|
|
||||||
|
|
||||||
# check if the output is not a .yml file -> raise error
|
# check if the output is not a .yml file -> raise error
|
||||||
def check_config_format(config):
|
def check_config_format(config):
|
||||||
|
@ -39,6 +36,8 @@ def export(export_path, output):
|
||||||
|
|
||||||
from theflow.utils.modules import import_dotted_string
|
from theflow.utils.modules import import_dotted_string
|
||||||
|
|
||||||
|
from kotaemon.contribs.promptui.config import export_pipeline_to_config
|
||||||
|
|
||||||
sys.path.append(os.getcwd())
|
sys.path.append(os.getcwd())
|
||||||
cls = import_dotted_string(export_path, safe=False)
|
cls = import_dotted_string(export_path, safe=False)
|
||||||
export_pipeline_to_config(cls, output)
|
export_pipeline_to_config(cls, output)
|
||||||
|
@ -48,9 +47,21 @@ def export(export_path, output):
|
||||||
@promptui.command()
|
@promptui.command()
|
||||||
@click.argument("run_path", required=False, default="promptui.yml")
|
@click.argument("run_path", required=False, default="promptui.yml")
|
||||||
def run(run_path):
|
def run(run_path):
|
||||||
|
|
||||||
|
from kotaemon.contribs.promptui.ui import build_from_dict
|
||||||
|
|
||||||
build_from_dict(run_path)
|
build_from_dict(run_path)
|
||||||
check_config_format(run_path)
|
check_config_format(run_path)
|
||||||
|
|
||||||
|
|
||||||
|
@main.command()
def start_project():
    """Create a new project from the default kotaemon cookiecutter template.

    Runs cookiecutter against the kotaemon Git repository, using the
    ``templates/project-default`` directory inside it as the template.
    cookiecutter then prompts for the template variables on the terminal.
    """
    # Imported locally, like the other commands in this module do for their
    # own dependencies, so the module's top-level imports stay untouched.
    import subprocess
    import sys

    # Pass the arguments as a list instead of one shell string:
    #   * the previous adjacent string literals were joined without a space,
    #     producing "...kotaemon.git--directory=..." as a single token;
    #   * no shell is involved, so quoting behaves the same on every platform.
    # Running through the current interpreter ("-m cookiecutter") uses the
    # cookiecutter installed next to this package rather than whatever
    # executable happens to be first on PATH.
    command = [
        sys.executable,
        "-m",
        "cookiecutter",
        "https://github.com/Cinnamon/kotaemon.git",
        "--directory=templates/project-default",
    ]

    # check=False: as with the former os.system call, a non-zero exit status
    # from cookiecutter is not turned into an exception here.
    subprocess.run(command, check=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
6
setup.py
6
setup.py
|
@ -29,18 +29,16 @@ setuptools.setup(
|
||||||
packages=setuptools.find_packages(
|
packages=setuptools.find_packages(
|
||||||
exclude=("tests", "tests.*", "examples", "examples.*")
|
exclude=("tests", "tests.*", "examples", "examples.*")
|
||||||
),
|
),
|
||||||
dependencies=[
|
|
||||||
"click >= 8.1.7",
|
|
||||||
],
|
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"farm-haystack==1.19.0",
|
"farm-haystack==1.19.0",
|
||||||
"langchain",
|
"langchain",
|
||||||
"theflow",
|
"theflow",
|
||||||
"llama-index",
|
"llama-index",
|
||||||
"llama-hub",
|
"llama-hub",
|
||||||
"nltk",
|
|
||||||
"gradio",
|
"gradio",
|
||||||
"openpyxl",
|
"openpyxl",
|
||||||
|
"cookiecutter",
|
||||||
|
"click",
|
||||||
],
|
],
|
||||||
extras_require={
|
extras_require={
|
||||||
"dev": [
|
"dev": [
|
||||||
|
|
4
templates/project-default/cookiecutter.json
Normal file
4
templates/project-default/cookiecutter.json
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{
|
||||||
|
"project_name": "prj_kotaemon",
|
||||||
|
"ptl": "john"
|
||||||
|
}
|
23
templates/project-default/{{cookiecutter.project_name}}/.gitattributes
vendored
Normal file
23
templates/project-default/{{cookiecutter.project_name}}/.gitattributes
vendored
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
.gitattributes text eol=lf
|
||||||
|
.gitignore text eol=lf
|
||||||
|
*.build text eol=lf
|
||||||
|
*.c text eol=lf
|
||||||
|
*.cmake text eol=lf
|
||||||
|
*.cpp text eol=lf
|
||||||
|
*.csv text eol=lf
|
||||||
|
*.f text eol=lf
|
||||||
|
*.f90 text eol=lf
|
||||||
|
*.for text eol=lf
|
||||||
|
*.grc text eol=lf
|
||||||
|
*.h text eol=lf
|
||||||
|
*.ipynb text eol=lf
|
||||||
|
*.m text eol=lf
|
||||||
|
*.md text eol=lf
|
||||||
|
*.pas text eol=lf
|
||||||
|
*.py text eol=lf
|
||||||
|
*.rst text eol=lf
|
||||||
|
*.sh text eol=lf
|
||||||
|
*.txt text eol=lf
|
||||||
|
*.yml text eol=lf
|
||||||
|
Makefile text eol=lf
|
||||||
|
*.html linguist-documentation
|
459
templates/project-default/{{cookiecutter.project_name}}/.gitignore
vendored
Normal file
459
templates/project-default/{{cookiecutter.project_name}}/.gitignore
vendored
Normal file
|
@ -0,0 +1,459 @@
|
||||||
|
# Created by https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
|
||||||
|
# Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
|
||||||
|
|
||||||
|
### Emacs ###
|
||||||
|
# -*- mode: gitignore; -*-
|
||||||
|
*~
|
||||||
|
\#*\#
|
||||||
|
/.emacs.desktop
|
||||||
|
/.emacs.desktop.lock
|
||||||
|
*.elc
|
||||||
|
auto-save-list
|
||||||
|
tramp
|
||||||
|
.\#*
|
||||||
|
|
||||||
|
# Org-mode
|
||||||
|
.org-id-locations
|
||||||
|
*_archive
|
||||||
|
|
||||||
|
# flymake-mode
|
||||||
|
*_flymake.*
|
||||||
|
|
||||||
|
# eshell files
|
||||||
|
/eshell/history
|
||||||
|
/eshell/lastdir
|
||||||
|
|
||||||
|
# elpa packages
|
||||||
|
/elpa/
|
||||||
|
|
||||||
|
# reftex files
|
||||||
|
*.rel
|
||||||
|
|
||||||
|
# AUCTeX auto folder
|
||||||
|
/auto/
|
||||||
|
|
||||||
|
# cask packages
|
||||||
|
.cask/
|
||||||
|
dist/
|
||||||
|
|
||||||
|
# Flycheck
|
||||||
|
flycheck_*.el
|
||||||
|
|
||||||
|
# server auth directory
|
||||||
|
/server/
|
||||||
|
|
||||||
|
# projectiles files
|
||||||
|
.projectile
|
||||||
|
|
||||||
|
# directory configuration
|
||||||
|
.dir-locals.el
|
||||||
|
|
||||||
|
# network security
|
||||||
|
/network-security.data
|
||||||
|
|
||||||
|
### Linux ###
|
||||||
|
|
||||||
|
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||||
|
.fuse_hidden*
|
||||||
|
|
||||||
|
# KDE directory preferences
|
||||||
|
.directory
|
||||||
|
|
||||||
|
# Linux trash folder which might appear on any partition or disk
|
||||||
|
.Trash-*
|
||||||
|
|
||||||
|
# .nfs files are created when an open file is removed but is still being accessed
|
||||||
|
.nfs*
|
||||||
|
|
||||||
|
### macOS ###
|
||||||
|
# General
|
||||||
|
.DS_Store
|
||||||
|
.AppleDouble
|
||||||
|
.LSOverride
|
||||||
|
|
||||||
|
# Icon must end with two \r
|
||||||
|
Icon
|
||||||
|
|
||||||
|
# Thumbnails
|
||||||
|
._*
|
||||||
|
|
||||||
|
# Files that might appear in the root of a volume
|
||||||
|
.DocumentRevisions-V100
|
||||||
|
.fseventsd
|
||||||
|
.Spotlight-V100
|
||||||
|
.TemporaryItems
|
||||||
|
.Trashes
|
||||||
|
.VolumeIcon.icns
|
||||||
|
.com.apple.timemachine.donotpresent
|
||||||
|
|
||||||
|
# Directories potentially created on remote AFP share
|
||||||
|
.AppleDB
|
||||||
|
.AppleDesktop
|
||||||
|
Network Trash Folder
|
||||||
|
Temporary Items
|
||||||
|
.apdisk
|
||||||
|
|
||||||
|
### macOS Patch ###
|
||||||
|
# iCloud generated files
|
||||||
|
*.icloud
|
||||||
|
|
||||||
|
### PyCharm ###
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
|
# User-specific stuff
|
||||||
|
.idea/**/workspace.xml
|
||||||
|
.idea/**/tasks.xml
|
||||||
|
.idea/**/usage.statistics.xml
|
||||||
|
.idea/**/dictionaries
|
||||||
|
.idea/**/shelf
|
||||||
|
|
||||||
|
# AWS User-specific
|
||||||
|
.idea/**/aws.xml
|
||||||
|
|
||||||
|
# Generated files
|
||||||
|
.idea/**/contentModel.xml
|
||||||
|
|
||||||
|
# Sensitive or high-churn files
|
||||||
|
.idea/**/dataSources/
|
||||||
|
.idea/**/dataSources.ids
|
||||||
|
.idea/**/dataSources.local.xml
|
||||||
|
.idea/**/sqlDataSources.xml
|
||||||
|
.idea/**/dynamic.xml
|
||||||
|
.idea/**/uiDesigner.xml
|
||||||
|
.idea/**/dbnavigator.xml
|
||||||
|
|
||||||
|
# Gradle
|
||||||
|
.idea/**/gradle.xml
|
||||||
|
.idea/**/libraries
|
||||||
|
|
||||||
|
# Gradle and Maven with auto-import
|
||||||
|
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||||
|
# since they will be recreated, and may cause churn. Uncomment if using
|
||||||
|
# auto-import.
|
||||||
|
# .idea/artifacts
|
||||||
|
# .idea/compiler.xml
|
||||||
|
# .idea/jarRepositories.xml
|
||||||
|
# .idea/modules.xml
|
||||||
|
# .idea/*.iml
|
||||||
|
# .idea/modules
|
||||||
|
# *.iml
|
||||||
|
# *.ipr
|
||||||
|
|
||||||
|
# CMake
|
||||||
|
cmake-build-*/
|
||||||
|
|
||||||
|
# Mongo Explorer plugin
|
||||||
|
.idea/**/mongoSettings.xml
|
||||||
|
|
||||||
|
# File-based project format
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
out/
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
.idea_modules/
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
atlassian-ide-plugin.xml
|
||||||
|
|
||||||
|
# Cursive Clojure plugin
|
||||||
|
.idea/replstate.xml
|
||||||
|
|
||||||
|
# SonarLint plugin
|
||||||
|
.idea/sonarlint/
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
com_crashlytics_export_strings.xml
|
||||||
|
crashlytics.properties
|
||||||
|
crashlytics-build.properties
|
||||||
|
fabric.properties
|
||||||
|
|
||||||
|
# Editor-based Rest Client
|
||||||
|
.idea/httpRequests
|
||||||
|
|
||||||
|
# Android studio 3.1+ serialized cache file
|
||||||
|
.idea/caches/build_file_checksums.ser
|
||||||
|
|
||||||
|
### PyCharm Patch ###
|
||||||
|
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
||||||
|
|
||||||
|
# *.iml
|
||||||
|
# modules.xml
|
||||||
|
# .idea/misc.xml
|
||||||
|
# *.ipr
|
||||||
|
|
||||||
|
# Sonarlint plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
||||||
|
.idea/**/sonarlint/
|
||||||
|
|
||||||
|
# SonarQube Plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
||||||
|
.idea/**/sonarIssues.xml
|
||||||
|
|
||||||
|
# Markdown Navigator plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
||||||
|
.idea/**/markdown-navigator.xml
|
||||||
|
.idea/**/markdown-navigator-enh.xml
|
||||||
|
.idea/**/markdown-navigator/
|
||||||
|
|
||||||
|
# Cache file creation bug
|
||||||
|
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
||||||
|
.idea/$CACHE_FILE$
|
||||||
|
|
||||||
|
# CodeStream plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/12206-codestream
|
||||||
|
.idea/codestream.xml
|
||||||
|
|
||||||
|
# Azure Toolkit for IntelliJ plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
||||||
|
.idea/**/azureSettings.xml
|
||||||
|
|
||||||
|
### Python ###
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
|
|
||||||
|
### Python Patch ###
|
||||||
|
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
||||||
|
poetry.toml
|
||||||
|
|
||||||
|
# ruff
|
||||||
|
.ruff_cache/
|
||||||
|
|
||||||
|
# LSP config files
|
||||||
|
pyrightconfig.json
|
||||||
|
|
||||||
|
### Vim ###
|
||||||
|
# Swap
|
||||||
|
[._]*.s[a-v][a-z]
|
||||||
|
# comment out if you don't need vector files
!*.svg
|
||||||
|
[._]*.sw[a-p]
|
||||||
|
[._]s[a-rt-v][a-z]
|
||||||
|
[._]ss[a-gi-z]
|
||||||
|
[._]sw[a-p]
|
||||||
|
|
||||||
|
# Session
|
||||||
|
Session.vim
|
||||||
|
Sessionx.vim
|
||||||
|
|
||||||
|
# Temporary
|
||||||
|
.netrwhist
|
||||||
|
# Auto-generated tag files
|
||||||
|
tags
|
||||||
|
# Persistent undo
|
||||||
|
[._]*.un~
|
||||||
|
|
||||||
|
### VisualStudioCode ###
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/settings.json
|
||||||
|
!.vscode/tasks.json
|
||||||
|
!.vscode/launch.json
|
||||||
|
!.vscode/extensions.json
|
||||||
|
!.vscode/*.code-snippets
|
||||||
|
|
||||||
|
# Local History for Visual Studio Code
|
||||||
|
.history/
|
||||||
|
|
||||||
|
# Built Visual Studio Code Extensions
|
||||||
|
*.vsix
|
||||||
|
|
||||||
|
### VisualStudioCode Patch ###
|
||||||
|
# Ignore all local history of files
|
||||||
|
.history
|
||||||
|
.ionide
|
||||||
|
|
||||||
|
### Windows ###
|
||||||
|
# Windows thumbnail cache files
|
||||||
|
Thumbs.db
|
||||||
|
Thumbs.db:encryptable
|
||||||
|
ehthumbs.db
|
||||||
|
ehthumbs_vista.db
|
||||||
|
|
||||||
|
# Dump file
|
||||||
|
*.stackdump
|
||||||
|
|
||||||
|
# Folder config file
|
||||||
|
[Dd]esktop.ini
|
||||||
|
|
||||||
|
# Recycle Bin used on file shares
|
||||||
|
$RECYCLE.BIN/
|
||||||
|
|
||||||
|
# Windows Installer files
|
||||||
|
*.cab
|
||||||
|
*.msi
|
||||||
|
*.msix
|
||||||
|
*.msm
|
||||||
|
*.msp
|
||||||
|
|
||||||
|
# Windows shortcuts
|
||||||
|
*.lnk
|
||||||
|
|
||||||
|
.theflow/
|
||||||
|
|
||||||
|
# End of https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
|
||||||
|
|
||||||
|
logs/
|
||||||
|
.gitsecret/keys/random_seed
|
||||||
|
!*.secret
|
||||||
|
credentials.txt
|
||||||
|
|
||||||
|
S.gpg-agent*
|
||||||
|
.vscode/settings.json
|
|
@ -0,0 +1,51 @@
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v4.3.0
|
||||||
|
hooks:
|
||||||
|
- id: check-yaml
|
||||||
|
- id: check-toml
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- id: detect-aws-credentials
|
||||||
|
args: ["--allow-missing-credentials"]
|
||||||
|
- id: detect-private-key
|
||||||
|
- id: check-added-large-files
|
||||||
|
- repo: https://github.com/ambv/black
|
||||||
|
rev: 22.3.0
|
||||||
|
hooks:
|
||||||
|
- id: black
|
||||||
|
language_version: python3
|
||||||
|
- repo: https://github.com/pycqa/isort
|
||||||
|
rev: 5.12.0
|
||||||
|
hooks:
|
||||||
|
- id: isort
|
||||||
|
args: ["--profile", "black"]
|
||||||
|
language_version: python3.10
|
||||||
|
- repo: https://github.com/pycqa/flake8
|
||||||
|
rev: 4.0.1
|
||||||
|
hooks:
|
||||||
|
- id: flake8
|
||||||
|
args: ["--max-line-length", "88", "--extend-ignore", "E203"]
|
||||||
|
- repo: https://github.com/myint/autoflake
|
||||||
|
rev: v1.4
|
||||||
|
hooks:
|
||||||
|
- id: autoflake
|
||||||
|
args:
|
||||||
|
[
|
||||||
|
"--in-place",
|
||||||
|
"--remove-unused-variables",
|
||||||
|
"--remove-all-unused-imports",
|
||||||
|
"--ignore-init-module-imports",
|
||||||
|
"--exclude=tests/*",
|
||||||
|
]
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||||
|
rev: v2.7.1
|
||||||
|
hooks:
|
||||||
|
- id: prettier
|
||||||
|
types_or: [markdown, yaml]
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
|
rev: "v1.5.1"
|
||||||
|
hooks:
|
||||||
|
- id: mypy
|
||||||
|
additional_dependencies: [types-PyYAML==6.0.12.11, "types-requests"]
|
||||||
|
args: ["--check-untyped-defs", "--ignore-missing-imports"]
|
|
@ -0,0 +1,37 @@
|
||||||
|
<div align="center">
|
||||||
|
|
||||||
|
# Project {{ cookiecutter.project_name }}
|
||||||
|
|
||||||
|
[kotaemon](https://github.com/Cinnamon/kotaemon)
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
# Install
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create new conda env (optional)
|
||||||
|
conda create -n {{ cookiecutter.project_name }} python=3.10
|
||||||
|
conda activate {{ cookiecutter.project_name }}
|
||||||
|
|
||||||
|
# Clone and install the project
|
||||||
|
git clone "<{{ cookiecutter.project_name }}-repo>"
|
||||||
|
cd "<{{ cookiecutter.project_name }}-repo>"
|
||||||
|
pip install -e .
|
||||||
|
|
||||||
|
# Generate the project structure
|
||||||
|
cd ..
|
||||||
|
kh start-project
|
||||||
|
```
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
- Build the pipeline in `pipeline.py`
|
||||||
|
|
||||||
|
For supported utilities and tools, refer to https://github.com/Cinnamon/kotaemon/wiki/Utilities
|
||||||
|
|
||||||
|
# Contribute
|
||||||
|
|
||||||
|
- For project issues and errors, please report them in this repo's issue tracker.
|
||||||
|
- For kotaemon issues and errors, please report or make PR fixes in https://github.com/Cinnamon/kotaemon.git
|
||||||
|
- If the template for this project has issues and errors, please report or make
|
||||||
|
PR fixes in https://github.com/Cinnamon/kotaemon/tree/main/templates/project-default
|
|
@ -0,0 +1,20 @@
|
||||||
|
import setuptools
|
||||||
|
|
||||||
|
# Packaging script of the project template. The "{{ cookiecutter.* }}"
# placeholders are left as-is here; they are template variables
# (project_name, ptl) substituted when the template is rendered.

# Trove classifiers advertised for the generated package.
CLASSIFIERS = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]

# Runtime requirements: kotaemon is pulled directly from its Git repository
# over SSH (direct-reference requirement).
REQUIREMENTS = [
    "kotaemon@git+ssh://git@github.com/Cinnamon/kotaemon.git",
]

setuptools.setup(
    # identity
    name="{{ cookiecutter.project_name }}",
    version="0.0.1",
    description="Project {{ cookiecutter.project_name }}",
    long_description="Project {{ cookiecutter.project_name }}",
    url="https://github.com/Cinnamon/kotaemon",
    # ownership
    author="{{ cookiecutter.ptl }}",
    author_email="{{ cookiecutter.ptl }}@cinnamon.is",
    # compatibility and dependencies
    python_requires=">=3",
    classifiers=CLASSIFIERS,
    install_requires=REQUIREMENTS,
)
|
|
@ -0,0 +1,106 @@
|
||||||
|
import os
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from theflow import Node, Param
|
||||||
|
|
||||||
|
from kotaemon.base import BaseComponent
|
||||||
|
from kotaemon.docstores import InMemoryDocumentStore
|
||||||
|
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||||
|
from kotaemon.llms.completions.openai import AzureOpenAI
|
||||||
|
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
||||||
|
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
||||||
|
from kotaemon.vectorstores import ChromaVectorStore
|
||||||
|
|
||||||
|
|
||||||
|
class QuestionAnsweringPipeline(BaseComponent):
    """Answer a question with an LLM, using retrieved documents as context.

    ``run_raw`` retrieves the ``retrieval_top_k`` best-matching documents for
    the question, joins their texts into a context string, and asks the
    Azure OpenAI completion model to answer the question given that context.
    """

    # Location handed to ChromaVectorStore.
    vectorstore_path: str = "./tmp"
    # Number of documents requested from the retrieving pipeline.
    retrieval_top_k: int = 1
    # Read once, at class-definition time; empty string when the variable is unset.
    openai_api_key: str = os.environ.get("OPENAI_API_KEY", "")

    @Node.decorate(depends_on="openai_api_key")
    def llm(self):
        """Build the Azure OpenAI completion model that writes the answer."""
        llm_settings = dict(
            openai_api_base="https://bleh-dummy-2.openai.azure.com/",
            openai_api_key=self.openai_api_key,
            openai_api_version="2023-03-15-preview",
            deployment_name="dummy-q2-gpt35",
            temperature=0,
            request_timeout=60,
        )
        return AzureOpenAI(**llm_settings)

    @Node.decorate(depends_on=["vectorstore_path", "openai_api_key"])
    def retrieving_pipeline(self):
        """Build the retriever over the Chroma store at ``vectorstore_path``."""
        store = ChromaVectorStore(self.vectorstore_path)
        embedder = AzureOpenAIEmbeddings(
            model="text-embedding-ada-002",
            deployment="dummy-q2-text-embedding",
            openai_api_base="https://bleh-dummy-2.openai.azure.com/",
            openai_api_key=self.openai_api_key,
        )
        return RetrieveDocumentFromVectorStorePipeline(
            vector_store=store,
            embedding=embedder,
        )

    def run_raw(self, text: str) -> str:
        """Answer ``text`` using retrieved documents as context.

        Args:
            text: the question to answer.

        Returns:
            The first element of the ``text`` attribute of the LLM output.
        """
        # A fresh document store is loaded from "docstore.json" on every call,
        # so documents indexed since the previous call are picked up.
        fresh_store = InMemoryDocumentStore()
        fresh_store.load("docstore.json")
        self.retrieving_pipeline.doc_store = fresh_store

        # Collect the text of every retrieved document.
        matched_texts: List[str] = []
        for document in self.retrieving_pipeline(
            text, top_k=int(self.retrieval_top_k)
        ):
            matched_texts.append(document.text)
        context = "\n".join(matched_texts)

        # Compose the prompt, record it, then query the model.
        prompt = f'Answer the following question: "{text}". The context is: \n{context}'
        self.log_progress(".prompt", prompt=prompt)

        llm_output = self.llm(prompt)
        return llm_output.text[0]
|
||||||
|
|
||||||
|
|
||||||
|
class IndexingPipeline(IndexVectorStoreFromDocumentPipeline):
    """Index text into a Chroma vector store and a JSON-backed document store.

    Extends ``IndexVectorStoreFromDocumentPipeline`` by wiring in concrete
    stores and an Azure OpenAI embedding model, by persisting the document
    store to ``docstore.json`` after each run, and by returning the number of
    entries in the vector store.
    """

    # Expose variables for users to switch in prompt ui
    vectorstore_path: str = str("./tmp")
    embedding_model: str = "text-embedding-ada-002"
    deployment: str = "dummy-q2-text-embedding"
    openai_api_base: str = "https://bleh-dummy-2.openai.azure.com/"
    # Read once, at class-definition time; empty string when the variable is unset.
    openai_api_key: str = os.environ.get("OPENAI_API_KEY", "")

    @Param.decorate(depends_on=["vectorstore_path"])
    def vector_store(self):
        """Return the Chroma vector store located at ``vectorstore_path``."""
        return ChromaVectorStore(self.vectorstore_path)

    @Param.decorate()
    def doc_store(self):
        """Return an in-memory document store, preloaded when a dump exists."""
        doc_store = InMemoryDocumentStore()
        # On the very first run nothing has been persisted yet, so only load
        # when the file is actually there.
        if os.path.isfile("docstore.json"):
            doc_store.load("docstore.json")
        return doc_store

    # NOTE(review): "vector_store" is kept from the original declaration; the
    # remaining entries are the attributes this node actually reads, mirroring
    # how the other nodes in this file declare depends_on. Confirm against
    # theflow's depends_on semantics.
    @Node.decorate(
        depends_on=[
            "vector_store",
            "embedding_model",
            "deployment",
            "openai_api_base",
            "openai_api_key",
        ]
    )
    def embedding(self):
        """Build the Azure OpenAI embedding model from the exposed settings."""
        return AzureOpenAIEmbeddings(
            # Use the exposed attribute instead of a hard-coded model name, so
            # that changing ``embedding_model`` has an effect. The default
            # value is the same model name that was previously hard-coded.
            model=self.embedding_model,
            deployment=self.deployment,
            openai_api_base=self.openai_api_base,
            openai_api_key=self.openai_api_key,
        )

    def run_raw(self, text: str) -> int:  # type: ignore
        """Normally, this indexing pipeline returns nothing. For demonstration,
        we want it to return something, so let's return the number of documents
        in the vector store

        Args:
            text: the text to index; forwarded unchanged to the parent pipeline.

        Returns:
            The entry count reported by the vector store's underlying
            collection.
        """
        super().run_raw(text)

        if self.doc_store is not None:
            # persist to local anytime an indexing is created
            # this can be bypassed when we have a FileDocumentStore
            self.doc_store.save("docstore.json")

        # Reaches into the store's private ``_collection`` attribute to get
        # the count.
        return self.vector_store._collection.count()
|
Loading…
Reference in New Issue
Block a user