[AUR-387, AUR-425] Add start-project to CLI (#29)
This commit is contained in:
parent
d83c22aa4e
commit
205955b8a3
|
@ -49,3 +49,4 @@ repos:
|
|||
- id: mypy
|
||||
additional_dependencies: [types-PyYAML==6.0.12.11, "types-requests"]
|
||||
args: ["--check-untyped-defs", "--ignore-missing-imports"]
|
||||
exclude: "^templates/"
|
||||
|
|
|
@ -22,4 +22,4 @@ try:
|
|||
except ImportError:
|
||||
pass
|
||||
|
||||
__version__ = "0.0.2"
|
||||
__version__ = "0.0.3"
|
||||
|
|
|
@ -3,9 +3,6 @@ import os
|
|||
import click
|
||||
import yaml
|
||||
|
||||
from kotaemon.contribs.promptui.config import export_pipeline_to_config
|
||||
from kotaemon.contribs.promptui.ui import build_from_dict
|
||||
|
||||
|
||||
# check if the output is not a .yml file -> raise error
|
||||
def check_config_format(config):
|
||||
|
@ -39,6 +36,8 @@ def export(export_path, output):
|
|||
|
||||
from theflow.utils.modules import import_dotted_string
|
||||
|
||||
from kotaemon.contribs.promptui.config import export_pipeline_to_config
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
cls = import_dotted_string(export_path, safe=False)
|
||||
export_pipeline_to_config(cls, output)
|
||||
|
@ -48,9 +47,21 @@ def export(export_path, output):
|
|||
@promptui.command()
|
||||
@click.argument("run_path", required=False, default="promptui.yml")
|
||||
def run(run_path):
|
||||
|
||||
from kotaemon.contribs.promptui.ui import build_from_dict
|
||||
|
||||
build_from_dict(run_path)
|
||||
check_config_format(run_path)
|
||||
|
||||
|
||||
@main.command()
def start_project():
    """Scaffold a new project from the default kotaemon cookiecutter template.

    Invokes the ``cookiecutter`` command-line tool on the kotaemon repository,
    selecting the ``templates/project-default`` sub-directory as the template.
    """
    # The trailing space after the repository URL is required: adjacent string
    # literals are joined verbatim, and without it the URL and the
    # ``--directory`` option fuse into one invalid argument.
    os.system(
        "cookiecutter https://github.com/Cinnamon/kotaemon.git "
        "--directory='templates/project-default'"
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
6
setup.py
6
setup.py
|
@ -29,18 +29,16 @@ setuptools.setup(
|
|||
packages=setuptools.find_packages(
|
||||
exclude=("tests", "tests.*", "examples", "examples.*")
|
||||
),
|
||||
dependencies=[
|
||||
"click >= 8.1.7",
|
||||
],
|
||||
install_requires=[
|
||||
"farm-haystack==1.19.0",
|
||||
"langchain",
|
||||
"theflow",
|
||||
"llama-index",
|
||||
"llama-hub",
|
||||
"nltk",
|
||||
"gradio",
|
||||
"openpyxl",
|
||||
"cookiecutter",
|
||||
"click",
|
||||
],
|
||||
extras_require={
|
||||
"dev": [
|
||||
|
|
4
templates/project-default/cookiecutter.json
Normal file
4
templates/project-default/cookiecutter.json
Normal file
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"project_name": "prj_kotaemon",
|
||||
"ptl": "john"
|
||||
}
|
23
templates/project-default/{{cookiecutter.project_name}}/.gitattributes
vendored
Normal file
23
templates/project-default/{{cookiecutter.project_name}}/.gitattributes
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
.gitattributes text eol=lf
|
||||
.gitignore text eol=lf
|
||||
*.build text eol=lf
|
||||
*.c text eol=lf
|
||||
*.cmake text eol=lf
|
||||
*.cpp text eol=lf
|
||||
*.csv text eol=lf
|
||||
*.f text eol=lf
|
||||
*.f90 text eol=lf
|
||||
*.for text eol=lf
|
||||
*.grc text eol=lf
|
||||
*.h text eol=lf
|
||||
*.ipynb text eol=lf
|
||||
*.m text eol=lf
|
||||
*.md text eol=lf
|
||||
*.pas text eol=lf
|
||||
*.py text eol=lf
|
||||
*.rst text eol=lf
|
||||
*.sh text eol=lf
|
||||
*.txt text eol=lf
|
||||
*.yml text eol=lf
|
||||
Makefile text eol=lf
|
||||
*.html linguist-documentation
|
459
templates/project-default/{{cookiecutter.project_name}}/.gitignore
vendored
Normal file
459
templates/project-default/{{cookiecutter.project_name}}/.gitignore
vendored
Normal file
|
@ -0,0 +1,459 @@
|
|||
# Created by https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
|
||||
|
||||
### Emacs ###
|
||||
# -*- mode: gitignore; -*-
|
||||
*~
|
||||
\#*\#
|
||||
/.emacs.desktop
|
||||
/.emacs.desktop.lock
|
||||
*.elc
|
||||
auto-save-list
|
||||
tramp
|
||||
.\#*
|
||||
|
||||
# Org-mode
|
||||
.org-id-locations
|
||||
*_archive
|
||||
|
||||
# flymake-mode
|
||||
*_flymake.*
|
||||
|
||||
# eshell files
|
||||
/eshell/history
|
||||
/eshell/lastdir
|
||||
|
||||
# elpa packages
|
||||
/elpa/
|
||||
|
||||
# reftex files
|
||||
*.rel
|
||||
|
||||
# AUCTeX auto folder
|
||||
/auto/
|
||||
|
||||
# cask packages
|
||||
.cask/
|
||||
dist/
|
||||
|
||||
# Flycheck
|
||||
flycheck_*.el
|
||||
|
||||
# server auth directory
|
||||
/server/
|
||||
|
||||
# projectiles files
|
||||
.projectile
|
||||
|
||||
# directory configuration
|
||||
.dir-locals.el
|
||||
|
||||
# network security
|
||||
/network-security.data
|
||||
|
||||
### Linux ###
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### macOS ###
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
### macOS Patch ###
|
||||
# iCloud generated files
|
||||
*.icloud
|
||||
|
||||
### PyCharm ###
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/**/usage.statistics.xml
|
||||
.idea/**/dictionaries
|
||||
.idea/**/shelf
|
||||
|
||||
# AWS User-specific
|
||||
.idea/**/aws.xml
|
||||
|
||||
# Generated files
|
||||
.idea/**/contentModel.xml
|
||||
|
||||
# Sensitive or high-churn files
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
.idea/**/dbnavigator.xml
|
||||
|
||||
# Gradle
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# Gradle and Maven with auto-import
|
||||
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||
# since they will be recreated, and may cause churn. Uncomment if using
|
||||
# auto-import.
|
||||
# .idea/artifacts
|
||||
# .idea/compiler.xml
|
||||
# .idea/jarRepositories.xml
|
||||
# .idea/modules.xml
|
||||
# .idea/*.iml
|
||||
# .idea/modules
|
||||
# *.iml
|
||||
# *.ipr
|
||||
|
||||
# CMake
|
||||
cmake-build-*/
|
||||
|
||||
# Mongo Explorer plugin
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
# File-based project format
|
||||
*.iws
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# SonarLint plugin
|
||||
.idea/sonarlint/
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
# Editor-based Rest Client
|
||||
.idea/httpRequests
|
||||
|
||||
# Android studio 3.1+ serialized cache file
|
||||
.idea/caches/build_file_checksums.ser
|
||||
|
||||
### PyCharm Patch ###
|
||||
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
||||
|
||||
# *.iml
|
||||
# modules.xml
|
||||
# .idea/misc.xml
|
||||
# *.ipr
|
||||
|
||||
# Sonarlint plugin
|
||||
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
||||
.idea/**/sonarlint/
|
||||
|
||||
# SonarQube Plugin
|
||||
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
||||
.idea/**/sonarIssues.xml
|
||||
|
||||
# Markdown Navigator plugin
|
||||
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
||||
.idea/**/markdown-navigator.xml
|
||||
.idea/**/markdown-navigator-enh.xml
|
||||
.idea/**/markdown-navigator/
|
||||
|
||||
# Cache file creation bug
|
||||
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
||||
.idea/$CACHE_FILE$
|
||||
|
||||
# CodeStream plugin
|
||||
# https://plugins.jetbrains.com/plugin/12206-codestream
|
||||
.idea/codestream.xml
|
||||
|
||||
# Azure Toolkit for IntelliJ plugin
|
||||
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
||||
.idea/**/azureSettings.xml
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
### Python Patch ###
|
||||
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
||||
poetry.toml
|
||||
|
||||
# ruff
|
||||
.ruff_cache/
|
||||
|
||||
# LSP config files
|
||||
pyrightconfig.json
|
||||
|
||||
### Vim ###
|
||||
# Swap
|
||||
[._]*.s[a-v][a-z]
|
||||
# comment out if you don't need vector files
!*.svg
|
||||
[._]*.sw[a-p]
|
||||
[._]s[a-rt-v][a-z]
|
||||
[._]ss[a-gi-z]
|
||||
[._]sw[a-p]
|
||||
|
||||
# Session
|
||||
Session.vim
|
||||
Sessionx.vim
|
||||
|
||||
# Temporary
|
||||
.netrwhist
|
||||
# Auto-generated tag files
|
||||
tags
|
||||
# Persistent undo
|
||||
[._]*.un~
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/*.code-snippets
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Built Visual Studio Code Extensions
|
||||
*.vsix
|
||||
|
||||
### VisualStudioCode Patch ###
|
||||
# Ignore all local history of files
|
||||
.history
|
||||
.ionide
|
||||
|
||||
### Windows ###
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
Thumbs.db:encryptable
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Dump file
|
||||
*.stackdump
|
||||
|
||||
# Folder config file
|
||||
[Dd]esktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
|
||||
.theflow/
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
|
||||
|
||||
logs/
|
||||
.gitsecret/keys/random_seed
|
||||
!*.secret
|
||||
credentials.txt
|
||||
|
||||
S.gpg-agent*
|
||||
.vscode/settings.json
|
|
@ -0,0 +1,51 @@
|
|||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.3.0
|
||||
hooks:
|
||||
- id: check-yaml
|
||||
- id: check-toml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
- id: detect-aws-credentials
|
||||
args: ["--allow-missing-credentials"]
|
||||
- id: detect-private-key
|
||||
- id: check-added-large-files
|
||||
- repo: https://github.com/ambv/black
|
||||
rev: 22.3.0
|
||||
hooks:
|
||||
- id: black
|
||||
language_version: python3
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.12.0
|
||||
hooks:
|
||||
- id: isort
|
||||
args: ["--profile", "black"]
|
||||
language_version: python3.10
|
||||
- repo: https://github.com/pycqa/flake8
|
||||
rev: 4.0.1
|
||||
hooks:
|
||||
- id: flake8
|
||||
args: ["--max-line-length", "88", "--extend-ignore", "E203"]
|
||||
- repo: https://github.com/myint/autoflake
|
||||
rev: v1.4
|
||||
hooks:
|
||||
- id: autoflake
|
||||
args:
|
||||
[
|
||||
"--in-place",
|
||||
"--remove-unused-variables",
|
||||
"--remove-all-unused-imports",
|
||||
"--ignore-init-module-imports",
|
||||
"--exclude=tests/*",
|
||||
]
|
||||
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||
rev: v2.7.1
|
||||
hooks:
|
||||
- id: prettier
|
||||
types_or: [markdown, yaml]
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: "v1.5.1"
|
||||
hooks:
|
||||
- id: mypy
|
||||
additional_dependencies: [types-PyYAML==6.0.12.11, "types-requests"]
|
||||
args: ["--check-untyped-defs", "--ignore-missing-imports"]
|
|
@ -0,0 +1,37 @@
|
|||
<div align="center">
|
||||
|
||||
# Project {{ cookiecutter.project_name }}
|
||||
|
||||
[kotaemon](https://github.com/Cinnamon/kotaemon)
|
||||
|
||||
</div>
|
||||
|
||||
# Install
|
||||
|
||||
```bash
|
||||
# Create new conda env (optional)
|
||||
conda create -n {{ cookiecutter.project_name }} python=3.10
|
||||
conda activate {{ cookiecutter.project_name }}
|
||||
|
||||
# Clone and install the project
|
||||
git clone "<{{ cookiecutter.project_name }}-repo>"
|
||||
cd "<{{ cookiecutter.project_name }}-repo>"
|
||||
pip install -e .
|
||||
|
||||
# Generate the project structure
|
||||
cd ..
|
||||
kh start-project
|
||||
```
|
||||
|
||||
# Usage
|
||||
|
||||
- Build the pipeline in `pipeline.py`
|
||||
|
||||
For supported utilities and tools, refer to https://github.com/Cinnamon/kotaemon/wiki/Utilities
|
||||
|
||||
# Contribute
|
||||
|
||||
- For project issues and errors, please report them in this repo's issue tracker.
|
||||
- For kotaemon issues and errors, please report them or open a PR with fixes at https://github.com/Cinnamon/kotaemon.git
|
||||
- If the template for this project has issues and errors, please report or make
|
||||
PR fixes in https://github.com/Cinnamon/kotaemon/tree/main/templates/project-default
|
|
@ -0,0 +1,20 @@
|
|||
import setuptools
|
||||
|
||||
# Packaging script for the generated project. The ``{{ cookiecutter.* }}``
# placeholders are filled in by cookiecutter when the template is rendered.

# The same text is used for both the short and the long description.
project_summary = "Project {{ cookiecutter.project_name }}"

trove_classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]

# kotaemon is installed straight from its git repository over SSH.
runtime_requirements = [
    "kotaemon@git+ssh://git@github.com/Cinnamon/kotaemon.git",
]

setuptools.setup(
    name="{{ cookiecutter.project_name }}",
    version="0.0.1",
    author="{{ cookiecutter.ptl }}",
    author_email="{{ cookiecutter.ptl }}@cinnamon.is",
    description=project_summary,
    long_description=project_summary,
    url="https://github.com/Cinnamon/kotaemon",
    python_requires=">=3",
    classifiers=trove_classifiers,
    install_requires=runtime_requirements,
)
|
|
@ -0,0 +1,106 @@
|
|||
import os
|
||||
from typing import List
|
||||
|
||||
from theflow import Node, Param
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.docstores import InMemoryDocumentStore
|
||||
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||
from kotaemon.llms.completions.openai import AzureOpenAI
|
||||
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
||||
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
||||
from kotaemon.vectorstores import ChromaVectorStore
|
||||
|
||||
|
||||
class QuestionAnsweringPipeline(BaseComponent):
    """Answer a question with an LLM, using retrieved documents as context.

    ``run_raw`` retrieves the ``retrieval_top_k`` best matches for the question,
    joins their text into a context block and asks the LLM to answer with it.
    """

    # Path handed to ChromaVectorStore.
    vectorstore_path: str = "./tmp"
    # Number of documents requested from the retrieving pipeline per question.
    retrieval_top_k: int = 1
    # Read from the environment once, when the class body is executed.
    openai_api_key: str = os.environ.get("OPENAI_API_KEY", "")

    @Node.decorate(depends_on="openai_api_key")
    def llm(self):
        """Build the Azure OpenAI completion model used to generate the answer."""
        # NOTE(review): the endpoint and deployment name look like placeholders;
        # confirm and replace them with the project's real Azure resources.
        return AzureOpenAI(
            openai_api_base="https://bleh-dummy-2.openai.azure.com/",
            openai_api_key=self.openai_api_key,
            openai_api_version="2023-03-15-preview",
            deployment_name="dummy-q2-gpt35",
            temperature=0,
            request_timeout=60,
        )

    @Node.decorate(depends_on=["vectorstore_path", "openai_api_key"])
    def retrieving_pipeline(self):
        """Build the retrieval pipeline over the Chroma vector store."""
        vector_store = ChromaVectorStore(self.vectorstore_path)
        embedding = AzureOpenAIEmbeddings(
            model="text-embedding-ada-002",
            deployment="dummy-q2-text-embedding",
            openai_api_base="https://bleh-dummy-2.openai.azure.com/",
            openai_api_key=self.openai_api_key,
        )

        return RetrieveDocumentFromVectorStorePipeline(
            vector_store=vector_store,
            embedding=embedding,
        )

    def run_raw(self, text: str) -> str:
        """Answer ``text`` using the retrieved documents as context.

        Args:
            text: the question to answer.

        Returns:
            The first element of the LLM output's ``text``.
        """
        # Reload the document store on every call, in case it has been updated.
        doc_store = InMemoryDocumentStore()
        # Only load when the file exists, using the same guard as
        # IndexingPipeline.doc_store, so a missing file is not an error here.
        if os.path.isfile("docstore.json"):
            doc_store.load("docstore.json")
        self.retrieving_pipeline.doc_store = doc_store

        # Retrieve relevant documents as context.
        matched_texts: List[str] = [
            doc.text
            for doc in self.retrieving_pipeline(text, top_k=int(self.retrieval_top_k))
        ]
        context = "\n".join(matched_texts)

        # Generate the answer.
        prompt = f'Answer the following question: "{text}". The context is: \n{context}'
        self.log_progress(".prompt", prompt=prompt)

        return self.llm(prompt).text[0]
|
||||
|
||||
|
||||
class IndexingPipeline(IndexVectorStoreFromDocumentPipeline):
    """Index text into the vector store and persist the document store to disk.

    Extends the base indexing pipeline so that ``run_raw`` also saves the
    document store to ``docstore.json`` and returns a document count.
    """

    # Expose variables for users to switch in prompt ui
    vectorstore_path: str = "./tmp"
    embedding_model: str = "text-embedding-ada-002"
    deployment: str = "dummy-q2-text-embedding"
    openai_api_base: str = "https://bleh-dummy-2.openai.azure.com/"
    # Read from the environment once, when the class body is executed.
    openai_api_key: str = os.environ.get("OPENAI_API_KEY", "")

    @Param.decorate(depends_on=["vectorstore_path"])
    def vector_store(self):
        """Create the Chroma vector store at ``vectorstore_path``."""
        return ChromaVectorStore(self.vectorstore_path)

    @Param.decorate()
    def doc_store(self):
        """Create the in-memory document store, loading ``docstore.json`` if present."""
        doc_store = InMemoryDocumentStore()
        if os.path.isfile("docstore.json"):
            doc_store.load("docstore.json")
        return doc_store

    @Node.decorate(depends_on=["vector_store"])
    def embedding(self):
        """Build the Azure OpenAI embedding model from the exposed settings."""
        # Use the exposed ``embedding_model`` setting instead of a hard-coded
        # model name, so changing it actually takes effect.
        # NOTE(review): ``depends_on`` lists only ``vector_store``; confirm whether
        # the embedding settings should be listed there as well.
        return AzureOpenAIEmbeddings(
            model=self.embedding_model,
            deployment=self.deployment,
            openai_api_base=self.openai_api_base,
            openai_api_key=self.openai_api_key,
        )

    def run_raw(self, text: str) -> int:  # type: ignore
        """Normally, this indexing pipeline returns nothing. For demonstration,
        we want it to return something, so let's return the number of documents
        in the vector store
        """
        super().run_raw(text)

        if self.doc_store is not None:
            # persist to local anytime an indexing is created
            # this can be bypassed when we have a FileDocumentStore
            self.doc_store.save("docstore.json")

        return self.vector_store._collection.count()
|
Loading…
Reference in New Issue
Block a user