Make ktem official (#134)
* Move kotaemon and ktem into same folder * Update docs * Update CI * Resolve mypy, isorts * Re-allow test pdf files
This commit is contained in:
parent
9c5b707010
commit
2dd531114f
5
.github/workflows/unit-test.yaml
vendored
5
.github/workflows/unit-test.yaml
vendored
|
@ -60,7 +60,7 @@ jobs:
|
||||||
# built-in tomllib
|
# built-in tomllib
|
||||||
run: |
|
run: |
|
||||||
pip install tomli
|
pip install tomli
|
||||||
package_version=$(python -c "import tomli; print(tomli.load(open('pyproject.toml', 'rb'))['project']['version'])")
|
package_version=$(python -c "import tomli; print(tomli.load(open('libs/kotaemon/pyproject.toml', 'rb'))['project']['version'])")
|
||||||
cache_key="${{ runner.os }}-py${{ matrix.python-version }}-v${package_version}"
|
cache_key="${{ runner.os }}-py${{ matrix.python-version }}-v${package_version}"
|
||||||
echo "key=$cache_key" | tee -a ${{ matrix.GITHUB_OUTPUT }}
|
echo "key=$cache_key" | tee -a ${{ matrix.GITHUB_OUTPUT }}
|
||||||
|
|
||||||
|
@ -99,7 +99,8 @@ jobs:
|
||||||
path: ${{ env.pythonLocation }}
|
path: ${{ env.pythonLocation }}
|
||||||
key: ${{ steps.restore-dependencies.outputs.cache-primary-key }}
|
key: ${{ steps.restore-dependencies.outputs.cache-primary-key }}
|
||||||
|
|
||||||
- name: Test with pytest
|
- name: Test kotaemon with pytest
|
||||||
run: |
|
run: |
|
||||||
pip show pytest
|
pip show pytest
|
||||||
|
cd libs/kotaemon
|
||||||
pytest
|
pytest
|
||||||
|
|
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -448,6 +448,7 @@ $RECYCLE.BIN/
|
||||||
|
|
||||||
# PDF files
|
# PDF files
|
||||||
*.pdf
|
*.pdf
|
||||||
|
!libs/kotaemon/tests/resources/*.pdf
|
||||||
|
|
||||||
.theflow/
|
.theflow/
|
||||||
|
|
||||||
|
|
|
@ -69,7 +69,7 @@ def generate_docs_for_src_code(
|
||||||
|
|
||||||
|
|
||||||
generate_docs_for_src_code(
|
generate_docs_for_src_code(
|
||||||
code_dir=doc_dir.parent / "kotaemon",
|
code_dir=doc_dir.parent / "libs" / "kotaemon",
|
||||||
target_doc_folder="reference",
|
target_doc_folder="reference",
|
||||||
ignored_modules={"contribs"},
|
ignored_modules={"contribs"},
|
||||||
)
|
)
|
||||||
|
|
130
libs/kotaemon/README.md
Normal file
130
libs/kotaemon/README.md
Normal file
|
@ -0,0 +1,130 @@
|
||||||
|
# kotaemon
|
||||||
|
|
||||||
|
Quick and easy AI components to build Kotaemon - applicable in client
|
||||||
|
project.
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
https://docs.promptui.dm.cinnamon.is
|
||||||
|
|
||||||
|
## Install
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install kotaemon@git+ssh://git@github.com/Cinnamon/kotaemon.git
|
||||||
|
```
|
||||||
|
|
||||||
|
## Contribute
|
||||||
|
|
||||||
|
### Setup
|
||||||
|
|
||||||
|
- Create conda environment (suggest 3.10)
|
||||||
|
|
||||||
|
```shell
|
||||||
|
conda create -n kotaemon python=3.10
|
||||||
|
conda activate kotaemon
|
||||||
|
```
|
||||||
|
|
||||||
|
- Clone the repo
|
||||||
|
|
||||||
|
```shell
|
||||||
|
git clone git@github.com:Cinnamon/kotaemon.git
|
||||||
|
cd kotaemon
|
||||||
|
```
|
||||||
|
|
||||||
|
- Install all
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install -e ".[dev]"
|
||||||
|
```
|
||||||
|
|
||||||
|
- Pre-commit
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pre-commit install
|
||||||
|
```
|
||||||
|
|
||||||
|
- Test
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pytest tests
|
||||||
|
```
|
||||||
|
|
||||||
|
### Credential sharing
|
||||||
|
|
||||||
|
This repo uses [git-secret](https://sobolevn.me/git-secret/) to share credentials, which
|
||||||
|
internally uses `gpg` to encrypt and decrypt secret files.
|
||||||
|
|
||||||
|
This repo uses `python-dotenv` to manage credentials stored as environment variables.
|
||||||
|
Please note that the use of `python-dotenv` and credentials are for development
|
||||||
|
purposes only. Thus, it should not be used in the main source code (i.e. `kotaemon/` and `tests/`), but can be used in `examples/`.
|
||||||
|
|
||||||
|
#### Install git-secret
|
||||||
|
|
||||||
|
Please follow the [official guide](https://sobolevn.me/git-secret/installation) to install git-secret.
|
||||||
|
|
||||||
|
For Windows users, see [For Windows users](#for-windows-users).
|
||||||
|
|
||||||
|
For users who don't have sudo privilege to install packages, follow the `Manual Installation` in the [official guide](https://sobolevn.me/git-secret/installation) and set `PREFIX` to a path that you have access to. And please don't forget to add `PREFIX` to your `PATH`.
|
||||||
|
|
||||||
|
#### Gaining access
|
||||||
|
|
||||||
|
In order to gain access to the secret files, you must provide your gpg public key file to anyone who already has access and ask them to add your key to the keyring. For a quick tutorial on generating your gpg key pair, you can refer to the `Using gpg` section from the [git-secret main page](https://sobolevn.me/git-secret/).
|
||||||
|
|
||||||
|
#### Decrypt the secret file
|
||||||
|
|
||||||
|
The credentials are encrypted in the `.env.secret` file. To print the decrypted content to stdout, run
|
||||||
|
|
||||||
|
```shell
|
||||||
|
git-secret cat [filename]
|
||||||
|
```
|
||||||
|
|
||||||
|
Or to get the decrypted `.env` file, run
|
||||||
|
|
||||||
|
```shell
|
||||||
|
git-secret reveal [filename]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### For Windows users
|
||||||
|
|
||||||
|
git-secret is currently not available for Windows, thus the easiest way is to use it in WSL (please use the latest version of WSL2). From there you have 2 options:
|
||||||
|
|
||||||
|
1. Using the gpg of WSL.
|
||||||
|
|
||||||
|
This is the most straightforward option since you would use WSL just like any other unix environment. However, the downside is that you have to make WSL your main environment, which means WSL must have write permission on your repo. To achieve this, you must either:
|
||||||
|
|
||||||
|
- Clone and store your repo inside WSL's file system.
|
||||||
|
- Provide WSL with the necessary permissions on your Windows file system. This can be achieved by setting `automount` options for WSL. To do that, add the following content to `/etc/wsl.conf` and then restart your sub-system.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
[automount]
|
||||||
|
options = "metadata,umask=022,fmask=011"
|
||||||
|
```
|
||||||
|
|
||||||
|
This enables all permissions for user owner.
|
||||||
|
|
||||||
|
2. Using the gpg of Windows but with git-secret from WSL.
|
||||||
|
|
||||||
|
For those who use Windows as the main environment, having to switch back and forth between Windows and WSL will be inconvenient. You can instead stay within your Windows environment and apply some tricks to use `git-secret` from WSL.
|
||||||
|
|
||||||
|
- Install and setup `gpg` on Windows.
|
||||||
|
- Install `git-secret` on WSL. Now in Windows, you can invoke `git-secret` using `wsl git-secret`.
|
||||||
|
- Alternatively, you can set up aliases in CMD to shorten the syntax. Please refer to [this SO answer](https://stackoverflow.com/a/65823225) for instructions. Some recommended aliases are:
|
||||||
|
|
||||||
|
```bat
|
||||||
|
@echo off
|
||||||
|
|
||||||
|
:: Commands
|
||||||
|
DOSKEY ls=dir /B $*
|
||||||
|
DOSKEY ll=dir /a $*
|
||||||
|
DOSKEY git-secret=wsl git-secret $*
|
||||||
|
DOSKEY gs=wsl git-secret $*
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you can invoke `git-secret` in CMD using `git-secret` or `gs`.
|
||||||
|
|
||||||
|
- For PowerShell users, similar behaviours can be achieved using `Set-Alias` and `profile.ps1`. Please refer to this [SO thread](https://stackoverflow.com/questions/61081434/how-do-i-create-a-permanent-alias-file-in-powershell-core) as an example.
|
||||||
|
|
||||||
|
### Code base structure
|
||||||
|
|
||||||
|
- documents: define document
|
||||||
|
- loaders
|
|
@ -5,9 +5,8 @@ from dataclasses import dataclass
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, Literal, NamedTuple, Optional, Union
|
from typing import Any, Dict, Literal, NamedTuple, Optional, Union
|
||||||
|
|
||||||
from pydantic import Extra
|
|
||||||
|
|
||||||
from kotaemon.base import LLMInterface
|
from kotaemon.base import LLMInterface
|
||||||
|
from pydantic import Extra
|
||||||
|
|
||||||
|
|
||||||
def check_log():
|
def check_log():
|
|
@ -1,11 +1,10 @@
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from kotaemon.llms import LLM, ChatLLM
|
||||||
from langchain.agents import AgentType as LCAgentType
|
from langchain.agents import AgentType as LCAgentType
|
||||||
from langchain.agents import initialize_agent
|
from langchain.agents import initialize_agent
|
||||||
from langchain.agents.agent import AgentExecutor as LCAgentExecutor
|
from langchain.agents.agent import AgentExecutor as LCAgentExecutor
|
||||||
|
|
||||||
from kotaemon.llms import LLM, ChatLLM
|
|
||||||
|
|
||||||
from .base import BaseAgent
|
from .base import BaseAgent
|
||||||
from .io import AgentOutput, AgentType
|
from .io import AgentOutput, AgentType
|
||||||
from .tools import BaseTool
|
from .tools import BaseTool
|
|
@ -1,10 +1,9 @@
|
||||||
from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
|
from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
|
||||||
|
|
||||||
|
from kotaemon.base import BaseComponent
|
||||||
from langchain.agents import Tool as LCTool
|
from langchain.agents import Tool as LCTool
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
|
||||||
|
|
||||||
|
|
||||||
class ToolException(Exception):
|
class ToolException(Exception):
|
||||||
"""An optional exception that tool throws when execution error occurs.
|
"""An optional exception that tool throws when execution error occurs.
|
|
@ -1,8 +1,7 @@
|
||||||
from typing import AnyStr, Optional, Type
|
from typing import AnyStr, Optional, Type
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
from kotaemon.llms import BaseLLM
|
from kotaemon.llms import BaseLLM
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from .base import BaseTool, ToolException
|
from .base import BaseTool, ToolException
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
from typing import Any, AnyStr, Optional, Type, Union
|
from typing import Any, AnyStr, Optional, Type, Union
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from .base import BaseTool
|
from .base import BaseTool
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from theflow import Function, Node, Param, lazy
|
|
||||||
|
|
||||||
from kotaemon.base.schema import Document
|
from kotaemon.base.schema import Document
|
||||||
|
from theflow import Function, Node, Param, lazy
|
||||||
|
|
||||||
|
|
||||||
class BaseComponent(Function):
|
class BaseComponent(Function):
|
|
@ -1,10 +1,9 @@
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from theflow import SessionFunction
|
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent, LLMInterface
|
from kotaemon.base import BaseComponent, LLMInterface
|
||||||
from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
|
from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
|
||||||
|
from theflow import SessionFunction
|
||||||
|
|
||||||
|
|
||||||
class BaseChatBot(BaseComponent):
|
class BaseChatBot(BaseComponent):
|
|
@ -36,9 +36,8 @@ def export(export_path, output):
|
||||||
"""Export a pipeline to a config file"""
|
"""Export a pipeline to a config file"""
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from theflow.utils.modules import import_dotted_string
|
|
||||||
|
|
||||||
from kotaemon.contribs.promptui.config import export_pipeline_to_config
|
from kotaemon.contribs.promptui.config import export_pipeline_to_config
|
||||||
|
from theflow.utils.modules import import_dotted_string
|
||||||
|
|
||||||
sys.path.append(os.getcwd())
|
sys.path.append(os.getcwd())
|
||||||
cls = import_dotted_string(export_path, safe=False)
|
cls = import_dotted_string(export_path, safe=False)
|
|
@ -4,7 +4,6 @@ from pathlib import Path
|
||||||
from typing import Any, Dict, Optional, Type, Union
|
from typing import Any, Dict, Optional, Type, Union
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from kotaemon.base import BaseComponent
|
||||||
from kotaemon.chatbot import BaseChatBot
|
from kotaemon.chatbot import BaseChatBot
|
||||||
|
|
|
@ -6,11 +6,10 @@ from typing import Any, Dict, List, Type, Union
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import yaml
|
import yaml
|
||||||
|
from kotaemon.base import BaseComponent
|
||||||
from theflow.storage import storage
|
from theflow.storage import storage
|
||||||
from theflow.utils.modules import import_dotted_string
|
from theflow.utils.modules import import_dotted_string
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
|
||||||
|
|
||||||
from .logs import ResultLog
|
from .logs import ResultLog
|
||||||
|
|
||||||
|
|
|
@ -3,12 +3,11 @@ from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
from theflow.storage import storage
|
|
||||||
|
|
||||||
from kotaemon.chatbot import ChatConversation
|
from kotaemon.chatbot import ChatConversation
|
||||||
from kotaemon.contribs.promptui.base import get_component
|
from kotaemon.contribs.promptui.base import get_component
|
||||||
from kotaemon.contribs.promptui.export import export
|
from kotaemon.contribs.promptui.export import export
|
||||||
from kotaemon.contribs.promptui.ui.blocks import ChatBlock
|
from kotaemon.contribs.promptui.ui.blocks import ChatBlock
|
||||||
|
from theflow.storage import storage
|
||||||
|
|
||||||
from ..logs import ResultLog
|
from ..logs import ResultLog
|
||||||
|
|
|
@ -6,10 +6,9 @@ from typing import Any, Dict
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from theflow.storage import storage
|
|
||||||
|
|
||||||
from kotaemon.contribs.promptui.base import get_component
|
from kotaemon.contribs.promptui.base import get_component
|
||||||
from kotaemon.contribs.promptui.export import export
|
from kotaemon.contribs.promptui.export import export
|
||||||
|
from theflow.storage import storage
|
||||||
|
|
||||||
from ..logs import ResultLog
|
from ..logs import ResultLog
|
||||||
|
|
|
@ -3,9 +3,8 @@ from __future__ import annotations
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from typing import Any, Type
|
from typing import Any, Type
|
||||||
|
|
||||||
from llama_index.node_parser.interface import NodeParser
|
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent, Document, RetrievedDocument
|
from kotaemon.base import BaseComponent, Document, RetrievedDocument
|
||||||
|
from llama_index.node_parser.interface import NodeParser
|
||||||
|
|
||||||
|
|
||||||
class DocTransformer(BaseComponent):
|
class DocTransformer(BaseComponent):
|
|
@ -1,7 +1,5 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from llama_index.readers.base import BaseReader
|
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent, Document, Param
|
from kotaemon.base import BaseComponent, Document, Param
|
||||||
from kotaemon.indices.extractors import BaseDocParser
|
from kotaemon.indices.extractors import BaseDocParser
|
||||||
from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
|
from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
|
||||||
|
@ -13,6 +11,7 @@ from kotaemon.loaders import (
|
||||||
PandasExcelReader,
|
PandasExcelReader,
|
||||||
UnstructuredReader,
|
UnstructuredReader,
|
||||||
)
|
)
|
||||||
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
|
|
||||||
class DocumentIngestor(BaseComponent):
|
class DocumentIngestor(BaseComponent):
|
|
@ -1,10 +1,9 @@
|
||||||
from typing import Iterator, List
|
from typing import Iterator, List
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from kotaemon.base import BaseComponent
|
||||||
from kotaemon.base.schema import HumanMessage, SystemMessage
|
from kotaemon.base.schema import HumanMessage, SystemMessage
|
||||||
from kotaemon.llms import BaseLLM
|
from kotaemon.llms import BaseLLM
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
class FactWithEvidence(BaseModel):
|
class FactWithEvidence(BaseModel):
|
|
@ -2,10 +2,9 @@ from __future__ import annotations
|
||||||
|
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from langchain.output_parsers.boolean import BooleanOutputParser
|
|
||||||
|
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.llms import BaseLLM, PromptTemplate
|
from kotaemon.llms import BaseLLM, PromptTemplate
|
||||||
|
from langchain.output_parsers.boolean import BooleanOutputParser
|
||||||
|
|
||||||
from .base import BaseReranking
|
from .base import BaseReranking
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from langchain_core.language_models.base import BaseLanguageModel
|
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from kotaemon.base import BaseComponent
|
||||||
|
from langchain_core.language_models.base import BaseLanguageModel
|
||||||
|
|
||||||
|
|
||||||
class BaseLLM(BaseComponent):
|
class BaseLLM(BaseComponent):
|
|
@ -156,7 +156,6 @@ class GatedBranchingPipeline(SimpleBranchingPipeline):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import dotenv
|
import dotenv
|
||||||
|
|
||||||
from kotaemon.llms import AzureChatOpenAI, BasePromptComponent
|
from kotaemon.llms import AzureChatOpenAI, BasePromptComponent
|
||||||
from kotaemon.parsers import RegexExtractor
|
from kotaemon.parsers import RegexExtractor
|
||||||
|
|
|
@ -141,9 +141,12 @@ class OpenAI(LCCompletionMixin, LLM):
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_lc_class(self):
|
def _get_lc_class(self):
|
||||||
import langchain.llms as langchain_llms
|
try:
|
||||||
|
from langchain_openai import OpenAI
|
||||||
|
except ImportError:
|
||||||
|
from langchain.llms import OpenAI
|
||||||
|
|
||||||
return langchain_llms.OpenAI
|
return OpenAI
|
||||||
|
|
||||||
|
|
||||||
class AzureOpenAI(LCCompletionMixin, LLM):
|
class AzureOpenAI(LCCompletionMixin, LLM):
|
|
@ -1,9 +1,8 @@
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from typing import Callable, List
|
from typing import Callable, List
|
||||||
|
|
||||||
from theflow import Function, Node, Param
|
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent, Document
|
from kotaemon.base import BaseComponent, Document
|
||||||
|
from theflow import Function, Node, Param
|
||||||
|
|
||||||
from .chats import AzureChatOpenAI
|
from .chats import AzureChatOpenAI
|
||||||
from .completions import LLM
|
from .completions import LLM
|
|
@ -1,11 +1,10 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, List, Type, Union
|
from typing import Any, List, Type, Union
|
||||||
|
|
||||||
|
from kotaemon.base import BaseComponent, Document
|
||||||
from llama_index import SimpleDirectoryReader, download_loader
|
from llama_index import SimpleDirectoryReader, download_loader
|
||||||
from llama_index.readers.base import BaseReader
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent, Document
|
|
||||||
|
|
||||||
|
|
||||||
class AutoReader(BaseComponent):
|
class AutoReader(BaseComponent):
|
||||||
"""General auto reader for a variety of files. (based on llama-hub)"""
|
"""General auto reader for a variety of files. (based on llama-hub)"""
|
|
@ -6,9 +6,8 @@ Pandas parser for .xlsx files.
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, List, Optional, Union
|
from typing import Any, List, Optional, Union
|
||||||
|
|
||||||
from llama_index.readers.base import BaseReader
|
|
||||||
|
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
|
|
||||||
class PandasExcelReader(BaseReader):
|
class PandasExcelReader(BaseReader):
|
|
@ -5,11 +5,10 @@ from pathlib import Path
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from kotaemon.base import Document
|
||||||
from langchain.utils import get_from_dict_or_env
|
from langchain.utils import get_from_dict_or_env
|
||||||
from llama_index.readers.base import BaseReader
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
from kotaemon.base import Document
|
|
||||||
|
|
||||||
from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown
|
from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,8 @@ from typing import List
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from llama_index.readers.base import BaseReader
|
|
||||||
|
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
|
from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
|
||||||
from .utils.table import strip_special_chars_markdown
|
from .utils.table import strip_special_chars_markdown
|
|
@ -12,9 +12,8 @@ pip install xlrd
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from llama_index.readers.base import BaseReader
|
|
||||||
|
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
|
|
||||||
class UnstructuredReader(BaseReader):
|
class UnstructuredReader(BaseReader):
|
|
@ -3,13 +3,12 @@ from __future__ import annotations
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from kotaemon.base import DocumentWithEmbedding
|
||||||
from llama_index.schema import NodeRelationship, RelatedNodeInfo
|
from llama_index.schema import NodeRelationship, RelatedNodeInfo
|
||||||
from llama_index.vector_stores.types import BasePydanticVectorStore
|
from llama_index.vector_stores.types import BasePydanticVectorStore
|
||||||
from llama_index.vector_stores.types import VectorStore as LIVectorStore
|
from llama_index.vector_stores.types import VectorStore as LIVectorStore
|
||||||
from llama_index.vector_stores.types import VectorStoreQuery
|
from llama_index.vector_stores.types import VectorStoreQuery
|
||||||
|
|
||||||
from kotaemon.base import DocumentWithEmbedding
|
|
||||||
|
|
||||||
|
|
||||||
class BaseVectorStore(ABC):
|
class BaseVectorStore(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user