diff --git a/README.md b/README.md index ad70b68..5843cc5 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,7 @@ Quick and easy AI components to build Kotaemon - applicable in client project. -## Documentation - -https://docs.promptui.dm.cinnamon.is +[Documentation](https://docs.bleh-internal.cinnamon.is/) ## Install diff --git a/docs/contributing.md b/docs/contributing.md index e6c5ece..7334bef 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -139,8 +139,8 @@ Examples: https://github.com/Cinnamon/kotaemon/pull/2 - 1st line message is the PR title. - The text area is the PR description. -![image](https://github.com/Cinnamon/kotaemon/assets/35283585/e2593010-d7ef-46e3-8719-6fcae0315b5d) -![image](https://github.com/Cinnamon/kotaemon/assets/35283585/bfe6a117-85cd-4dd4-b432-197c791a9901) +![image](images/274787925-e2593010-d7ef-46e3-8719-6fcae0315b5d.png) +![image](images/274787941-bfe6a117-85cd-4dd4-b432-197c791a9901.png) ## Develop pipelines diff --git a/docs/images/269170170-af94ff6b-b8b4-4602-ab6e-2947deb30dff.png b/docs/images/269170170-af94ff6b-b8b4-4602-ab6e-2947deb30dff.png new file mode 100644 index 0000000..e0bb63b Binary files /dev/null and b/docs/images/269170170-af94ff6b-b8b4-4602-ab6e-2947deb30dff.png differ diff --git a/docs/images/269170198-9ac1b95a-b667-42e7-b318-98a1b805d6df.png b/docs/images/269170198-9ac1b95a-b667-42e7-b318-98a1b805d6df.png new file mode 100644 index 0000000..c688bc6 Binary files /dev/null and b/docs/images/269170198-9ac1b95a-b667-42e7-b318-98a1b805d6df.png differ diff --git a/docs/images/271332562-ac8f9aac-d853-4571-a48b-d866a99eaf3e.png b/docs/images/271332562-ac8f9aac-d853-4571-a48b-d866a99eaf3e.png new file mode 100644 index 0000000..9f38c73 Binary files /dev/null and b/docs/images/271332562-ac8f9aac-d853-4571-a48b-d866a99eaf3e.png differ diff --git a/docs/images/274787925-e2593010-d7ef-46e3-8719-6fcae0315b5d.png b/docs/images/274787925-e2593010-d7ef-46e3-8719-6fcae0315b5d.png new file mode 100644 index 0000000..7de7a36 Binary files /dev/null and b/docs/images/274787925-e2593010-d7ef-46e3-8719-6fcae0315b5d.png differ diff --git a/docs/overview.md b/docs/overview.md index c55c991..093adf8 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -3,8 +3,17 @@ `kotaemon` library focuses on the AI building blocks to implement the Kotaemon. It can be used in both client project and in product development. It consists of base interfaces, core components and a list of utilities: - Base interfaces: `kotaemon` defines the base interface of a component in a pipeline. A pipeline is also a component. By clearly define this interface, a pipeline of steps can be easily constructed and orchestrated. -- Core components: `kotaemon` implements (or wraps 3rd-party libraries like Langchain, llama-index,... when possible) commonly used components in kotaemon use cases. Some of these components are: LLM, vector store, document store, retriever... For a detailed list and description of these components, please refer to the [Pipeline Components](Pipeline-Components) and [Data & Data Structure Components](Data-&-Data-Structure-Components) sections. -- List of utilities: `lib-knowledge` provides utilities and tools that are usually needed in client project. For example, it provides a prompt engineering UI for AI developers in a project to quickly create a prompt engineering tool for DMs and QALs. It also provides a command to quickly spin up a project code base. For a full list and description of these utilities, please refer to the [Utilities](Utilities) section. +- Core components: `kotaemon` implements (or wraps 3rd-party libraries + like Langchain, llama-index,... when possible) commonly used components in + kotaemon use cases. Some of these components are: LLM, vector store, + document store, retriever... For a detailed list and description of these + components, please refer to the [API Reference](/reference/nav/) section. +- List of utilities: `lib-knowledge` provides utilities and tools that are + usually needed in client project. For example, it provides a prompt + engineering UI for AI developers in a project to quickly create a prompt + engineering tool for DMs and QALs. It also provides a command to quickly spin + up a project code base. For a full list and description of these utilities, + please refer to the [Tutorial/Utilities](/ultilities) section. ```mermaid mindmap diff --git a/docs/scripts/generate_reference_docs.py b/docs/scripts/generate_reference_docs.py index f533cb3..33bdad4 100644 --- a/docs/scripts/generate_reference_docs.py +++ b/docs/scripts/generate_reference_docs.py @@ -13,6 +13,8 @@ while doc_dir.name != doc_dir_name and doc_dir != doc_dir.parent: if doc_dir == doc_dir.parent: raise ValueError(f"root_name ({doc_dir_name}) not in path ({str(Path(__file__))}).") +nav_title_map = {"cli": "CLI", "llms": "LLMs"} + def generate_docs_for_src_code( code_dir: Path, target_doc_folder: str, ignored_modules: Iterable[Any] = [] @@ -53,7 +55,9 @@ def generate_docs_for_src_code( if ignore: continue - nav_titles = [name.replace("_", " ").title() for name in parts] + nav_titles = [ + nav_title_map.get(name, name.replace("_", " ").title()) for name in parts + ] nav[nav_titles] = doc_path.as_posix() with mkdocs_gen_files.open(full_doc_path, "w") as f: @@ -69,7 +73,7 @@ def generate_docs_for_src_code( generate_docs_for_src_code( - code_dir=doc_dir.parent / "libs" / "kotaemon", + code_dir=doc_dir.parent / "libs" / "kotaemon" / "kotaemon", target_doc_folder="reference", ignored_modules={"contribs"}, ) diff --git a/docs/ultilities.md b/docs/ultilities.md index 225b443..53a9da3 100644 --- a/docs/ultilities.md +++ b/docs/ultilities.md @@ -2,7 +2,7 @@ Utilities detail can be referred in the sub-pages of this section. ## Prompt engineering UI -![chat-ui](https://github.com/Cinnamon/kotaemon/assets/35283585/ac8f9aac-d853-4571-a48b-d866a99eaf3e) +![chat-ui](images/271332562-ac8f9aac-d853-4571-a48b-d866a99eaf3e.png) **_Important:_** despite the name prompt engineering UI, this tool allows DMs to test any kind of parameters that are exposed by AIRs. Prompt is one kind of param. There can be other type of params that DMs can tweak (e.g. top_k, temperature...). @@ -146,7 +146,7 @@ $ kh promptui run This will generate an UI as follow: -![Screenshot from 2023-09-20 12-20-31](https://github.com/Cinnamon/kotaemon/assets/35283585/9ac1b95a-b667-42e7-b318-98a1b805d6df) +![Screenshot from 2023-09-20 12-20-31](images/269170198-9ac1b95a-b667-42e7-b318-98a1b805d6df.png) where: diff --git a/libs/kotaemon/kotaemon/agents/io/base.py b/libs/kotaemon/kotaemon/agents/io/base.py index da6e3b9..c27eed0 100644 --- a/libs/kotaemon/kotaemon/agents/io/base.py +++ b/libs/kotaemon/kotaemon/agents/io/base.py @@ -5,9 +5,10 @@ from dataclasses import dataclass from enum import Enum from typing import Any, Dict, Literal, NamedTuple, Optional, Union -from kotaemon.base import LLMInterface from pydantic import Extra +from kotaemon.base import LLMInterface + def check_log(): """ diff --git a/libs/kotaemon/kotaemon/agents/langchain_based.py b/libs/kotaemon/kotaemon/agents/langchain_based.py index 7bdb7aa..3083b70 100644 --- a/libs/kotaemon/kotaemon/agents/langchain_based.py +++ b/libs/kotaemon/kotaemon/agents/langchain_based.py @@ -1,10 +1,11 @@ from typing import List, Optional -from kotaemon.llms import LLM, ChatLLM from langchain.agents import AgentType as LCAgentType from langchain.agents import initialize_agent from langchain.agents.agent import AgentExecutor as LCAgentExecutor +from kotaemon.llms import LLM, ChatLLM + from .base import BaseAgent from .io import AgentOutput, AgentType from .tools import BaseTool diff --git a/libs/kotaemon/kotaemon/agents/tools/base.py b/libs/kotaemon/kotaemon/agents/tools/base.py index 42f7533..1caf3d2 100644 --- a/libs/kotaemon/kotaemon/agents/tools/base.py +++ b/libs/kotaemon/kotaemon/agents/tools/base.py @@ -1,9 +1,10 @@ from typing import Any, Callable, Dict, Optional, Tuple, Type, Union -from kotaemon.base import BaseComponent from langchain.agents import Tool as LCTool from pydantic import BaseModel +from kotaemon.base import BaseComponent + class ToolException(Exception): """An optional exception that tool throws when execution error occurs. diff --git a/libs/kotaemon/kotaemon/agents/tools/llm.py b/libs/kotaemon/kotaemon/agents/tools/llm.py index 2df87cc..750462e 100644 --- a/libs/kotaemon/kotaemon/agents/tools/llm.py +++ b/libs/kotaemon/kotaemon/agents/tools/llm.py @@ -1,8 +1,9 @@ from typing import AnyStr, Optional, Type -from kotaemon.llms import BaseLLM from pydantic import BaseModel, Field +from kotaemon.llms import BaseLLM + from .base import BaseTool, ToolException diff --git a/libs/kotaemon/kotaemon/agents/tools/wikipedia.py b/libs/kotaemon/kotaemon/agents/tools/wikipedia.py index 3046522..9e6a362 100644 --- a/libs/kotaemon/kotaemon/agents/tools/wikipedia.py +++ b/libs/kotaemon/kotaemon/agents/tools/wikipedia.py @@ -1,8 +1,9 @@ from typing import Any, AnyStr, Optional, Type, Union -from kotaemon.base import Document from pydantic import BaseModel, Field +from kotaemon.base import Document + from .base import BaseTool diff --git a/libs/kotaemon/kotaemon/base/component.py b/libs/kotaemon/kotaemon/base/component.py index 2f8abf5..4e6f7b8 100644 --- a/libs/kotaemon/kotaemon/base/component.py +++ b/libs/kotaemon/kotaemon/base/component.py @@ -1,9 +1,10 @@ from abc import abstractmethod from typing import Iterator, Optional -from kotaemon.base.schema import Document from theflow import Function, Node, Param, lazy +from kotaemon.base.schema import Document + class BaseComponent(Function): """A component is a class that can be used to compose a pipeline. diff --git a/libs/kotaemon/kotaemon/chatbot/base.py b/libs/kotaemon/kotaemon/chatbot/base.py index f4b8c86..b6a3baf 100644 --- a/libs/kotaemon/kotaemon/chatbot/base.py +++ b/libs/kotaemon/kotaemon/chatbot/base.py @@ -1,9 +1,10 @@ from abc import abstractmethod from typing import List, Optional +from theflow import SessionFunction + from kotaemon.base import BaseComponent, LLMInterface from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage -from theflow import SessionFunction class BaseChatBot(BaseComponent): diff --git a/libs/kotaemon/kotaemon/cli.py b/libs/kotaemon/kotaemon/cli.py index 84a29b1..75101ff 100644 --- a/libs/kotaemon/kotaemon/cli.py +++ b/libs/kotaemon/kotaemon/cli.py @@ -36,9 +36,10 @@ def export(export_path, output): """Export a pipeline to a config file""" import sys - from kotaemon.contribs.promptui.config import export_pipeline_to_config from theflow.utils.modules import import_dotted_string + from kotaemon.contribs.promptui.config import export_pipeline_to_config + sys.path.append(os.getcwd()) cls = import_dotted_string(export_path, safe=False) export_pipeline_to_config(cls, output) diff --git a/libs/kotaemon/kotaemon/contribs/promptui/config.py b/libs/kotaemon/kotaemon/contribs/promptui/config.py index e660168..e9098b1 100644 --- a/libs/kotaemon/kotaemon/contribs/promptui/config.py +++ b/libs/kotaemon/kotaemon/contribs/promptui/config.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Any, Dict, Optional, Type, Union import yaml + from kotaemon.base import BaseComponent from kotaemon.chatbot import BaseChatBot diff --git a/libs/kotaemon/kotaemon/contribs/promptui/export.py b/libs/kotaemon/kotaemon/contribs/promptui/export.py index ce152f1..8b174ea 100644 --- a/libs/kotaemon/kotaemon/contribs/promptui/export.py +++ b/libs/kotaemon/kotaemon/contribs/promptui/export.py @@ -6,10 +6,11 @@ from typing import Any, Dict, List, Type, Union import pandas as pd import yaml -from kotaemon.base import BaseComponent from theflow.storage import storage from theflow.utils.modules import import_dotted_string +from kotaemon.base import BaseComponent + from .logs import ResultLog diff --git a/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py b/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py index 6c4b743..78eecb5 100644 --- a/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py +++ b/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py @@ -3,11 +3,12 @@ from datetime import datetime from pathlib import Path import gradio as gr +from theflow.storage import storage + from kotaemon.chatbot import ChatConversation from kotaemon.contribs.promptui.base import get_component from kotaemon.contribs.promptui.export import export from kotaemon.contribs.promptui.ui.blocks import ChatBlock -from theflow.storage import storage from ..logs import ResultLog diff --git a/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py b/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py index 12b1133..725893d 100644 --- a/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py +++ b/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py @@ -6,9 +6,10 @@ from typing import Any, Dict import gradio as gr import pandas as pd +from theflow.storage import storage + from kotaemon.contribs.promptui.base import get_component from kotaemon.contribs.promptui.export import export -from theflow.storage import storage from ..logs import ResultLog diff --git a/libs/kotaemon/kotaemon/indices/base.py b/libs/kotaemon/kotaemon/indices/base.py index 0de39c7..938be66 100644 --- a/libs/kotaemon/kotaemon/indices/base.py +++ b/libs/kotaemon/kotaemon/indices/base.py @@ -3,9 +3,10 @@ from __future__ import annotations from abc import abstractmethod from typing import Any, Type -from kotaemon.base import BaseComponent, Document, RetrievedDocument from llama_index.node_parser.interface import NodeParser +from kotaemon.base import BaseComponent, Document, RetrievedDocument + class DocTransformer(BaseComponent): """This is a base class for document transformers diff --git a/libs/kotaemon/kotaemon/indices/ingests/files.py b/libs/kotaemon/kotaemon/indices/ingests/files.py index b3b4f48..22e7db9 100644 --- a/libs/kotaemon/kotaemon/indices/ingests/files.py +++ b/libs/kotaemon/kotaemon/indices/ingests/files.py @@ -1,5 +1,7 @@ from pathlib import Path +from llama_index.readers.base import BaseReader + from kotaemon.base import BaseComponent, Document, Param from kotaemon.indices.extractors import BaseDocParser from kotaemon.indices.splitters import BaseSplitter, TokenSplitter @@ -11,7 +13,6 @@ from kotaemon.loaders import ( PandasExcelReader, UnstructuredReader, ) -from llama_index.readers.base import BaseReader class DocumentIngestor(BaseComponent): diff --git a/libs/kotaemon/kotaemon/indices/qa/citation.py b/libs/kotaemon/kotaemon/indices/qa/citation.py index bafa2d4..4c1281a 100644 --- a/libs/kotaemon/kotaemon/indices/qa/citation.py +++ b/libs/kotaemon/kotaemon/indices/qa/citation.py @@ -1,9 +1,10 @@ from typing import Iterator, List +from pydantic import BaseModel, Field + from kotaemon.base import BaseComponent from kotaemon.base.schema import HumanMessage, SystemMessage from kotaemon.llms import BaseLLM -from pydantic import BaseModel, Field class FactWithEvidence(BaseModel): diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm.py b/libs/kotaemon/kotaemon/indices/rankings/llm.py index 7b280f1..bff81ff 100644 --- a/libs/kotaemon/kotaemon/indices/rankings/llm.py +++ b/libs/kotaemon/kotaemon/indices/rankings/llm.py @@ -2,9 +2,10 @@ from __future__ import annotations from concurrent.futures import ThreadPoolExecutor +from langchain.output_parsers.boolean import BooleanOutputParser + from kotaemon.base import Document from kotaemon.llms import BaseLLM, PromptTemplate -from langchain.output_parsers.boolean import BooleanOutputParser from .base import BaseReranking diff --git a/libs/kotaemon/kotaemon/llms/base.py b/libs/kotaemon/kotaemon/llms/base.py index 56bd910..ff315ea 100644 --- a/libs/kotaemon/kotaemon/llms/base.py +++ b/libs/kotaemon/kotaemon/llms/base.py @@ -1,6 +1,7 @@ -from kotaemon.base import BaseComponent from langchain_core.language_models.base import BaseLanguageModel +from kotaemon.base import BaseComponent + class BaseLLM(BaseComponent): def to_langchain_format(self) -> BaseLanguageModel: diff --git a/libs/kotaemon/kotaemon/llms/branching.py b/libs/kotaemon/kotaemon/llms/branching.py index 09fe3c2..a9cbbe8 100644 --- a/libs/kotaemon/kotaemon/llms/branching.py +++ b/libs/kotaemon/kotaemon/llms/branching.py @@ -156,6 +156,7 @@ class GatedBranchingPipeline(SimpleBranchingPipeline): if __name__ == "__main__": import dotenv + from kotaemon.llms import AzureChatOpenAI, BasePromptComponent from kotaemon.parsers import RegexExtractor diff --git a/libs/kotaemon/kotaemon/llms/cot.py b/libs/kotaemon/kotaemon/llms/cot.py index bac2a22..7eaf5d1 100644 --- a/libs/kotaemon/kotaemon/llms/cot.py +++ b/libs/kotaemon/kotaemon/llms/cot.py @@ -1,9 +1,10 @@ from copy import deepcopy from typing import Callable, List -from kotaemon.base import BaseComponent, Document from theflow import Function, Node, Param +from kotaemon.base import BaseComponent, Document + from .chats import AzureChatOpenAI from .completions import LLM from .prompts import BasePromptComponent @@ -84,7 +85,7 @@ class Thought(BaseComponent): @Node.auto(depends_on="prompt") def prompt_template(self): """Automatically wrap around param prompt. Can ignore""" - return BasePromptComponent(self.prompt) + return BasePromptComponent(template=self.prompt) def run(self, **kwargs) -> Document: """Run the chain of thought""" diff --git a/libs/kotaemon/kotaemon/llms/prompts/base.py b/libs/kotaemon/kotaemon/llms/prompts/base.py index 564279d..ca84d57 100644 --- a/libs/kotaemon/kotaemon/llms/prompts/base.py +++ b/libs/kotaemon/kotaemon/llms/prompts/base.py @@ -1,4 +1,6 @@ -from typing import Callable, Union +from typing import Callable + +from theflow import Param from kotaemon.base import BaseComponent, Document @@ -19,14 +21,18 @@ class BasePromptComponent(BaseComponent): middleware_switches = {"theflow.middleware.CachingMiddleware": False} allow_extra = True - def __init__(self, template: Union[str, PromptTemplate], **kwargs): - super().__init__() - self.template = ( - template - if isinstance(template, PromptTemplate) - else PromptTemplate(template) + template: str | PromptTemplate + + @Param.auto(depends_on="template") + def template__(self): + return ( + self.template + if isinstance(self.template, PromptTemplate) + else PromptTemplate(self.template) ) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.__set(**kwargs) def __check_redundant_kwargs(self, **kwargs): @@ -42,7 +48,7 @@ class BasePromptComponent(BaseComponent): Returns: None """ - self.template.check_redundant_kwargs(**kwargs) + self.template__.check_redundant_kwargs(**kwargs) def __check_unset_placeholders(self): """ @@ -58,7 +64,7 @@ class BasePromptComponent(BaseComponent): Returns: None """ - self.template.check_missing_kwargs(**self.__dict__) + self.template__.check_missing_kwargs(**self.__dict__) def __validate_value_type(self, **kwargs): """ @@ -76,6 +82,8 @@ class BasePromptComponent(BaseComponent): """ type_error = [] for k, v in kwargs.items(): + if k.startswith("template"): + continue if not isinstance(v, (str, int, Document, Callable)): # type: ignore type_error.append((k, type(v))) @@ -122,7 +130,7 @@ class BasePromptComponent(BaseComponent): ) kwargs = {} - for k in self.template.placeholders: + for k in self.template__.placeholders: v = getattr(self, k) # if get a callable, execute to get its output @@ -141,7 +149,7 @@ class BasePromptComponent(BaseComponent): return kwargs - def set(self, **kwargs): + def set_value(self, **kwargs): """ Similar to `__set` but for external use. @@ -172,7 +180,7 @@ class BasePromptComponent(BaseComponent): self.__check_unset_placeholders() prepared_kwargs = self.__prepare_value() - text = self.template.populate(**prepared_kwargs) + text = self.template__.populate(**prepared_kwargs) return Document(text=text, metadata={"origin": "PromptComponent"}) def flow(self): diff --git a/libs/kotaemon/kotaemon/loaders/base.py b/libs/kotaemon/kotaemon/loaders/base.py index 956002f..cb92d5b 100644 --- a/libs/kotaemon/kotaemon/loaders/base.py +++ b/libs/kotaemon/kotaemon/loaders/base.py @@ -1,10 +1,11 @@ from pathlib import Path from typing import Any, List, Type, Union -from kotaemon.base import BaseComponent, Document from llama_index import SimpleDirectoryReader, download_loader from llama_index.readers.base import BaseReader +from kotaemon.base import BaseComponent, Document + class AutoReader(BaseComponent): """General auto reader for a variety of files. (based on llama-hub)""" diff --git a/libs/kotaemon/kotaemon/loaders/excel_loader.py b/libs/kotaemon/kotaemon/loaders/excel_loader.py index 5fab7c5..d903aea 100644 --- a/libs/kotaemon/kotaemon/loaders/excel_loader.py +++ b/libs/kotaemon/kotaemon/loaders/excel_loader.py @@ -6,9 +6,10 @@ Pandas parser for .xlsx files. from pathlib import Path from typing import Any, List, Optional, Union -from kotaemon.base import Document from llama_index.readers.base import BaseReader +from kotaemon.base import Document + class PandasExcelReader(BaseReader): r"""Pandas-based CSV parser. diff --git a/libs/kotaemon/kotaemon/loaders/mathpix_loader.py b/libs/kotaemon/kotaemon/loaders/mathpix_loader.py index 1f5b602..19f1001 100644 --- a/libs/kotaemon/kotaemon/loaders/mathpix_loader.py +++ b/libs/kotaemon/kotaemon/loaders/mathpix_loader.py @@ -5,10 +5,11 @@ from pathlib import Path from typing import Any, Dict, List, Optional import requests -from kotaemon.base import Document from langchain.utils import get_from_dict_or_env from llama_index.readers.base import BaseReader +from kotaemon.base import Document + from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown diff --git a/libs/kotaemon/kotaemon/loaders/ocr_loader.py b/libs/kotaemon/kotaemon/loaders/ocr_loader.py index 608405f..c751338 100644 --- a/libs/kotaemon/kotaemon/loaders/ocr_loader.py +++ b/libs/kotaemon/kotaemon/loaders/ocr_loader.py @@ -3,9 +3,10 @@ from typing import List, Optional from uuid import uuid4 import requests -from kotaemon.base import Document from llama_index.readers.base import BaseReader +from kotaemon.base import Document + from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured from .utils.table import strip_special_chars_markdown diff --git a/libs/kotaemon/kotaemon/loaders/unstructured_loader.py b/libs/kotaemon/kotaemon/loaders/unstructured_loader.py index 8568d95..82f3255 100644 --- a/libs/kotaemon/kotaemon/loaders/unstructured_loader.py +++ b/libs/kotaemon/kotaemon/loaders/unstructured_loader.py @@ -12,9 +12,10 @@ pip install xlrd from pathlib import Path from typing import Any, Dict, List, Optional -from kotaemon.base import Document from llama_index.readers.base import BaseReader +from kotaemon.base import Document + class UnstructuredReader(BaseReader): """General unstructured text reader for a variety of files.""" diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/base.py b/libs/kotaemon/kotaemon/storages/vectorstores/base.py index d353886..ba4f3ec 100644 --- a/libs/kotaemon/kotaemon/storages/vectorstores/base.py +++ b/libs/kotaemon/kotaemon/storages/vectorstores/base.py @@ -3,12 +3,13 @@ from __future__ import annotations from abc import ABC, abstractmethod from typing import Any, Optional -from kotaemon.base import DocumentWithEmbedding from llama_index.schema import NodeRelationship, RelatedNodeInfo from llama_index.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.types import VectorStore as LIVectorStore from llama_index.vector_stores.types import VectorStoreQuery +from kotaemon.base import DocumentWithEmbedding + class BaseVectorStore(ABC): @abstractmethod diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py b/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py index 407d4fc..6f14a34 100644 --- a/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py +++ b/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py @@ -3,10 +3,11 @@ from pathlib import Path from typing import Any, Optional, Type import fsspec -from kotaemon.base import DocumentWithEmbedding from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore from llama_index.vector_stores.simple import SimpleVectorStoreData +from kotaemon.base import DocumentWithEmbedding + from .base import LlamaIndexVectorStore diff --git a/libs/kotaemon/tests/test_agent.py b/libs/kotaemon/tests/test_agent.py index 49740cd..dad9a33 100644 --- a/libs/kotaemon/tests/test_agent.py +++ b/libs/kotaemon/tests/test_agent.py @@ -1,6 +1,8 @@ from unittest.mock import patch import pytest +from openai.types.chat.chat_completion import ChatCompletion + from kotaemon.agents import ( AgentType, BaseTool, @@ -12,7 +14,6 @@ from kotaemon.agents import ( WikipediaTool, ) from kotaemon.llms import AzureChatOpenAI -from openai.types.chat.chat_completion import ChatCompletion FINAL_RESPONSE_TEXT = "Final Answer: Hello Cinnamon AI!" REWOO_VALID_PLAN = ( diff --git a/libs/kotaemon/tests/test_composite.py b/libs/kotaemon/tests/test_composite.py index 85cb35a..464a456 100644 --- a/libs/kotaemon/tests/test_composite.py +++ b/libs/kotaemon/tests/test_composite.py @@ -1,6 +1,8 @@ from copy import deepcopy import pytest +from openai.types.chat.chat_completion import ChatCompletion + from kotaemon.llms import ( AzureChatOpenAI, BasePromptComponent, @@ -10,7 +12,6 @@ from kotaemon.llms import ( SimpleLinearPipeline, ) from kotaemon.parsers import RegexExtractor -from openai.types.chat.chat_completion import ChatCompletion _openai_chat_completion_response = ChatCompletion.parse_obj( { diff --git a/libs/kotaemon/tests/test_cot.py b/libs/kotaemon/tests/test_cot.py index 58833c1..aef8a69 100644 --- a/libs/kotaemon/tests/test_cot.py +++ b/libs/kotaemon/tests/test_cot.py @@ -1,8 +1,9 @@ from unittest.mock import patch +from openai.types.chat.chat_completion import ChatCompletion + from kotaemon.llms import AzureChatOpenAI from kotaemon.llms.cot import ManualSequentialChainOfThought, Thought -from openai.types.chat.chat_completion import ChatCompletion _openai_chat_completion_response = [ ChatCompletion.parse_obj( diff --git a/libs/kotaemon/tests/test_docstores.py b/libs/kotaemon/tests/test_docstores.py index af4834c..90bae43 100644 --- a/libs/kotaemon/tests/test_docstores.py +++ b/libs/kotaemon/tests/test_docstores.py @@ -3,6 +3,7 @@ from unittest.mock import patch import pytest from elastic_transport import ApiResponseMeta + from kotaemon.base import Document from kotaemon.storages import ( ElasticsearchDocumentStore, diff --git a/libs/kotaemon/tests/test_indexing_retrieval.py b/libs/kotaemon/tests/test_indexing_retrieval.py index 032cbde..7393248 100644 --- a/libs/kotaemon/tests/test_indexing_retrieval.py +++ b/libs/kotaemon/tests/test_indexing_retrieval.py @@ -3,11 +3,12 @@ from pathlib import Path from typing import cast import pytest +from openai.resources.embeddings import Embeddings + from kotaemon.base import Document from kotaemon.embeddings import AzureOpenAIEmbeddings from kotaemon.indices import VectorIndexing, VectorRetrieval from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore -from openai.resources.embeddings import Embeddings with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f: openai_embedding = json.load(f) diff --git a/libs/kotaemon/tests/test_llms_completion_models.py b/libs/kotaemon/tests/test_llms_completion_models.py index ac4794f..a88d3c4 100644 --- a/libs/kotaemon/tests/test_llms_completion_models.py +++ b/libs/kotaemon/tests/test_llms_completion_models.py @@ -9,6 +9,7 @@ try: except ImportError: from langchain.llms import AzureOpenAI as AzureOpenAILC from langchain.llms import OpenAI as OpenAILC + from openai.types.completion import Completion _openai_completion_response = Completion.parse_obj( diff --git a/libs/kotaemon/tests/test_post_processing.py b/libs/kotaemon/tests/test_post_processing.py index 0b0359a..d32808f 100644 --- a/libs/kotaemon/tests/test_post_processing.py +++ b/libs/kotaemon/tests/test_post_processing.py @@ -1,4 +1,5 @@ import pytest + from kotaemon.base import Document from kotaemon.parsers import RegexExtractor diff --git a/libs/kotaemon/tests/test_prompt.py b/libs/kotaemon/tests/test_prompt.py index 6e9e4f9..e0fb0c8 100644 --- a/libs/kotaemon/tests/test_prompt.py +++ b/libs/kotaemon/tests/test_prompt.py @@ -1,4 +1,5 @@ import pytest + from kotaemon.base import Document from kotaemon.llms import BasePromptComponent, PromptTemplate from kotaemon.parsers import RegexExtractor @@ -58,5 +59,5 @@ def test_run(): def test_set_method(): template = PromptTemplate("Hello, {name}!") prompt = BasePromptComponent(template=template) - prompt.set(name="Alice") + prompt.set_value(name="Alice") assert prompt.name == "Alice" diff --git a/libs/kotaemon/tests/test_reader.py b/libs/kotaemon/tests/test_reader.py index f8f6ef3..4231e18 100644 --- a/libs/kotaemon/tests/test_reader.py +++ b/libs/kotaemon/tests/test_reader.py @@ -1,9 +1,10 @@ from pathlib import Path +from langchain.schema import Document as LangchainDocument +from llama_index.node_parser import SimpleNodeParser + from kotaemon.base import Document from kotaemon.loaders import AutoReader, UnstructuredReader -from langchain.schema import Document as LangchainDocument -from llama_index.node_parser import SimpleNodeParser def test_pdf_reader(): diff --git a/libs/kotaemon/tests/test_reranking.py b/libs/kotaemon/tests/test_reranking.py index 953afdb..d4f7be8 100644 --- a/libs/kotaemon/tests/test_reranking.py +++ b/libs/kotaemon/tests/test_reranking.py @@ -1,10 +1,11 @@ from unittest.mock import patch import pytest +from openai.types.chat.chat_completion import ChatCompletion + from kotaemon.base import Document from kotaemon.indices.rankings import LLMReranking from kotaemon.llms import AzureChatOpenAI -from openai.types.chat.chat_completion import ChatCompletion _openai_chat_completion_responses = [ ChatCompletion.parse_obj( diff --git a/libs/kotaemon/tests/test_splitter.py b/libs/kotaemon/tests/test_splitter.py index c87e196..71e63ee 100644 --- a/libs/kotaemon/tests/test_splitter.py +++ b/libs/kotaemon/tests/test_splitter.py @@ -1,6 +1,7 @@ +from llama_index.schema import NodeRelationship + from kotaemon.base import Document from kotaemon.indices.splitters import TokenSplitter -from llama_index.schema import NodeRelationship source1 = Document( content="The City Hall and Raffles Place MRT stations are paired cross-platform " diff --git a/libs/kotaemon/tests/test_table_reader.py b/libs/kotaemon/tests/test_table_reader.py index 88f56fb..b723778 100644 --- a/libs/kotaemon/tests/test_table_reader.py +++ b/libs/kotaemon/tests/test_table_reader.py @@ -2,6 +2,7 @@ import json from pathlib import Path import pytest + from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader input_file = Path(__file__).parent / "resources" / "table.pdf" diff --git a/libs/kotaemon/tests/test_telemetry.py b/libs/kotaemon/tests/test_telemetry.py index e5528ab..942b0ef 100644 --- a/libs/kotaemon/tests/test_telemetry.py +++ b/libs/kotaemon/tests/test_telemetry.py @@ -51,6 +51,7 @@ def test_disable_telemetry_import_haystack_after_kotaemon(): import os import haystack.telemetry + import kotaemon # noqa: F401 assert haystack.telemetry.telemetry is None diff --git a/libs/kotaemon/tests/test_template.py b/libs/kotaemon/tests/test_template.py index 917e494..cf7ad46 100644 --- a/libs/kotaemon/tests/test_template.py +++ b/libs/kotaemon/tests/test_template.py @@ -1,4 +1,5 @@ import pytest + from kotaemon.llms import PromptTemplate diff --git a/libs/kotaemon/tests/test_tools.py b/libs/kotaemon/tests/test_tools.py index 58e519a..b3ee6c7 100644 --- a/libs/kotaemon/tests/test_tools.py +++ b/libs/kotaemon/tests/test_tools.py @@ -2,12 +2,13 @@ import json from pathlib import Path import pytest +from openai.resources.embeddings import Embeddings + from kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool from kotaemon.base import Document from kotaemon.embeddings import AzureOpenAIEmbeddings from kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore -from openai.resources.embeddings import Embeddings with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f: openai_embedding = json.load(f) diff --git a/libs/ktem/khapptests/test_qa.py b/libs/ktem/khapptests/test_qa.py index 645d9e8..a3993ee 100644 --- a/libs/ktem/khapptests/test_qa.py +++ b/libs/ktem/khapptests/test_qa.py @@ -4,10 +4,11 @@ from unittest.mock import patch import pytest from index import ReaderIndexingPipeline -from kotaemon.llms import AzureChatOpenAI from openai.resources.embeddings import Embeddings from openai.types.chat.chat_completion import ChatCompletion +from kotaemon.llms import AzureChatOpenAI + with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f: openai_embedding = json.load(f) diff --git a/libs/ktem/ktem/components.py b/libs/ktem/ktem/components.py index 647dce5..e225d32 100644 --- a/libs/ktem/ktem/components.py +++ b/libs/ktem/ktem/components.py @@ -3,11 +3,12 @@ import logging from functools import cache from pathlib import Path -from kotaemon.base import BaseComponent -from kotaemon.storages import BaseDocumentStore, BaseVectorStore from theflow.settings import settings from theflow.utils.modules import deserialize +from kotaemon.base import BaseComponent +from kotaemon.storages import BaseDocumentStore, BaseVectorStore + logger = logging.getLogger(__name__) diff --git a/libs/ktem/ktem/indexing/file.py b/libs/ktem/ktem/indexing/file.py index fc5ef1a..2019660 100644 --- a/libs/ktem/ktem/indexing/file.py +++ b/libs/ktem/ktem/indexing/file.py @@ -17,10 +17,6 @@ from ktem.components import ( from ktem.db.models import Index, Source, SourceTargetRelation, engine from ktem.indexing.base import BaseIndexing, BaseRetriever from ktem.indexing.exceptions import FileExistsError -from kotaemon.base import RetrievedDocument -from kotaemon.indices import VectorIndexing, VectorRetrieval -from kotaemon.indices.ingests import DocumentIngestor -from kotaemon.indices.rankings import BaseReranking, CohereReranking, LLMReranking from llama_index.vector_stores import ( FilterCondition, FilterOperator, @@ -31,6 +27,11 @@ from llama_index.vector_stores.types import VectorStoreQueryMode from sqlmodel import Session, select from theflow.settings import settings +from kotaemon.base import RetrievedDocument +from kotaemon.indices import VectorIndexing, VectorRetrieval +from kotaemon.indices.ingests import DocumentIngestor +from kotaemon.indices.rankings import BaseReranking, CohereReranking, LLMReranking + USER_SETTINGS = { "index_parser": { "name": "Index parser", diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py index 4b2422d..67ae455 100644 --- a/libs/ktem/ktem/reasoning/simple.py +++ b/libs/ktem/ktem/reasoning/simple.py @@ -6,6 +6,7 @@ from functools import partial import tiktoken from ktem.components import llms from ktem.indexing.base import BaseRetriever + from kotaemon.base import ( BaseComponent, Document, diff --git a/mkdocs.yml b/mkdocs.yml index f9997b8..9ad7b40 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -60,7 +60,7 @@ plugins: - "!^_" members_order: source separate_signature: true - paths: [libs/kotaemon] + paths: [libs/kotaemon/kotaemon] - git-revision-date-localized: enable_creation_date: true type: timeago diff --git a/pyproject.toml b/pyproject.toml index 6500548..3b1cb51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,3 +4,6 @@ skip = "*.js,*.css,*.map" ignore-words-list = "llm,fo" quiet-level = 3 check-filenames = "" + +[tool.isort] +known_first_party = ["kotaemon"]