Update docs (#106)
This commit is contained in:
@@ -5,9 +5,10 @@ from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Literal, NamedTuple, Optional, Union
|
||||
|
||||
from kotaemon.base import LLMInterface
|
||||
from pydantic import Extra
|
||||
|
||||
from kotaemon.base import LLMInterface
|
||||
|
||||
|
||||
def check_log():
|
||||
"""
|
||||
|
@@ -1,10 +1,11 @@
|
||||
from typing import List, Optional
|
||||
|
||||
from kotaemon.llms import LLM, ChatLLM
|
||||
from langchain.agents import AgentType as LCAgentType
|
||||
from langchain.agents import initialize_agent
|
||||
from langchain.agents.agent import AgentExecutor as LCAgentExecutor
|
||||
|
||||
from kotaemon.llms import LLM, ChatLLM
|
||||
|
||||
from .base import BaseAgent
|
||||
from .io import AgentOutput, AgentType
|
||||
from .tools import BaseTool
|
||||
|
@@ -1,9 +1,10 @@
|
||||
from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from langchain.agents import Tool as LCTool
|
||||
from pydantic import BaseModel
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
|
||||
|
||||
class ToolException(Exception):
|
||||
"""An optional exception that tool throws when execution error occurs.
|
||||
|
@@ -1,8 +1,9 @@
|
||||
from typing import AnyStr, Optional, Type
|
||||
|
||||
from kotaemon.llms import BaseLLM
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from kotaemon.llms import BaseLLM
|
||||
|
||||
from .base import BaseTool, ToolException
|
||||
|
||||
|
||||
|
@@ -1,8 +1,9 @@
|
||||
from typing import Any, AnyStr, Optional, Type, Union
|
||||
|
||||
from kotaemon.base import Document
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from kotaemon.base import Document
|
||||
|
||||
from .base import BaseTool
|
||||
|
||||
|
||||
|
@@ -1,9 +1,10 @@
|
||||
from abc import abstractmethod
|
||||
from typing import Iterator, Optional
|
||||
|
||||
from kotaemon.base.schema import Document
|
||||
from theflow import Function, Node, Param, lazy
|
||||
|
||||
from kotaemon.base.schema import Document
|
||||
|
||||
|
||||
class BaseComponent(Function):
|
||||
"""A component is a class that can be used to compose a pipeline.
|
||||
|
@@ -1,9 +1,10 @@
|
||||
from abc import abstractmethod
|
||||
from typing import List, Optional
|
||||
|
||||
from theflow import SessionFunction
|
||||
|
||||
from kotaemon.base import BaseComponent, LLMInterface
|
||||
from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
|
||||
from theflow import SessionFunction
|
||||
|
||||
|
||||
class BaseChatBot(BaseComponent):
|
||||
|
@@ -36,9 +36,10 @@ def export(export_path, output):
|
||||
"""Export a pipeline to a config file"""
|
||||
import sys
|
||||
|
||||
from kotaemon.contribs.promptui.config import export_pipeline_to_config
|
||||
from theflow.utils.modules import import_dotted_string
|
||||
|
||||
from kotaemon.contribs.promptui.config import export_pipeline_to_config
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
cls = import_dotted_string(export_path, safe=False)
|
||||
export_pipeline_to_config(cls, output)
|
||||
|
@@ -4,6 +4,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Type, Union
|
||||
|
||||
import yaml
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.chatbot import BaseChatBot
|
||||
|
||||
|
@@ -6,10 +6,11 @@ from typing import Any, Dict, List, Type, Union
|
||||
|
||||
import pandas as pd
|
||||
import yaml
|
||||
from kotaemon.base import BaseComponent
|
||||
from theflow.storage import storage
|
||||
from theflow.utils.modules import import_dotted_string
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
|
||||
from .logs import ResultLog
|
||||
|
||||
|
||||
|
@@ -3,11 +3,12 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import gradio as gr
|
||||
from theflow.storage import storage
|
||||
|
||||
from kotaemon.chatbot import ChatConversation
|
||||
from kotaemon.contribs.promptui.base import get_component
|
||||
from kotaemon.contribs.promptui.export import export
|
||||
from kotaemon.contribs.promptui.ui.blocks import ChatBlock
|
||||
from theflow.storage import storage
|
||||
|
||||
from ..logs import ResultLog
|
||||
|
||||
|
@@ -6,9 +6,10 @@ from typing import Any, Dict
|
||||
|
||||
import gradio as gr
|
||||
import pandas as pd
|
||||
from theflow.storage import storage
|
||||
|
||||
from kotaemon.contribs.promptui.base import get_component
|
||||
from kotaemon.contribs.promptui.export import export
|
||||
from theflow.storage import storage
|
||||
|
||||
from ..logs import ResultLog
|
||||
|
||||
|
@@ -3,9 +3,10 @@ from __future__ import annotations
|
||||
from abc import abstractmethod
|
||||
from typing import Any, Type
|
||||
|
||||
from kotaemon.base import BaseComponent, Document, RetrievedDocument
|
||||
from llama_index.node_parser.interface import NodeParser
|
||||
|
||||
from kotaemon.base import BaseComponent, Document, RetrievedDocument
|
||||
|
||||
|
||||
class DocTransformer(BaseComponent):
|
||||
"""This is a base class for document transformers
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from pathlib import Path
|
||||
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.base import BaseComponent, Document, Param
|
||||
from kotaemon.indices.extractors import BaseDocParser
|
||||
from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
|
||||
@@ -11,7 +13,6 @@ from kotaemon.loaders import (
|
||||
PandasExcelReader,
|
||||
UnstructuredReader,
|
||||
)
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
|
||||
class DocumentIngestor(BaseComponent):
|
||||
|
@@ -1,9 +1,10 @@
|
||||
from typing import Iterator, List
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.base.schema import HumanMessage, SystemMessage
|
||||
from kotaemon.llms import BaseLLM
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class FactWithEvidence(BaseModel):
|
||||
|
@@ -2,9 +2,10 @@ from __future__ import annotations
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from langchain.output_parsers.boolean import BooleanOutputParser
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.llms import BaseLLM, PromptTemplate
|
||||
from langchain.output_parsers.boolean import BooleanOutputParser
|
||||
|
||||
from .base import BaseReranking
|
||||
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from kotaemon.base import BaseComponent
|
||||
from langchain_core.language_models.base import BaseLanguageModel
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
|
||||
|
||||
class BaseLLM(BaseComponent):
|
||||
def to_langchain_format(self) -> BaseLanguageModel:
|
||||
|
@@ -156,6 +156,7 @@ class GatedBranchingPipeline(SimpleBranchingPipeline):
|
||||
|
||||
if __name__ == "__main__":
|
||||
import dotenv
|
||||
|
||||
from kotaemon.llms import AzureChatOpenAI, BasePromptComponent
|
||||
from kotaemon.parsers import RegexExtractor
|
||||
|
||||
|
@@ -1,9 +1,10 @@
|
||||
from copy import deepcopy
|
||||
from typing import Callable, List
|
||||
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
from theflow import Function, Node, Param
|
||||
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
|
||||
from .chats import AzureChatOpenAI
|
||||
from .completions import LLM
|
||||
from .prompts import BasePromptComponent
|
||||
@@ -84,7 +85,7 @@ class Thought(BaseComponent):
|
||||
@Node.auto(depends_on="prompt")
|
||||
def prompt_template(self):
|
||||
"""Automatically wrap around param prompt. Can ignore"""
|
||||
return BasePromptComponent(self.prompt)
|
||||
return BasePromptComponent(template=self.prompt)
|
||||
|
||||
def run(self, **kwargs) -> Document:
|
||||
"""Run the chain of thought"""
|
||||
|
@@ -1,4 +1,6 @@
|
||||
from typing import Callable, Union
|
||||
from typing import Callable
|
||||
|
||||
from theflow import Param
|
||||
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
|
||||
@@ -19,14 +21,18 @@ class BasePromptComponent(BaseComponent):
|
||||
middleware_switches = {"theflow.middleware.CachingMiddleware": False}
|
||||
allow_extra = True
|
||||
|
||||
def __init__(self, template: Union[str, PromptTemplate], **kwargs):
|
||||
super().__init__()
|
||||
self.template = (
|
||||
template
|
||||
if isinstance(template, PromptTemplate)
|
||||
else PromptTemplate(template)
|
||||
template: str | PromptTemplate
|
||||
|
||||
@Param.auto(depends_on="template")
|
||||
def template__(self):
|
||||
return (
|
||||
self.template
|
||||
if isinstance(self.template, PromptTemplate)
|
||||
else PromptTemplate(self.template)
|
||||
)
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.__set(**kwargs)
|
||||
|
||||
def __check_redundant_kwargs(self, **kwargs):
|
||||
@@ -42,7 +48,7 @@ class BasePromptComponent(BaseComponent):
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
self.template.check_redundant_kwargs(**kwargs)
|
||||
self.template__.check_redundant_kwargs(**kwargs)
|
||||
|
||||
def __check_unset_placeholders(self):
|
||||
"""
|
||||
@@ -58,7 +64,7 @@ class BasePromptComponent(BaseComponent):
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
self.template.check_missing_kwargs(**self.__dict__)
|
||||
self.template__.check_missing_kwargs(**self.__dict__)
|
||||
|
||||
def __validate_value_type(self, **kwargs):
|
||||
"""
|
||||
@@ -76,6 +82,8 @@ class BasePromptComponent(BaseComponent):
|
||||
"""
|
||||
type_error = []
|
||||
for k, v in kwargs.items():
|
||||
if k.startswith("template"):
|
||||
continue
|
||||
if not isinstance(v, (str, int, Document, Callable)): # type: ignore
|
||||
type_error.append((k, type(v)))
|
||||
|
||||
@@ -122,7 +130,7 @@ class BasePromptComponent(BaseComponent):
|
||||
)
|
||||
|
||||
kwargs = {}
|
||||
for k in self.template.placeholders:
|
||||
for k in self.template__.placeholders:
|
||||
v = getattr(self, k)
|
||||
|
||||
# if get a callable, execute to get its output
|
||||
@@ -141,7 +149,7 @@ class BasePromptComponent(BaseComponent):
|
||||
|
||||
return kwargs
|
||||
|
||||
def set(self, **kwargs):
|
||||
def set_value(self, **kwargs):
|
||||
"""
|
||||
Similar to `__set` but for external use.
|
||||
|
||||
@@ -172,7 +180,7 @@ class BasePromptComponent(BaseComponent):
|
||||
self.__check_unset_placeholders()
|
||||
prepared_kwargs = self.__prepare_value()
|
||||
|
||||
text = self.template.populate(**prepared_kwargs)
|
||||
text = self.template__.populate(**prepared_kwargs)
|
||||
return Document(text=text, metadata={"origin": "PromptComponent"})
|
||||
|
||||
def flow(self):
|
||||
|
@@ -1,10 +1,11 @@
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Type, Union
|
||||
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
from llama_index import SimpleDirectoryReader, download_loader
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
|
||||
|
||||
class AutoReader(BaseComponent):
|
||||
"""General auto reader for a variety of files. (based on llama-hub)"""
|
||||
|
@@ -6,9 +6,10 @@ Pandas parser for .xlsx files.
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
from kotaemon.base import Document
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.base import Document
|
||||
|
||||
|
||||
class PandasExcelReader(BaseReader):
|
||||
r"""Pandas-based CSV parser.
|
||||
|
@@ -5,10 +5,11 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
from kotaemon.base import Document
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.base import Document
|
||||
|
||||
from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown
|
||||
|
||||
|
||||
|
@@ -3,9 +3,10 @@ from typing import List, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
import requests
|
||||
from kotaemon.base import Document
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.base import Document
|
||||
|
||||
from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
|
||||
from .utils.table import strip_special_chars_markdown
|
||||
|
||||
|
@@ -12,9 +12,10 @@ pip install xlrd
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from kotaemon.base import Document
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.base import Document
|
||||
|
||||
|
||||
class UnstructuredReader(BaseReader):
|
||||
"""General unstructured text reader for a variety of files."""
|
||||
|
@@ -3,12 +3,13 @@ from __future__ import annotations
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Optional
|
||||
|
||||
from kotaemon.base import DocumentWithEmbedding
|
||||
from llama_index.schema import NodeRelationship, RelatedNodeInfo
|
||||
from llama_index.vector_stores.types import BasePydanticVectorStore
|
||||
from llama_index.vector_stores.types import VectorStore as LIVectorStore
|
||||
from llama_index.vector_stores.types import VectorStoreQuery
|
||||
|
||||
from kotaemon.base import DocumentWithEmbedding
|
||||
|
||||
|
||||
class BaseVectorStore(ABC):
|
||||
@abstractmethod
|
||||
|
@@ -3,10 +3,11 @@ from pathlib import Path
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
import fsspec
|
||||
from kotaemon.base import DocumentWithEmbedding
|
||||
from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore
|
||||
from llama_index.vector_stores.simple import SimpleVectorStoreData
|
||||
|
||||
from kotaemon.base import DocumentWithEmbedding
|
||||
|
||||
from .base import LlamaIndexVectorStore
|
||||
|
||||
|
||||
|
@@ -1,6 +1,8 @@
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
from kotaemon.agents import (
|
||||
AgentType,
|
||||
BaseTool,
|
||||
@@ -12,7 +14,6 @@ from kotaemon.agents import (
|
||||
WikipediaTool,
|
||||
)
|
||||
from kotaemon.llms import AzureChatOpenAI
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
FINAL_RESPONSE_TEXT = "Final Answer: Hello Cinnamon AI!"
|
||||
REWOO_VALID_PLAN = (
|
||||
|
@@ -1,6 +1,8 @@
|
||||
from copy import deepcopy
|
||||
|
||||
import pytest
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
from kotaemon.llms import (
|
||||
AzureChatOpenAI,
|
||||
BasePromptComponent,
|
||||
@@ -10,7 +12,6 @@ from kotaemon.llms import (
|
||||
SimpleLinearPipeline,
|
||||
)
|
||||
from kotaemon.parsers import RegexExtractor
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
_openai_chat_completion_response = ChatCompletion.parse_obj(
|
||||
{
|
||||
|
@@ -1,8 +1,9 @@
|
||||
from unittest.mock import patch
|
||||
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
from kotaemon.llms import AzureChatOpenAI
|
||||
from kotaemon.llms.cot import ManualSequentialChainOfThought, Thought
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
_openai_chat_completion_response = [
|
||||
ChatCompletion.parse_obj(
|
||||
|
@@ -3,6 +3,7 @@ from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from elastic_transport import ApiResponseMeta
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.storages import (
|
||||
ElasticsearchDocumentStore,
|
||||
|
@@ -3,11 +3,12 @@ from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
import pytest
|
||||
from openai.resources.embeddings import Embeddings
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||
from kotaemon.indices import VectorIndexing, VectorRetrieval
|
||||
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
|
||||
from openai.resources.embeddings import Embeddings
|
||||
|
||||
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||
openai_embedding = json.load(f)
|
||||
|
@@ -9,6 +9,7 @@ try:
|
||||
except ImportError:
|
||||
from langchain.llms import AzureOpenAI as AzureOpenAILC
|
||||
from langchain.llms import OpenAI as OpenAILC
|
||||
|
||||
from openai.types.completion import Completion
|
||||
|
||||
_openai_completion_response = Completion.parse_obj(
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.parsers import RegexExtractor
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.llms import BasePromptComponent, PromptTemplate
|
||||
from kotaemon.parsers import RegexExtractor
|
||||
@@ -58,5 +59,5 @@ def test_run():
|
||||
def test_set_method():
|
||||
template = PromptTemplate("Hello, {name}!")
|
||||
prompt = BasePromptComponent(template=template)
|
||||
prompt.set(name="Alice")
|
||||
prompt.set_value(name="Alice")
|
||||
assert prompt.name == "Alice"
|
||||
|
@@ -1,9 +1,10 @@
|
||||
from pathlib import Path
|
||||
|
||||
from langchain.schema import Document as LangchainDocument
|
||||
from llama_index.node_parser import SimpleNodeParser
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.loaders import AutoReader, UnstructuredReader
|
||||
from langchain.schema import Document as LangchainDocument
|
||||
from llama_index.node_parser import SimpleNodeParser
|
||||
|
||||
|
||||
def test_pdf_reader():
|
||||
|
@@ -1,10 +1,11 @@
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.indices.rankings import LLMReranking
|
||||
from kotaemon.llms import AzureChatOpenAI
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
_openai_chat_completion_responses = [
|
||||
ChatCompletion.parse_obj(
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from llama_index.schema import NodeRelationship
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.indices.splitters import TokenSplitter
|
||||
from llama_index.schema import NodeRelationship
|
||||
|
||||
source1 = Document(
|
||||
content="The City Hall and Raffles Place MRT stations are paired cross-platform "
|
||||
|
@@ -2,6 +2,7 @@ import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader
|
||||
|
||||
input_file = Path(__file__).parent / "resources" / "table.pdf"
|
||||
|
@@ -51,6 +51,7 @@ def test_disable_telemetry_import_haystack_after_kotaemon():
|
||||
import os
|
||||
|
||||
import haystack.telemetry
|
||||
|
||||
import kotaemon # noqa: F401
|
||||
|
||||
assert haystack.telemetry.telemetry is None
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from kotaemon.llms import PromptTemplate
|
||||
|
||||
|
||||
|
@@ -2,12 +2,13 @@ import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from openai.resources.embeddings import Embeddings
|
||||
|
||||
from kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||
from kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval
|
||||
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
|
||||
from openai.resources.embeddings import Embeddings
|
||||
|
||||
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||
openai_embedding = json.load(f)
|
||||
|
Reference in New Issue
Block a user