@main.command()
@click.argument("module", required=True)
@click.option(
    "--output", default="docs.md", required=False, help="The output markdown file"
)
@click.option(
    "--separation-level",
    type=int,  # explicit rather than inferred from the default value
    default=1,
    required=False,
    help="Organize markdown layout",
)
def makedoc(module, output, separation_level):
    """Make documentation for module `module`

    Example:

        \b
        # Make component documentation for kotaemon library
        $ kh makedoc kotaemon
    """
    # NOTE: the docstring above is the command's help text as rendered by
    # click; the line holding only the backspace escape keeps click from
    # re-wrapping the example that follows it.

    # Imported inside the command so the documentation tooling is only loaded
    # when this command actually runs.
    from kotaemon.contribs.docs import make_doc

    make_doc(module, output, separation_level)
    # click.echo is click's own output helper and is preferred over bare
    # print() inside click commands.
    click.echo(f"Documentation exported to {output}")
def _type_name(annotation) -> str:
    """Return a short display name for a type annotation.

    Classes are shown by their bare ``__name__``; anything else (typing
    constructs, plain strings, ...) falls back to ``str()``.
    """
    if inspect.isclass(annotation):
        return annotation.__name__
    return str(annotation)


def _table_cell(value) -> str:
    """Render ``value`` so it stays inside a single Markdown table cell.

    A line break would terminate the table row and an unescaped ``|`` would
    open a new column, so lines are joined with spaces and pipes are escaped.
    """
    text = " ".join(line.strip() for line in str(value).splitlines())
    return text.replace("|", "\\|")


def from_definition_to_markdown(definition: dict) -> str:
    """Render one component definition as a Markdown fragment.

    Args:
        definition (dict): mapping with the keys ``desc`` (docstring text,
            may be ``None``), ``params`` (name -> dict with ``desc``, ``type``
            and ``default``) and ``nodes`` (name -> dict with ``desc``,
            ``type``, ``input`` and ``output``).

    Returns:
        str: the cleaned description followed by a "Params" and a "Nodes"
            section; each section is a Markdown table, or ``N/A`` when the
            corresponding mapping is empty.
    """
    # Handle params
    params = " N/A\n"
    if definition["params"]:
        rows = [
            "\n| Name | Description | Type | Default |\n",
            "| --- | --- | --- | --- |\n",
        ]
        for name, p in definition["params"].items():
            rows.append(
                f"| {_table_cell(name)} | {_table_cell(p['desc'])} "
                f"| {_table_cell(_type_name(p['type']))} "
                f"| {_table_cell(p['default'])} |\n"
            )
        params = "".join(rows)

    # Handle nodes
    nodes = " N/A\n"
    if definition["nodes"]:
        rows = [
            "\n| Name | Description | Type | Input | Output |\n",
            "| --- | --- | --- | --- | --- |\n",
        ]
        for name, n in definition["nodes"].items():
            cells = (
                name,
                n["desc"],
                _type_name(n["type"]),
                _type_name(n["input"]),
                _type_name(n["output"]),
            )
            rows.append("|" + "|".join(_table_cell(c) for c in cells) + "|\n")
        nodes = "".join(rows)

    # A component without a docstring may carry ``None`` here, which
    # inspect.cleandoc cannot handle; treat it as an empty description.
    description = inspect.cleandoc(definition["desc"] or "")
    return f"{description}\n\n_**Params:**_{params}\n_**Nodes:**_{nodes}"


def make_doc(module: str, output: str, separation_level: int):
    """Export the compose documentation of a module to a Markdown file.

    Components are grouped into ``##`` sections named after one component of
    their dotted name, and each component becomes a ``###`` entry.

    Args:
        module (str): module name, passed to
            ``get_compose_documentation_from_module``
        output (str): path of the Markdown file to write (overwritten)
        separation_level (int): index of the dotted-name component used as the
            section heading; if a name has fewer components, its last
            component is used instead
    """
    # Keys are presumably fully-qualified dotted paths of the components
    # (TODO confirm against theflow); sorting makes the output deterministic.
    documentation = sorted(
        get_compose_documentation_from_module(module).items(),
        key=lambda pair: pair[0],
    )

    entries = defaultdict(list)
    for name, definition in documentation:
        parts = name.split(".")
        # Clamp the index so a short name cannot raise IndexError.
        section = parts[min(separation_level, len(parts) - 1)].capitalize()
        markdown = from_definition_to_markdown(definition)
        entries[section].append(f"### {parts[-1]}\n{markdown}")

    final = "\n".join(
        f"## {section}\n" + "\n".join(items) for section, items in entries.items()
    )

    # Explicit encoding: docstrings may contain non-ASCII text and the locale
    # default is not UTF-8 on every platform.
    with open(output, "w", encoding="utf-8") as f:
        f.write(final)
8a7fc7a..fbd6f5f 100644 --- a/knowledgehub/embeddings/openai.py +++ b/knowledgehub/embeddings/openai.py @@ -4,10 +4,20 @@ from .base import LangchainEmbeddings class OpenAIEmbeddings(LangchainEmbeddings): + """OpenAI embeddings. + + This class wraps the Langchain OpenAIEmbeddings class. + """ + _lc_class = LCOpenAIEmbeddings class AzureOpenAIEmbeddings(LangchainEmbeddings): + """Azure OpenAI embeddings. + + This class wraps the Langchain OpenAIEmbeddings class. + """ + _lc_class = LCOpenAIEmbeddings def __init__(self, **params): diff --git a/knowledgehub/pipelines/cot.py b/knowledgehub/pipelines/cot.py index 4a79768..393b9e7 100644 --- a/knowledgehub/pipelines/cot.py +++ b/knowledgehub/pipelines/cot.py @@ -15,7 +15,7 @@ class Thought(BaseComponent): value is the value. - Output: an output dictionary - ##### Usage: + _**Usage:**_ Create and run a thought: @@ -80,6 +80,7 @@ class Thought(BaseComponent): @Node.decorate(depends_on="prompt") def prompt_template(self): + """Automatically wraps the `prompt` param; can be ignored.""" return BasePromptComponent(self.prompt) def run(self, **kwargs) -> dict: @@ -104,7 +105,7 @@ class ManualSequentialChainOfThought(BaseComponent): `kotaemon.pipelines.cot.Thought`. Please refer that section for Thought's detail. This section is about chaining thought together. - ##### Usage: + _**Usage:**_ **Create and run a chain of thought without "+" operator:**