Refactor the `kotaemon/pipelines` module into `kotaemon/indices` and create the `VectorIndex`. Note: for now, `qa` is placed inside `kotaemon/indices`, because `qa` is currently the only component we have in RAG. That said, I think `qa` could also be an independent module at `kotaemon/qa`. Since this can be changed later, I am going with the first option for now and will see whether it should be moved later.
22 lines
563 B
Python
22 lines
563 B
Python
from ..base import DocTransformer, LlamaIndexDocTransformerMixin
|
|
|
|
|
|
class BaseSplitter(DocTransformer):
    """Common base class for document splitters.

    Extends ``DocTransformer`` without adding any attributes or methods of
    its own; concrete splitters in this module derive from it.
    """
|
|
|
|
|
|
class TokenSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):
    """Splitter that exposes llama_index's ``TokenTextSplitter``.

    NOTE(review): presumably ``LlamaIndexDocTransformerMixin`` calls
    ``_get_li_class`` to obtain the class it wraps -- confirm against the
    mixin's implementation.
    """

    def _get_li_class(self):
        """Return the ``TokenTextSplitter`` class from llama_index.

        The import happens inside the method, so ``llama_index`` is only
        loaded when this method is actually called.
        """
        from llama_index import text_splitter

        return text_splitter.TokenTextSplitter
|
|
|
|
|
|
class SentenceWindowSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):
    """Splitter that exposes llama_index's ``SentenceWindowNodeParser``.

    NOTE(review): presumably ``LlamaIndexDocTransformerMixin`` calls
    ``_get_li_class`` to obtain the class it wraps -- confirm against the
    mixin's implementation.
    """

    def _get_li_class(self):
        """Return the ``SentenceWindowNodeParser`` class from llama_index.

        The import happens inside the method, so ``llama_index`` is only
        loaded when this method is actually called.
        """
        from llama_index.node_parser import SentenceWindowNodeParser as parser_cls

        li_class = parser_cls
        return li_class
|