Add new OCRReader with PDF+OCR text merging (#66)
This change speeds up OCR extraction by allowing OCR to be bypassed for text that is irrelevant (i.e., not inside a table). --------- Co-authored-by: Nguyen Trung Duc (john) <trungduc1992@gmail.com>
This commit is contained in:
committed by
GitHub
parent
d79b3744cb
commit
4704e2c11a
@@ -2,7 +2,7 @@ import os
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from theflow import Node, Param
|
||||
from theflow import Node
|
||||
from theflow.utils.modules import ObjectInitDeclaration as _
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
@@ -43,8 +43,8 @@ class QuestionAnsweringPipeline(BaseComponent):
|
||||
request_timeout=60,
|
||||
)
|
||||
|
||||
vector_store: Param[InMemoryVectorStore] = Param(_(InMemoryVectorStore))
|
||||
doc_store: Param[InMemoryDocumentStore] = Param(_(InMemoryDocumentStore))
|
||||
vector_store: _[InMemoryVectorStore] = _(InMemoryVectorStore)
|
||||
doc_store: _[InMemoryDocumentStore] = _(InMemoryDocumentStore)
|
||||
|
||||
embedding: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings.withx(
|
||||
model="text-embedding-ada-002",
|
||||
|
Reference in New Issue
Block a user