From 5241edbc4696386af3c3bacfff648a7ee80caed7 Mon Sep 17 00:00:00 2001 From: ian_Cin Date: Wed, 30 Aug 2023 07:22:01 +0700 Subject: [PATCH] [AUR-361] Setup pre-commit, pytest, GitHub actions, ssh-secret (#3) Co-authored-by: trducng --- .github/workflows/style-check.yaml | 20 ++++ .github/workflows/unit-test.yaml | 32 +++++++ .gitignore | 11 ++- .gitsecret/keys/pubring.kbx | Bin 0 -> 1966 bytes .gitsecret/keys/trustdb.gpg | Bin 0 -> 1200 bytes .gitsecret/paths/mapping.cfg | 1 + .pre-commit-config.yaml | 49 ++++++++++ README.md | 116 ++++++++++++++++++++++-- credentials.txt.secret | Bin 0 -> 486 bytes knowledgehub/llms/base.py | 5 +- knowledgehub/llms/chats/base.py | 21 ++--- knowledgehub/llms/completions/base.py | 8 +- knowledgehub/llms/completions/openai.py | 2 + knowledgehub/loaders/base.py | 3 - pytest.ini | 9 ++ setup.py | 3 +- tests/test_llms_chat_models.py | 18 ++-- tests/test_llms_completion_models.py | 14 ++- tests/test_telemetry.py | 10 +- 19 files changed, 268 insertions(+), 54 deletions(-) create mode 100644 .github/workflows/style-check.yaml create mode 100644 .github/workflows/unit-test.yaml create mode 100644 .gitsecret/keys/pubring.kbx create mode 100644 .gitsecret/keys/trustdb.gpg create mode 100644 .gitsecret/paths/mapping.cfg create mode 100644 .pre-commit-config.yaml create mode 100644 credentials.txt.secret create mode 100644 pytest.ini diff --git a/.github/workflows/style-check.yaml b/.github/workflows/style-check.yaml new file mode 100644 index 0000000..2047461 --- /dev/null +++ b/.github/workflows/style-check.yaml @@ -0,0 +1,20 @@ +name: style-check + +on: + pull_request: + branches: [main] + push: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Clone the repo + uses: actions/checkout@v3 + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: run pre-commit + uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml new file mode 100644 index 0000000..5bd66c9 --- /dev/null +++ b/.github/workflows/unit-test.yaml @@ -0,0 +1,32 @@ +name: unit-test + +on: + pull_request: + branches: [main] + push: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + name: unit testing with python ${{ matrix.python-version }} + steps: + - name: Clone the repo + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - name: Display Python version + run: python -c "import sys; print(sys.version)" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + - name: Test with pytest + run: | + pytest diff --git a/.gitignore b/.gitignore index 22de1c3..28d4435 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,6 @@ flycheck_*.el # network security /network-security.data - ### Linux ### # temporary files which can be created if a process still has a handle open of a deleted file @@ -75,7 +74,6 @@ flycheck_*.el # Icon must end with two \r Icon - # Thumbnails ._* @@ -386,7 +384,7 @@ pyrightconfig.json ### Vim ### # Swap [._]*.s[a-v][a-z] -!*.svg # comment out if you don't need vector files +!*.svg # comment out if you don't need vector files [._]*.sw[a-p] [._]s[a-rt-v][a-z] [._]ss[a-gi-z] @@ -451,3 +449,10 @@ $RECYCLE.BIN/ .theflow/ # End of 
https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm + +logs/ +.gitsecret/keys/random_seed +!*.secret +credentials.txt + +S.gpg-agent* diff --git a/.gitsecret/keys/pubring.kbx b/.gitsecret/keys/pubring.kbx new file mode 100644 index 0000000000000000000000000000000000000000..86a2b56937cf8dbed6bdace86a2b411bfa39d47e GIT binary patch literal 1966 zcmaizdocpD~zW$E60j4Q1Mi6ywQd+=qybn57y^VX$N-Gno*{%$T*5OK9DB zl3UqvXNZ-ob;*jNT&8j>N{n2hOzU~5XV3GT=j=KAKIi-WzQ3RE=a26>pZ5y@01OBM zkdw`+0AO2`#aF32YsY_)L^1?^b};||{{%pQD)pYEQBC{7);>0a$9L6#-juB(v)zH& z>AIRs-E=bmNn;J8V8r|e@#wzc+U>I0&fB&?KpFrb^=+s0`3?cF&*%Sb+dsIzQ_2RD zV3cr+3Q!v04F%X?e~DVN`_!Xn@&fmU`*{57h!q!`QjvBgS^BZVEsRad^wHs}sr#|c zg#FxrYP>@9uPtq!!>dOU7~j_){IE5s>@XLg2Et1Z{Y0i1t38Z$*H&t#EGMN|S=_~X zd0_+hNv7Y0(@L1Q4_K)XHnhgL=8I{Cmix2J((Q_ElGumXdSzptk)(l%sHZE5v50}h zU@xv^FmOKE-_2_}#`GlWq6|^pX(S^2w=_ag6N*Dm6Zay@sg!h1<6plK%_hPuh0E4g zs{>4RyEWP?P<-sU?6r?@s`R0}nv;61H*-S@Ps0PLseC2TpyAS^s~S1l^3cswTk_^myZ%&FYE>6Kme@Xo?c*R;FmlA^-9NlXc!MB#w^H( zX5|+|qxn$J)AWKaSp26C`ZCr8a2TclA^;1tChV`WBM$A%7*Y`vmlEp;1jLb2k`fSn zH$qAh3Bk~i7zn9@kYX%=kdpl~Ze3Jq;8ymc{d9pP{%KeK6f%6t&^gdvyTp4v2tUA! zpyq41?`pT>#u)1etNcpvm5C&wXlH9Vf{c?ReY z%2O;N>{;mIG}bow%1Z{(-(E&D*9%_*-}QebjHH~^YT$V5MA2uwTTa>XmvmNk4XKgR zs)|^qw4WbH>!ldX2;;&ZivQAS-`OS!iplo#i%2@@x1mu+zxjV@!?fbqmNU zy)2JEtd-a9NX~UI&csa=z!If$9X_qP3H;WkB^vjyPFXr3BhR+a@Z;zI#S(YFDFU4M z!s-ZlyXWL<{|vfZIZi}bDH~I+ExXP}X?60eP$FHz6!`$EjPDjr_jz^O(bN$4w%It_ zfSmwb?kUPDYb-Goup%-#$#c66S%d4Ft6<^SRd1jO@GEA@2`dfV%uQ=n*R%F zvsA~pFk(E(ahkiKKj&%zvd@Poi0lp>?~W@iu<2U;F}sz%#*Lh~K-3H7B!8O8O}Blo z^|bOye+F*9keo1M@j=n*DM#XkK0Qw`DIoWUp1`y#8D=>AdL^ULOd$?eHLF6Ne`f3^ zUQ%*DsIJh_ePkmKJUfZ-t8_eEP=<2X^m;3HBIKu6PfFHJf}K1}^eTcYoB`Yn8{hZw zPEK0AttD}aFhi|;XGBX?+p;bV1omd-d+Xx3f4JQ>2nvpO6O;P~77GTl!Y6d<-ngF)Kl(uoh{ta%@UvT?htj6{rw=^GQ&>s4Nt zaPNErkHs~0>dk(ly9tgQVLsj~5ao@H9~B5i*6Up)ullesn|t?Aqo-SVc}(|Uzej#N zQ^s8<*MC@O)F$%tB*-SZii01|co<_jnJ-ssQL9UJvdqwBLS<~Jkt0s4Q#n8R{WVCEg~$Ua?hf9`46V%3B}frUC+0~I%2A71f9Bevq^8* zbpOiTbd90bkA8_=*A*#j`M~alMd;~ i_;Aj${quK!VzoM2_gnVvFb=_gmWcEHGmT^vJnR2@c;fCWawWi(tsAuyT_C`$(br&|R$ literal 0 HcmV?d00001 diff --git a/.gitsecret/paths/mapping.cfg b/.gitsecret/paths/mapping.cfg new file mode 100644 index 0000000..576d1d7 --- /dev/null +++ b/.gitsecret/paths/mapping.cfg @@ -0,0 +1 @@ +credentials.txt:1e17fa46dd8353b5ded588b32983ac7d800e70fd16bc5831663b9aaefc409011 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d97508e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,49 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-yaml + - id: check-toml + - id: end-of-file-fixer + - id: trailing-whitespace + - id: detect-aws-credentials + args: ["--allow-missing-credentials"] + - id: detect-private-key + - id: check-added-large-files + - repo: https://github.com/ambv/black + rev: 22.3.0 + hooks: + - id: black + language_version: python3 + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--profile", "black"] + language_version: python3.10 + - repo: https://github.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: ["--max-line-length", "88"] + - repo: https://github.com/myint/autoflake + rev: v1.4 + hooks: + - id: autoflake + args: + [ + "--in-place", + "--remove-unused-variables", + "--remove-all-unused-imports", + "--ignore-init-module-imports", + "--exclude=tests/*", + ] + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.7.1 + hooks: + - id: 
prettier
+        types_or: [markdown, yaml]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: "v1.5.1"
+    hooks:
+      - id: mypy
diff --git a/README.md b/README.md
index a79c399..19befda 100644
--- a/README.md
+++ b/README.md
@@ -13,18 +13,116 @@ pip install kotaemon@git+ssh://git@github.com/Cinnamon/kotaemon.git
 
 ### Setup
 
+- Create a conda environment (Python 3.10 suggested)
+
+  ```shell
+  conda create -n kotaemon python=3.10
+  conda activate kotaemon
+  ```
+
+- Clone the repo
+
+  ```shell
+  git clone git@github.com:Cinnamon/kotaemon.git
+  cd kotaemon
+  ```
+
+- Install the package with dev dependencies
+
+  ```shell
+  pip install -e ".[dev]"
+  ```
+
+- Install the pre-commit hooks
+
+  ```shell
+  pre-commit install
+  ```
+
+- Run the tests
+
+  ```shell
+  pytest tests
+  ```
+
+### Credential sharing
+
+This repo uses [git-secret](https://sobolevn.me/git-secret/) to share credentials; internally it uses `gpg` to encrypt and decrypt secret files.
+
+#### Install git-secret
+
+Please follow the [official guide](https://sobolevn.me/git-secret/installation) to install git-secret.
+
+#### Gaining access
+
+To gain access to the secret files, provide your gpg public key file to someone who already has access and ask them to add your key to the keyring. For a quick tutorial on generating your gpg key pair, refer to the `Using gpg` section of the [git-secret main page](https://sobolevn.me/git-secret/).
+
+#### Decrypt the secret file
+
+The credentials are encrypted in the `credentials.txt.secret` file. To print the decrypted content to stdout, run
+
 ```shell
-# Create conda environment (suggest 3.10)
-conda create -n kotaemon python=3.10
-conda activate kotaemon
-
-# Install all
-pip install -e ".[dev]"
-
-# Test
-pytest tests
+git-secret cat [filename]
 ```
 
+Or, to restore the decrypted `credentials.txt` file on disk, run
+
+```shell
+git-secret reveal [filename]
+```
+
+#### For Windows users
+
+git-secret is currently not available for Windows, so the easiest way is to use it from WSL (please use the latest version of WSL2). From there you have two options:
+
+1. Using WSL's gpg.
+
+   This is the most straightforward option, since you use WSL just like any other Unix environment. However, the downside is that you have to make WSL your main environment, which means WSL must have write permission on your repo. To achieve this, you must either:
+
+   - Clone and store your repo inside WSL's file system.
+   - Provide WSL with the necessary permissions on your Windows file system. This can be achieved by setting the `automount` options for WSL. To do that, add the following to `/etc/wsl.conf` and then restart your subsystem.
+
+     ```shell
+     [automount]
+     options = "metadata,umask=022,fmask=011"
+     ```
+
+     This grants the owning user full permissions.
+
+2. Using the Windows gpg with git-secret from WSL.
+
+   For those who use Windows as the main environment, having to switch back and forth between Windows and WSL is inconvenient. You can instead stay within your Windows environment and apply a few tricks to use `git-secret` from WSL.
+
+   - Install and set up `gpg` on Windows.
+   - Install `git-secret` on WSL.
+   - Make WSL use the `gpg` executable from Windows. This can be done by aliasing `gpg` to the Windows `gpg.exe` executable. Add this to your startup script:
+
+     ```shell
+     # Create ~/bin if it doesn't exist
+     [ ! -d "$HOME/bin" ] && mkdir "$HOME/bin"
-d "$HOME/bin" ] && mkdir "$HOME/bin" + + # link windows executable + ln -snf "$(which gpg.exe)" "$HOME/bin/gpg" + + # Prepend $HOME/bin to PATH + if [[ ":$PATH:" == *":$HOME/bin:"* ]]; then + export PATH="$HOME/bin:$PATH" + fi + ``` + + - Now in Windows, you can invoke `ssh-secret` using `wsl ssh-secret`. + - Alternatively you can setup alias in CMD to shorten the syntax. Please refer to [this SO answer](https://stackoverflow.com/a/65823225) for the instruction. Some recommended aliases are: + + ```bat + @echo off + + :: Commands + DOSKEY ls=dir /B $* + DOSKEY ll=dir /a $* + DOSKEY git-secret=wsl git-secret $* + DOSKEY gs=wsl git-secret $* + ``` + ### Code base structure - documents: define document diff --git a/credentials.txt.secret b/credentials.txt.secret new file mode 100644 index 0000000000000000000000000000000000000000..c686370158b38628976569ae6a8f072860610d65 GIT binary patch literal 486 zcmV<0gMB9>i0uNB-FA23;>D&*3GeMZCqi(x}8P`CmRNzm;!juW-*?#VVg_Q zO#$f1gXS&F9NYJKwM7M=;z(xB6v&78Xao8$;v-1^V=-c*+FED1U1SJK7sn;on`Awr zECPp+-ekAfB(byzNdK|b%JYrq*gzWvmLq@*M9?%4UOO~niXk`&KX1Ac1ZA5IuczZ1 zXvUopLxOeAkUPsVkZMSy4CHkXdKMl6Jb-WY1DJ=+YVH_IzEGXW+dN;-cZ)L?mi5wj z4>#RewmFK)O8lCmo*^Ay$_s-Mft?vaAffxXy6Wrj*@VXFlGHc&{6uTQ6N#Ai$S-Pv z@+``o+&;7sY>e#~bq?Dd%bV;lNI);Z)s=T2-@nQ(&(PCFi|FV|avXiS3!@Be*3U%= zL}3KVZ)a-6I3NSjcA3ljRXYYr(=#L960-qk|2@9z2vvk%D*KoGR_P;T00!`$t)rr@ z#OfIlz7o9Z;3alB_>{vEb-qiAh3G7nFwH`qSKf#(3L=PM6WdO9f+5mX0oZf1;HxTN zI4!k2P1B6aKn>ug{yzNkn3nGfTpA{=e`dAc)gD!^co#O8&#FlAiNMXl-}xlm(=mo<_A4N9|#s=keAvvDqEZpbC list[LLMInterface]: + def run_batch_raw(self, text: List[str]) -> List[LLMInterface]: inputs = [[HumanMessage(content=each)] for each in text] return self.run_batch_document(inputs) - def run_document(self, text: list[Message]) -> LLMInterface: + def run_document(self, text: List[Message]) -> LLMInterface: pred = self.agent.generate([text]) return LLMInterface( text=[each.text for each in pred.generations[0]], @@ -57,7 +52,7 @@ class LangchainChatLLM(ChatLLM): logits=[], ) - def run_batch_document(self, text: list[list[Message]]) -> list[LLMInterface]: + def run_batch_document(self, text: List[List[Message]]) -> List[LLMInterface]: outputs = [] for each_text in text: outputs.append(self.run_document(each_text)) @@ -66,14 +61,14 @@ class LangchainChatLLM(ChatLLM): def is_document(self, text) -> bool: if isinstance(text, str): return False - elif isinstance(text, list) and isinstance(text[0], str): + elif isinstance(text, List) and isinstance(text[0], str): return False return True def is_batch(self, text) -> bool: if isinstance(text, str): return False - elif isinstance(text, list): + elif isinstance(text, List): if isinstance(text[0], BaseMessage): return False return True diff --git a/knowledgehub/llms/completions/base.py b/knowledgehub/llms/completions/base.py index 2409c91..145979e 100644 --- a/knowledgehub/llms/completions/base.py +++ b/knowledgehub/llms/completions/base.py @@ -1,7 +1,7 @@ -from typing import Type +from typing import List, Type -from theflow.base import Param from langchain.schema.language_model import BaseLanguageModel +from theflow.base import Param from ...components import BaseComponent from ..base import LLMInterface @@ -41,7 +41,7 @@ class LangchainLLM(LLM): logits=[], ) - def run_batch_raw(self, text: list[str]) -> list[LLMInterface]: + def run_batch_raw(self, text: List[str]) -> List[LLMInterface]: outputs = [] for each_text in text: outputs.append(self.run_raw(each_text)) @@ -50,7 +50,7 @@ class LangchainLLM(LLM): def run_document(self, text: str) -> 
LLMInterface: return self.run_raw(text) - def run_batch_document(self, text: list[str]) -> list[LLMInterface]: + def run_batch_document(self, text: List[str]) -> List[LLMInterface]: return self.run_batch_raw(text) def is_document(self, text) -> bool: diff --git a/knowledgehub/llms/completions/openai.py b/knowledgehub/llms/completions/openai.py index a510e27..93a25ee 100644 --- a/knowledgehub/llms/completions/openai.py +++ b/knowledgehub/llms/completions/openai.py @@ -5,9 +5,11 @@ from .base import LangchainLLM class OpenAI(LangchainLLM): """Wrapper around Langchain's OpenAI class""" + _lc_class = langchain_llms.OpenAI class AzureOpenAI(LangchainLLM): """Wrapper around Langchain's AzureOpenAI class""" + _lc_class = langchain_llms.AzureOpenAI diff --git a/knowledgehub/loaders/base.py b/knowledgehub/loaders/base.py index eaae292..f21e2ec 100644 --- a/knowledgehub/loaders/base.py +++ b/knowledgehub/loaders/base.py @@ -1,13 +1,10 @@ class DocumentLoader: """Document loader""" - pass class TextManipulator: """Text manipulation""" - pass class DocumentManipulator: """Document manipulation""" - pass diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..1127b02 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,9 @@ +[pytest] +minversion = 7.4.0 +testpaths = tests +addopts = -ra -q +log_cli=true +log_level=DEBUG +log_format = %(asctime)s %(levelname)s %(message)s +log_date_format = %Y-%m-%d %H:%M:%S +log_file = logs/pytest-logs.txt diff --git a/setup.py b/setup.py index 39ed677..94839d0 100644 --- a/setup.py +++ b/setup.py @@ -41,9 +41,8 @@ setuptools.setup( "flake8", "sphinx", "coverage", - # optional dependency needed for test - "openai" + "openai", ], }, entry_points={"console_scripts": ["kh=kotaemon.cli:main"]}, diff --git a/tests/test_llms_chat_models.py b/tests/test_llms_chat_models.py index aba8e8e..392d54e 100644 --- a/tests/test_llms_chat_models.py +++ b/tests/test_llms_chat_models.py @@ -1,15 +1,10 @@ from unittest.mock import patch from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC -from langchain.schema.messages import ( - SystemMessage, - HumanMessage, - AIMessage, -) +from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage -from kotaemon.llms.chats.openai import AzureChatOpenAI from kotaemon.llms.base import LLMInterface - +from kotaemon.llms.chats.openai import AzureChatOpenAI _openai_chat_completion_response = { "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x", @@ -49,7 +44,9 @@ def test_azureopenai_model(openai_completion): # test for str input - stream mode output = model("hello world") - assert isinstance(output, LLMInterface), "Output for single text is not LLMInterface" + assert isinstance( + output, LLMInterface + ), "Output for single text is not LLMInterface" openai_completion.assert_called() # test for list[str] input - batch mode @@ -67,7 +64,9 @@ def test_azureopenai_model(openai_completion): ] output = model(messages) - assert isinstance(output, LLMInterface), "Output for single text is not LLMInterface" + assert isinstance( + output, LLMInterface + ), "Output for single text is not LLMInterface" openai_completion.assert_called() # test for list[list[message]] input - batch mode @@ -75,4 +74,3 @@ def test_azureopenai_model(openai_completion): assert isinstance(output, list), "Output for batch string is not a list" assert isinstance(output[0], LLMInterface), "Output for text is not LLMInterface" openai_completion.assert_called() - diff --git a/tests/test_llms_completion_models.py b/tests/test_llms_completion_models.py index 
99a354e..495e5b4 100644 --- a/tests/test_llms_completion_models.py +++ b/tests/test_llms_completion_models.py @@ -1,10 +1,10 @@ from unittest.mock import patch -from langchain.llms import AzureOpenAI as AzureOpenAILC, OpenAI as OpenAILC +from langchain.llms import AzureOpenAI as AzureOpenAILC +from langchain.llms import OpenAI as OpenAILC -from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI from kotaemon.llms.base import LLMInterface - +from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI _openai_completion_response = { "id": "cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0", @@ -41,7 +41,9 @@ def test_azureopenai_model(openai_completion): openai_completion.assert_called() output = model("hello world") - assert isinstance(output, LLMInterface), "Output for single text is not LLMInterface" + assert isinstance( + output, LLMInterface + ), "Output for single text is not LLMInterface" @patch( @@ -67,4 +69,6 @@ def test_openai_model(openai_completion): openai_completion.assert_called() output = model("hello world") - assert isinstance(output, LLMInterface), "Output for single text is not LLMInterface" + assert isinstance( + output, LLMInterface + ), "Output for single text is not LLMInterface" diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index 59b184f..3b1e96c 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -29,11 +29,14 @@ def clean_artifacts_for_telemetry(): def test_disable_telemetry_import_haystack_first(): """Test that telemetry is disabled when kotaemon lib is initiated after""" import os + import haystack.telemetry + assert haystack.telemetry.telemetry is not None assert os.environ.get("HAYSTACK_TELEMETRY_ENABLED", "True") != "False" - import kotaemon # noqa: F401 + import kotaemon # noqa: F401 + assert haystack.telemetry.telemetry is None assert os.environ.get("HAYSTACK_TELEMETRY_ENABLED", "True") == "False" @@ -43,8 +46,9 @@ def test_disable_telemetry_import_haystack_after_kotaemon(): """Test that telemetry is disabled when kotaemon lib is initiated before""" import os - import kotaemon # noqa: F401 import haystack.telemetry + + import kotaemon # noqa: F401 + assert haystack.telemetry.telemetry is None assert os.environ.get("HAYSTACK_TELEMETRY_ENABLED", "True") == "False" -