Feat/local endpoint llm (#148)
* serve local model in a different process from the app

---------

Co-authored-by: albert <albert@cinnamon.is>
Co-authored-by: trducng <trungduc1992@gmail.com>
scripts/serve_local.py (new file, 81 lines)
@@ -0,0 +1,81 @@
import platform
import subprocess
from inspect import currentframe, getframeinfo
from pathlib import Path

import dotenv

configs = dotenv.dotenv_values(".env")

system_name = platform.system()

cur_frame = currentframe()
if cur_frame is None:
    raise ValueError("Cannot get the current frame.")
this_file = getframeinfo(cur_frame).filename
this_dir = Path(this_file).parent


def serve_llamacpp_python(local_model_file: Path, **kwargs):
    def guess_chat_format(local_model_file):
        model_name = local_model_file.stem

        # handle known cases that the server backends handle incorrectly
        # this is highly heuristic and should be expanded later
        # server backends usually have logic for this, but they can still get it wrong
        if "qwen" in model_name:
            return "qwen"

        return None

    # default port
    if "port" not in kwargs:
        kwargs["port"] = 31415

    chat_format = guess_chat_format(local_model_file)
    if chat_format:
        kwargs = {**kwargs, "chat_format": chat_format}

    # these scripts create a separate conda env and run the server
    if system_name == "Windows":
        script_file = this_dir / "server_llamacpp_windows.bat"
    elif system_name == "Linux":
        script_file = this_dir / "server_llamacpp_linux.sh"
    elif system_name == "Darwin":
        script_file = this_dir / "server_llamacpp_macos.sh"
    else:
        raise ValueError(f"Unsupported system: {system_name}")

    args = " ".join(f"--{k} {v}" for k, v in kwargs.items())

    cmd = f"{script_file} --model {local_model_file} {args}"
    subprocess.Popen(cmd, shell=True)


def main():
    local_model_file = configs.get("LOCAL_MODEL", "")

    if not local_model_file:
        print("LOCAL_MODEL not set in the `.env` file.")
        return

    local_model_file = Path(local_model_file)
    if not local_model_file.exists():
        print(f"Local model not found: {local_model_file}")
        return

    print(f"Local model found: {local_model_file}")
    will_start_server = input("Do you want to use this local model? (y/n): ")

    if will_start_server.lower().strip() not in ["y", "yes"]:
        return

    print("Starting the local server...")
    if local_model_file.suffix == ".gguf":
        serve_llamacpp_python(local_model_file)
    else:
        raise ValueError(f"Unsupported model file type: {local_model_file.suffix}")


if __name__ == "__main__":
    main()
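
For reference, a minimal sketch of the command this script ends up launching, assuming a hypothetical Qwen GGUF model on Linux. The model path and the bare script name below are illustrative only; the --model, --port, and --chat_format handling mirrors serve_llamacpp_python above.

# Illustration only: how the launch command is assembled for a hypothetical model.
from pathlib import Path

local_model_file = Path("models/qwen-7b-chat.Q4_K_M.gguf")  # hypothetical path
kwargs = {"port": 31415, "chat_format": "qwen"}  # defaults the script would pick for this name
args = " ".join(f"--{k} {v}" for k, v in kwargs.items())
print(f"server_llamacpp_linux.sh --model {local_model_file} {args}")
# -> server_llamacpp_linux.sh --model models/qwen-7b-chat.Q4_K_M.gguf --port 31415 --chat_format qwen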