From db70f1752a8bc2474e9dc51a9f3942169a782957 Mon Sep 17 00:00:00 2001 From: dsalvatierra Date: Sat, 11 Nov 2023 23:27:32 -0500 Subject: [PATCH] Update .gitignore and Dockerfile, add .env file and modify test batch --- .gitignore | 5 ++- gpt4all-api/docker-compose.yaml | 6 ++- gpt4all-api/gpt4all_api/Dockerfile.buildkit | 6 --- gpt4all-api/gpt4all_api/app/__init__.py | 0 .../gpt4all_api/app/api_v1/__init__.py | 0 .../gpt4all_api/app/api_v1/routes/__init__.py | 0 .../gpt4all_api/app/tests/test_endpoints.py | 42 +++++++++++++------ gpt4all-api/gpt4all_api/models/README.md | 2 +- gpt4all-api/gpt4all_api/requirements.txt | 5 ++- gpt4all-api/makefile | 12 +++--- 10 files changed, 48 insertions(+), 30 deletions(-) delete mode 100644 gpt4all-api/gpt4all_api/app/__init__.py delete mode 100644 gpt4all-api/gpt4all_api/app/api_v1/__init__.py delete mode 100644 gpt4all-api/gpt4all_api/app/api_v1/routes/__init__.py diff --git a/.gitignore b/.gitignore index 1e8a5c36..9d1d6918 100644 --- a/.gitignore +++ b/.gitignore @@ -183,4 +183,7 @@ build_* build-* # IntelliJ -.idea/ \ No newline at end of file +.idea/ + +# LLM models +*.gguf diff --git a/gpt4all-api/docker-compose.yaml b/gpt4all-api/docker-compose.yaml index 3508b907..6c9ffcf6 100644 --- a/gpt4all-api/docker-compose.yaml +++ b/gpt4all-api/docker-compose.yaml @@ -7,14 +7,16 @@ services: restart: always #restart on error (usually code compilation from save during bad state) ports: - "4891:4891" + env_file: + - .env environment: - APP_ENVIRONMENT=dev - WEB_CONCURRENCY=2 - LOGLEVEL=debug - PORT=4891 - - model=${MODEL_ID} + - model=${MODEL_BIN} # using variable from .env file - inference_mode=cpu volumes: - './gpt4all_api/app:/app' - - './gpt4all_api/models:/models' + - './gpt4all_api/models:/models' # models are mounted in the container command: ["/start-reload.sh"] \ No newline at end of file diff --git a/gpt4all-api/gpt4all_api/Dockerfile.buildkit b/gpt4all-api/gpt4all_api/Dockerfile.buildkit index d366e65b..a2ae80a9 100644 
--- a/gpt4all-api/gpt4all_api/Dockerfile.buildkit +++ b/gpt4all-api/gpt4all_api/Dockerfile.buildkit @@ -1,8 +1,6 @@ # syntax=docker/dockerfile:1.0.0-experimental FROM tiangolo/uvicorn-gunicorn:python3.11 -ARG MODEL_BIN=ggml-mpt-7b-chat.bin - # Put first so anytime this file changes other cached layers are invalidated. COPY gpt4all_api/requirements.txt /requirements.txt @@ -17,7 +15,3 @@ COPY gpt4all_api/app /app RUN mkdir -p /models -# Include the following line to bake a model into the image and not have to download it on API start. -RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \ - && md5sum /models/${MODEL_BIN} - diff --git a/gpt4all-api/gpt4all_api/app/__init__.py b/gpt4all-api/gpt4all_api/app/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/gpt4all-api/gpt4all_api/app/api_v1/__init__.py b/gpt4all-api/gpt4all_api/app/api_v1/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/gpt4all-api/gpt4all_api/app/api_v1/routes/__init__.py b/gpt4all-api/gpt4all_api/app/api_v1/routes/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py b/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py index a7f3f13c..a310125a 100644 --- a/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py +++ b/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py @@ -2,16 +2,26 @@ Use the OpenAI python API to test gpt4all models. 
""" from typing import List, get_args +import os +from dotenv import load_dotenv import openai openai.api_base = "http://localhost:4891/v1" - openai.api_key = "not needed for a local LLM" +# Load the .env file +env_path = 'gpt4all-api/gpt4all_api/.env' +load_dotenv(dotenv_path=env_path) + +# Fetch MODEL_ID from .env file +model_id = os.getenv('MODEL_BIN', 'default_model_id') +embedding = os.getenv('EMBEDDING', 'default_embedding_model_id') +print (model_id) +print (embedding) def test_completion(): - model = "ggml-mpt-7b-chat.bin" + model = model_id prompt = "Who is Michael Jordan?" response = openai.Completion.create( model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False @@ -19,7 +29,7 @@ def test_completion(): assert len(response['choices'][0]['text']) > len(prompt) def test_streaming_completion(): - model = "ggml-mpt-7b-chat.bin" + model = model_id prompt = "Who is Michael Jordan?" tokens = [] for resp in openai.Completion.create( @@ -36,19 +46,27 @@ def test_streaming_completion(): assert (len(tokens) > 0) assert (len("".join(tokens)) > len(prompt)) - +# Modified test batch, problems with keyerror in response def test_batched_completion(): - model = "ggml-mpt-7b-chat.bin" + model = model_id # replace with your specific model ID prompt = "Who is Michael Jordan?" 
- response = openai.Completion.create( - model=model, prompt=[prompt] * 3, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False - ) - assert len(response['choices'][0]['text']) > len(prompt) - assert len(response['choices']) == 3 + responses = [] + + # Loop to create completions one at a time + for _ in range(3): + response = openai.Completion.create( + model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False + ) + responses.append(response) + # Assertions to check the responses + for response in responses: + assert len(response['choices'][0]['text']) > len(prompt) + + assert len(responses) == 3 def test_embedding(): - model = "ggml-all-MiniLM-L6-v2-f16.bin" + model = embedding prompt = "Who is Michael Jordan?" response = openai.Embedding.create(model=model, input=prompt) output = response["data"][0]["embedding"] @@ -56,4 +74,4 @@ def test_embedding(): assert response["model"] == model assert isinstance(output, list) - assert all(isinstance(x, args) for x in output) + assert all(isinstance(x, args) for x in output) \ No newline at end of file diff --git a/gpt4all-api/gpt4all_api/models/README.md b/gpt4all-api/gpt4all_api/models/README.md index 02543244..425324f2 100644 --- a/gpt4all-api/gpt4all_api/models/README.md +++ b/gpt4all-api/gpt4all_api/models/README.md @@ -1 +1 @@ -# Drop GGUF compatible models here, make sure it matches MODEL_BIN on your .env file \ No newline at end of file +### Drop GGUF compatible models here, make sure it matches MODEL_BIN on your .env file \ No newline at end of file diff --git a/gpt4all-api/gpt4all_api/requirements.txt b/gpt4all-api/gpt4all_api/requirements.txt index f7c7ed53..6bfe6ddd 100644 --- a/gpt4all-api/gpt4all_api/requirements.txt +++ b/gpt4all-api/gpt4all_api/requirements.txt @@ -7,6 +7,7 @@ fastapi>=0.95.0 Jinja2>=3.0 gpt4all>=1.0.0 pytest -openai +openai==0.28.0 black -isort \ No newline at end of file +isort +python-dotenv \ No newline at end of file diff 
--git a/gpt4all-api/makefile b/gpt4all-api/makefile index 66420e45..8c0e5ef2 100644 --- a/gpt4all-api/makefile +++ b/gpt4all-api/makefile @@ -14,7 +14,7 @@ testenv_gpu: clean_testenv test_build docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up --build testenv_d: clean_testenv test_build - docker compose up --build -d + docker compose --env-file .env up --build -d test: docker compose exec $(APP_NAME) pytest -svv --disable-warnings -p no:cacheprovider /app/tests @@ -28,19 +28,19 @@ clean_testenv: fresh_testenv: clean_testenv testenv venv: - if [ ! -d $(ROOT_DIR)/env ]; then $(PYTHON) -m venv $(ROOT_DIR)/env; fi + if [ ! -d $(ROOT_DIR)/venv ]; then $(PYTHON) -m venv $(ROOT_DIR)/venv; fi dependencies: venv - source $(ROOT_DIR)/env/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt + source $(ROOT_DIR)/venv/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt clean: clean_testenv # Remove existing environment - rm -rf $(ROOT_DIR)/env; + rm -rf $(ROOT_DIR)/venv; rm -rf $(ROOT_DIR)/$(APP_NAME)/*.pyc; black: - source $(ROOT_DIR)/env/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME) + source $(ROOT_DIR)/venv/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME) isort: - source $(ROOT_DIR)/env/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME) \ No newline at end of file + source $(ROOT_DIR)/venv/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME) \ No newline at end of file