From db70f1752a8bc2474e9dc51a9f3942169a782957 Mon Sep 17 00:00:00 2001 From: dsalvatierra Date: Sat, 11 Nov 2023 23:27:32 -0500 Subject: [PATCH] Update .gitignore and Dockerfile, add .env file and modify test batch --- .gitignore | 5 ++- gpt4all-api/docker-compose.yaml | 6 ++- gpt4all-api/gpt4all_api/Dockerfile.buildkit | 6 --- gpt4all-api/gpt4all_api/app/__init__.py | 0 .../gpt4all_api/app/api_v1/__init__.py | 0 .../gpt4all_api/app/api_v1/routes/__init__.py | 0 .../gpt4all_api/app/tests/test_endpoints.py | 42 +++++++++++++------ gpt4all-api/gpt4all_api/models/README.md | 2 +- gpt4all-api/gpt4all_api/requirements.txt | 5 ++- gpt4all-api/makefile | 12 +++--- 10 files changed, 48 insertions(+), 30 deletions(-) delete mode 100644 gpt4all-api/gpt4all_api/app/__init__.py delete mode 100644 gpt4all-api/gpt4all_api/app/api_v1/__init__.py delete mode 100644 gpt4all-api/gpt4all_api/app/api_v1/routes/__init__.py diff --git a/.gitignore b/.gitignore index 1e8a5c36..9d1d6918 100644 --- a/.gitignore +++ b/.gitignore @@ -183,4 +183,7 @@ build_* build-* # IntelliJ -.idea/ \ No newline at end of file +.idea/ + +# LLM models +*.gguf diff --git a/gpt4all-api/docker-compose.yaml b/gpt4all-api/docker-compose.yaml index 3508b907..6c9ffcf6 100644 --- a/gpt4all-api/docker-compose.yaml +++ b/gpt4all-api/docker-compose.yaml @@ -7,14 +7,16 @@ services: restart: always #restart on error (usually code compilation from save during bad state) ports: - "4891:4891" + env_file: + - .env environment: - APP_ENVIRONMENT=dev - WEB_CONCURRENCY=2 - LOGLEVEL=debug - PORT=4891 - - model=${MODEL_ID} + - model=${MODEL_BIN} # using variable from .env file - inference_mode=cpu volumes: - './gpt4all_api/app:/app' - - './gpt4all_api/models:/models' + - './gpt4all_api/models:/models' # models are mounted in the container command: ["/start-reload.sh"] \ No newline at end of file diff --git a/gpt4all-api/gpt4all_api/Dockerfile.buildkit b/gpt4all-api/gpt4all_api/Dockerfile.buildkit index d366e65b..a2ae80a9 100644 
--- a/gpt4all-api/gpt4all_api/Dockerfile.buildkit +++ b/gpt4all-api/gpt4all_api/Dockerfile.buildkit @@ -1,8 +1,6 @@ # syntax=docker/dockerfile:1.0.0-experimental FROM tiangolo/uvicorn-gunicorn:python3.11 -ARG MODEL_BIN=ggml-mpt-7b-chat.bin - # Put first so anytime this file changes other cached layers are invalidated. COPY gpt4all_api/requirements.txt /requirements.txt @@ -17,7 +15,3 @@ COPY gpt4all_api/app /app RUN mkdir -p /models -# Include the following line to bake a model into the image and not have to download it on API start. -RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \ - && md5sum /models/${MODEL_BIN} - diff --git a/gpt4all-api/gpt4all_api/app/__init__.py b/gpt4all-api/gpt4all_api/app/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/gpt4all-api/gpt4all_api/app/api_v1/__init__.py b/gpt4all-api/gpt4all_api/app/api_v1/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/gpt4all-api/gpt4all_api/app/api_v1/routes/__init__.py b/gpt4all-api/gpt4all_api/app/api_v1/routes/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py b/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py index a7f3f13c..a310125a 100644 --- a/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py +++ b/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py @@ -2,16 +2,26 @@ Use the OpenAI python API to test gpt4all models. 
""" from typing import List, get_args +import os +from dotenv import load_dotenv import openai openai.api_base = "http://localhost:4891/v1" - openai.api_key = "not needed for a local LLM" +# Load the .env file +env_path = 'gpt4all-api/gpt4all_api/.env' +load_dotenv(dotenv_path=env_path) + +# Fetch MODEL_ID from .env file +model_id = os.getenv('MODEL_BIN', 'default_model_id') +embedding = os.getenv('EMBEDDING', 'default_embedding_model_id') +print (model_id) +print (embedding) def test_completion(): - model = "ggml-mpt-7b-chat.bin" + model = model_id prompt = "Who is Michael Jordan?" response = openai.Completion.create( model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False @@ -19,7 +29,7 @@ def test_completion(): assert len(response['choices'][0]['text']) > len(prompt) def test_streaming_completion(): - model = "ggml-mpt-7b-chat.bin" + model = model_id prompt = "Who is Michael Jordan?" tokens = [] for resp in openai.Completion.create( @@ -36,19 +46,27 @@ def test_streaming_completion(): assert (len(tokens) > 0) assert (len("".join(tokens)) > len(prompt)) - +# Modified test batch, problems with keyerror in response def test_batched_completion(): - model = "ggml-mpt-7b-chat.bin" + model = model_id # replace with your specific model ID prompt = "Who is Michael Jordan?" 
- response = openai.Completion.create( - model=model, prompt=[prompt] * 3, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False - ) - assert len(response['choices'][0]['text']) > len(prompt) - assert len(response['choices']) == 3 + responses = [] + + # Loop to create completions one at a time + for _ in range(3): + response = openai.Completion.create( + model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False + ) + responses.append(response) + # Assertions to check the responses + for response in responses: + assert len(response['choices'][0]['text']) > len(prompt) + + assert len(responses) == 3 def test_embedding(): - model = "ggml-all-MiniLM-L6-v2-f16.bin" + model = embedding prompt = "Who is Michael Jordan?" response = openai.Embedding.create(model=model, input=prompt) output = response["data"][0]["embedding"] @@ -56,4 +74,4 @@ def test_embedding(): assert response["model"] == model assert isinstance(output, list) - assert all(isinstance(x, args) for x in output) + assert all(isinstance(x, args) for x in output) \ No newline at end of file diff --git a/gpt4all-api/gpt4all_api/models/README.md b/gpt4all-api/gpt4all_api/models/README.md index 02543244..425324f2 100644 --- a/gpt4all-api/gpt4all_api/models/README.md +++ b/gpt4all-api/gpt4all_api/models/README.md @@ -1 +1 @@ -# Drop GGUF compatible models here, make sure it matches MODEL_BIN on your .env file \ No newline at end of file +### Drop GGUF compatible models here, make sure it matches MODEL_BIN on your .env file \ No newline at end of file diff --git a/gpt4all-api/gpt4all_api/requirements.txt b/gpt4all-api/gpt4all_api/requirements.txt index f7c7ed53..6bfe6ddd 100644 --- a/gpt4all-api/gpt4all_api/requirements.txt +++ b/gpt4all-api/gpt4all_api/requirements.txt @@ -7,6 +7,7 @@ fastapi>=0.95.0 Jinja2>=3.0 gpt4all>=1.0.0 pytest -openai +openai==0.28.0 black -isort \ No newline at end of file +isort +python-dotenv \ No newline at end of file diff 
--git a/gpt4all-api/makefile b/gpt4all-api/makefile index 66420e45..8c0e5ef2 100644 --- a/gpt4all-api/makefile +++ b/gpt4all-api/makefile @@ -14,7 +14,7 @@ testenv_gpu: clean_testenv test_build docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up --build testenv_d: clean_testenv test_build - docker compose up --build -d + docker compose --env-file .env up --build -d test: docker compose exec $(APP_NAME) pytest -svv --disable-warnings -p no:cacheprovider /app/tests @@ -28,19 +28,19 @@ clean_testenv: fresh_testenv: clean_testenv testenv venv: - if [ ! -d $(ROOT_DIR)/env ]; then $(PYTHON) -m venv $(ROOT_DIR)/env; fi + if [ ! -d $(ROOT_DIR)/venv ]; then $(PYTHON) -m venv $(ROOT_DIR)/venv; fi dependencies: venv - source $(ROOT_DIR)/env/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt + source $(ROOT_DIR)/venv/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt clean: clean_testenv # Remove existing environment - rm -rf $(ROOT_DIR)/env; + rm -rf $(ROOT_DIR)/venv; rm -rf $(ROOT_DIR)/$(APP_NAME)/*.pyc; black: - source $(ROOT_DIR)/env/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME) + source $(ROOT_DIR)/venv/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME) isort: - source $(ROOT_DIR)/env/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME) \ No newline at end of file + source $(ROOT_DIR)/venv/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME) \ No newline at end of file