Update .gitignore and Dockerfile, add .env file

and modify the batched-completion test
manyoso-patch-4
dsalvatierra authored 6 months ago; committed by AT
parent f3eaa33ce7
commit db70f1752a

.gitignore vendored

@@ -183,4 +183,7 @@ build_*
build-*
# IntelliJ
.idea/
# LLM models
*.gguf

docker-compose.yaml

@@ -7,14 +7,16 @@ services:
    restart: always # restart on error (usually a compile failure from saving code in a bad state)
    ports:
      - "4891:4891"
    env_file:
      - .env
    environment:
      - APP_ENVIRONMENT=dev
      - WEB_CONCURRENCY=2
      - LOGLEVEL=debug
      - PORT=4891
      - model=${MODEL_ID}
      - model=${MODEL_BIN} # value supplied by the .env file
      - inference_mode=cpu
    volumes:
      - './gpt4all_api/app:/app'
      - './gpt4all_api/models:/models'
      - './gpt4all_api/models:/models' # models are mounted into the container
    command: ["/start-reload.sh"]
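For reference, a minimal .env that satisfies the variables referenced above and in the tests below might look like this (a sketch; the filenames are examples and must match the model files actually placed in gpt4all_api/models):

    MODEL_BIN=ggml-mpt-7b-chat.bin
    EMBEDDING=ggml-all-MiniLM-L6-v2-f16.bin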

Dockerfile

@@ -1,8 +1,6 @@
# syntax=docker/dockerfile:1.0.0-experimental
FROM tiangolo/uvicorn-gunicorn:python3.11
ARG MODEL_BIN=ggml-mpt-7b-chat.bin
# Put first so anytime this file changes other cached layers are invalidated.
COPY gpt4all_api/requirements.txt /requirements.txt
@@ -17,7 +15,3 @@ COPY gpt4all_api/app /app
RUN mkdir -p /models
# Include the following line to bake a model into the image and not have to download it on API start.
RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \
&& md5sum /models/${MODEL_BIN}
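Since the compose file now mounts ./gpt4all_api/models into the container, a model can be fetched ahead of time rather than baked into the image; for example (URL pattern taken from the wget line above; the filename is an assumption):

    wget https://gpt4all.io/models/ggml-mpt-7b-chat.bin -P gpt4all_api/models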

@@ -2,16 +2,26 @@
Use the OpenAI python API to test gpt4all models.
"""
from typing import List, get_args
import os
from dotenv import load_dotenv
import openai
openai.api_base = "http://localhost:4891/v1"
openai.api_key = "not needed for a local LLM"
# Load the .env file
env_path = 'gpt4all-api/gpt4all_api/.env'
load_dotenv(dotenv_path=env_path)
# Fetch MODEL_BIN and EMBEDDING from the .env file
model_id = os.getenv('MODEL_BIN', 'default_model_id')
embedding = os.getenv('EMBEDDING', 'default_embedding_model_id')
print(model_id)
print(embedding)
def test_completion():
model = "ggml-mpt-7b-chat.bin"
model = model_id
prompt = "Who is Michael Jordan?"
response = openai.Completion.create(
model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
@@ -19,7 +29,7 @@ def test_completion():
    assert len(response['choices'][0]['text']) > len(prompt)
def test_streaming_completion():
    model = "ggml-mpt-7b-chat.bin"
    model = model_id
    prompt = "Who is Michael Jordan?"
    tokens = []
    for resp in openai.Completion.create(
@@ -36,19 +46,27 @@ def test_streaming_completion():
    assert (len(tokens) > 0)
    assert (len("".join(tokens)) > len(prompt))
# Modified batched-completion test: the batched request raised a KeyError in the response, so completions are now created one at a time
def test_batched_completion():
model = "ggml-mpt-7b-chat.bin"
model = model_id # replace with your specific model ID
prompt = "Who is Michael Jordan?"
response = openai.Completion.create(
model=model, prompt=[prompt] * 3, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
)
assert len(response['choices'][0]['text']) > len(prompt)
assert len(response['choices']) == 3
responses = []
# Loop to create completions one at a time
for _ in range(3):
response = openai.Completion.create(
model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
)
responses.append(response)
# Assertions to check the responses
for response in responses:
assert len(response['choices'][0]['text']) > len(prompt)
assert len(responses) == 3
def test_embedding():
model = "ggml-all-MiniLM-L6-v2-f16.bin"
model = embedding
prompt = "Who is Michael Jordan?"
response = openai.Embedding.create(model=model, input=prompt)
output = response["data"][0]["embedding"]
@@ -56,4 +74,4 @@ def test_embedding():
    assert response["model"] == model
    assert isinstance(output, list)
    assert all(isinstance(x, args) for x in output)
    assert all(isinstance(x, args) for x in output)
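Since the tests now depend on MODEL_BIN and EMBEDDING being set, a more defensive variant of the loading block above could fail fast instead of falling back to placeholder names that resolve to no real model (a sketch, not part of this commit; python-dotenv's load_dotenv returns False when the file is not found):

    import os
    from dotenv import load_dotenv

    env_path = 'gpt4all-api/gpt4all_api/.env'
    if not load_dotenv(dotenv_path=env_path):
        raise RuntimeError(f"No .env file found at {env_path}")

    # A KeyError here is clearer than an opaque model-not-found error later.
    model_id = os.environ['MODEL_BIN']
    embedding = os.environ['EMBEDDING']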

@@ -1 +1 @@
# Drop GGUF-compatible models here; make sure the filename matches MODEL_BIN in your .env file
### Drop GGUF-compatible models here; make sure the filename matches MODEL_BIN in your .env file
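A quick sanity check that the dropped model file matches the variable (a sketch; paths assume you run it from the directory holding docker-compose.yaml):

    ls gpt4all_api/models
    grep MODEL_BIN gpt4all_api/.env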

gpt4all_api/requirements.txt

@@ -7,6 +7,7 @@ fastapi>=0.95.0
Jinja2>=3.0
gpt4all>=1.0.0
pytest
openai
openai==0.28.0
black
isort
isort
python-dotenv
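The exact pin matters here: the tests above use the pre-1.0 openai.Completion / openai.Embedding module-level API, which was removed in openai 1.x. To mirror the container's environment locally:

    pip install openai==0.28.0 python-dotenv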

Makefile

@@ -14,7 +14,7 @@ testenv_gpu: clean_testenv test_build
	docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up --build
testenv_d: clean_testenv test_build
	docker compose up --build -d
	docker compose env up --build -d
test:
	docker compose exec $(APP_NAME) pytest -svv --disable-warnings -p no:cacheprovider /app/tests
@@ -28,19 +28,19 @@ clean_testenv:
fresh_testenv: clean_testenv testenv
venv:
	if [ ! -d $(ROOT_DIR)/env ]; then $(PYTHON) -m venv $(ROOT_DIR)/env; fi
	if [ ! -d $(ROOT_DIR)/venv ]; then $(PYTHON) -m venv $(ROOT_DIR)/venv; fi
dependencies: venv
	source $(ROOT_DIR)/env/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt
	source $(ROOT_DIR)/venv/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt
clean: clean_testenv
	# Remove existing environment
	rm -rf $(ROOT_DIR)/env;
	rm -rf $(ROOT_DIR)/venv;
	rm -rf $(ROOT_DIR)/$(APP_NAME)/*.pyc;
black:
	source $(ROOT_DIR)/env/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME)
	source $(ROOT_DIR)/venv/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME)
isort:
	source $(ROOT_DIR)/env/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME)
	source $(ROOT_DIR)/venv/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME)
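Assuming the targets shown above, a typical local flow after the env-to-venv rename might be:

    make venv
    make dependencies
    make testenv
    make test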