Add CLI for chatting with an OpenAI model

- Add CLI functionality for chatting with an OpenAI model
- Implement `configure` function to let users enter their OpenAI API key and model name
- Implement `chat` function to let users chat with the OpenAI model using retrieved documents
- Add `llm` module to handle sending questions to the OpenAI model
- Add `utils` module to load and split text documents, create the retriever, and define the `StreamStdOut` callback class
Commit 226203e4d9 by Saryev Rustam (branch pull/1/head)

.gitignore (vendored): 4 additions

@@ -0,0 +1,4 @@
/.env
/.idea/
/.vscode/
/.venv/

README.md
@@ -0,0 +1,29 @@
# talk-codebase is a powerful tool for chatting with your codebase

<p align="center">
  <img src="https://github.com/rsaryev/talk-codebase/assets/70219513/b0cb4d00-94b6-407e-8545-92e79d442d89" width="800" alt="chat">
</p>

## Description

In the chat, you can ask questions about the codebase. The AI answers them and, where useful, suggests code improvements. This is convenient when you want to find something in the codebase quickly without searching by hand. It also helps when you want to improve a specific function: ask "How can I improve the function {function name}?" and the AI will suggest improvements. The codebase is analyzed using OpenAI.
## Installation

```bash
pip install talk-codebase
```

## Usage

talk-codebase works only with source files in popular programming languages, plus `.txt` files; all other files are ignored.
```bash
# Start chatting with your codebase
talk-codebase chat <directory>
# Configure
talk-codebase configure
# Help
talk-codebase --help
```
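
An example session might look like this (a sketch; the prompts come from the CLI code in this commit, while the key, path, question, and answer are placeholders):

```bash
$ talk-codebase configure
🤖 Enter your OpenAI API key: sk-...
🤖 Enter your model name (default: gpt-3.5-turbo):
$ talk-codebase chat .
👉 How can I improve the function load_files?
🤖 You could skip binary files explicitly and log any paths that fail to load...
👉 exit
```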

poetry.lock (generated): 1316 additions

File diff suppressed because it is too large.

pyproject.toml
@@ -0,0 +1,30 @@
[tool.poetry]
name = "talk-codebase"
version = "0.1.1"
description = "talk-codebase is a powerful tool for querying and analyzing codebases."
authors = ["Saryev Rustam <rustam1997@gmail.com>"]
readme = "README.md"
packages = [{include = "talk_codebase"}]
keywords = ["chatgpt", "openai", "cli"]

[tool.poetry.dependencies]
python = "^3.9"
langchain = "^0.0.180"
fire = "^0.5.0"
openai = "^0.27.7"
tiktoken = "^0.4.0"
faiss-cpu = "^1.7.4"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[project.urls]
"Source" = "https://github.com/rsaryev/talk-codebase"
"Bug Tracker" = "https://github.com/rsaryev/talk-codebase/issues"

[tool.poetry.scripts]
talk-codebase = "talk_codebase.cli:main"
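
The `[tool.poetry.scripts]` table maps the `talk-codebase` command to `talk_codebase.cli:main`, so installing the package puts the CLI on the PATH. A quick local check (assuming Poetry is installed; these are standard Poetry commands, not part of this commit):

```bash
poetry install
poetry run talk-codebase --help
```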

requirements.txt
@@ -0,0 +1,53 @@
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
bleach==6.0.0
certifi==2023.5.7
charset-normalizer==3.1.0
dataclasses-json==0.5.7
docutils==0.20.1
faiss-cpu==1.7.4
fire==0.5.0
frozenlist==1.3.3
idna==3.4
importlib-metadata==6.6.0
jaraco.classes==3.2.3
keyring==23.13.1
langchain==0.0.180
markdown-it-py==2.2.0
marshmallow==3.19.0
marshmallow-enum==1.5.1
mdurl==0.1.2
more-itertools==9.1.0
multidict==6.0.4
mypy-extensions==1.0.0
numexpr==2.8.4
numpy==1.24.3
openai==0.27.7
openapi-schema-pydantic==1.2.4
packaging==23.1
pkginfo==1.9.6
pydantic==1.10.8
Pygments==2.15.1
PyYAML==6.0
readme-renderer==37.3
regex==2023.5.5
requests==2.31.0
requests-toolbelt==1.0.0
rfc3986==2.0.0
rich==13.3.5
six==1.16.0
SQLAlchemy==2.0.15
talk-codebase==0.1.0
tenacity==8.2.2
termcolor==2.3.0
tiktoken==0.4.0
tqdm==4.65.0
twine==4.0.2
typing-inspect==0.9.0
typing_extensions==4.6.2
urllib3==2.0.2
webencodings==0.5.1
yarl==1.9.2
zipp==3.15.0

talk_codebase/cli.py
@@ -0,0 +1,71 @@
import os

import fire
import yaml

from talk_codebase.utils import create_retriever
from talk_codebase.llm import send_question


def get_config():
    home_dir = os.path.expanduser("~")
    config_path = os.path.join(home_dir, ".config.yaml")
    if os.path.exists(config_path):
        with open(config_path, "r") as f:
            config = yaml.safe_load(f)
    else:
        config = {}
    return config


def save_config(config):
    home_dir = os.path.expanduser("~")
    config_path = os.path.join(home_dir, ".config.yaml")
    with open(config_path, "w") as f:
        yaml.dump(config, f)


def configure():
    config = get_config()
    api_key = input("🤖 Enter your OpenAI API key: ")
    model_name = input("🤖 Enter your model name (default: gpt-3.5-turbo): ") or "gpt-3.5-turbo"
    config["api_key"] = api_key
    config["model_name"] = model_name
    save_config(config)


def chat(root_dir):
    try:
        config = get_config()
        api_key = config.get("api_key")
        model_name = config.get("model_name")
        if not (api_key and model_name):
            configure()
            chat(root_dir)
            return  # restart with the freshly saved config instead of falling through with stale values
        retriever = create_retriever(root_dir, api_key)
        while True:
            question = input("👉 ")
            if not question:
                print("🤖 Please enter a question.")
                continue
            if question.lower() in ('exit', 'quit'):
                break
            send_question(question, retriever, api_key, model_name)
    except KeyboardInterrupt:
        print("\n🤖 Bye!")
    except Exception as e:
        if str(e) == "<empty message>":
            print("🤖 Please configure your API key.")
            configure()
            chat(root_dir)
        else:
            print(f"🤖 Error: {e}")


def main():
    fire.Fire({
        "chat": chat,
        "configure": configure,
    })


if __name__ == "__main__":
    main()
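
`get_config` and `save_config` persist settings as YAML in `~/.config.yaml`, with the keys `api_key` and `model_name` written by `configure`. A minimal sketch of inspecting that file (key names come from the code above; the printed values are placeholders):

```python
import os

import yaml

# Path construction mirrors get_config() above.
config_path = os.path.join(os.path.expanduser("~"), ".config.yaml")
with open(config_path) as f:
    print(yaml.safe_load(f))
# e.g. {'api_key': 'sk-...', 'model_name': 'gpt-3.5-turbo'}
```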

talk_codebase/consts.py
@@ -0,0 +1,6 @@
EXCLUDE_DIRS = ['__pycache__', '.venv', '.git', '.idea', 'venv', 'env', 'node_modules', 'dist', 'build', '.vscode',
'.github', '.gitlab']
ALLOW_FILES = ['.txt', '.js', '.mjs', '.ts', '.tsx', '.css', '.scss', '.less', '.html', '.htm', '.json', '.py',
'.java', '.c', '.cpp', '.cs', '.go', '.php', '.rb', '.rs', '.swift', '.kt', '.scala', '.m', '.h',
'.sh', '.pl', '.pm', '.lua', '.sql']
EXCLUDE_FILES = ['requirements.txt', 'package.json', 'package-lock.json', 'yarn.lock']

talk_codebase/llm.py
@@ -0,0 +1,12 @@
from langchain.callbacks.manager import CallbackManager
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI

from talk_codebase.utils import StreamStdOut


def send_question(question, retriever, openai_api_key, model_name):
    model = ChatOpenAI(model_name=model_name, openai_api_key=openai_api_key, streaming=True,
                       callback_manager=CallbackManager([StreamStdOut()]))
    qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)
    answer = qa({"question": question, "chat_history": []})
    return answer
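
`send_question` streams the reply to stdout through `StreamStdOut` and also returns the chain's result dict. A sketch of calling it outside the CLI loop (the directory path and key are placeholders; `result["answer"]` is the standard output key of `ConversationalRetrievalChain` in this langchain version):

```python
from talk_codebase.llm import send_question
from talk_codebase.utils import create_retriever

api_key = "sk-..."  # placeholder; the CLI normally reads this from ~/.config.yaml
retriever = create_retriever("./my_project", api_key)  # "./my_project" is an example path

# The reply streams to stdout as it is generated; the full result dict is also returned.
result = send_question("What does load_files do?", retriever, api_key, "gpt-3.5-turbo")
print(result["answer"])
```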

talk_codebase/utils.py
@@ -0,0 +1,54 @@
import os
import sys

from langchain import FAISS
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter

from talk_codebase.consts import EXCLUDE_DIRS, EXCLUDE_FILES, ALLOW_FILES


class StreamStdOut(StreamingStdOutCallbackHandler):
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        sys.stdout.write(token)
        sys.stdout.flush()

    def on_llm_start(self, serialized, prompts, **kwargs):
        sys.stdout.write("🤖 ")

    def on_llm_end(self, response, **kwargs):
        sys.stdout.write("\n")
        sys.stdout.flush()


def load_files(root_dir):
    docs = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        if any(exclude_dir in dirpath for exclude_dir in EXCLUDE_DIRS):
            continue
        if not filenames:
            continue
        for file in filenames:
            if any(file.endswith(allow_file) for allow_file in ALLOW_FILES) and not any(
                    file == exclude_file for exclude_file in EXCLUDE_FILES):
                try:
                    loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
                    docs.extend(loader.load_and_split())
                except Exception as e:
                    print(f"Error loading file {file}: {e}")
    print(f"🤖 Loaded {len(docs)} documents")
    return docs


def create_retriever(root_dir, openai_api_key):
    docs = load_files(root_dir)
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(docs)
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    db = FAISS.from_documents(texts, embeddings)
    retriever = db.as_retriever()
    return retriever
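
Because `create_retriever` returns a plain FAISS retriever, it can also be queried directly to see which chunks would be handed to the model. A small sketch (placeholder path and key; `get_relevant_documents` is the standard retriever method in this langchain version):

```python
from talk_codebase.utils import create_retriever

retriever = create_retriever("./my_project", "sk-...")  # placeholder path and key
docs = retriever.get_relevant_documents("Where is the CLI entry point defined?")
for doc in docs:
    # TextLoader records the originating file path in each chunk's metadata.
    print(doc.metadata.get("source"), len(doc.page_content))
```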