You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
talk-codebase/talk_codebase/consts.py

62 lines
1.7 KiB
Python

import os
from pathlib import Path
from langchain.document_loaders import CSVLoader, UnstructuredWordDocumentLoader, UnstructuredEPubLoader, \
PDFMinerLoader, UnstructuredMarkdownLoader, TextLoader
# Directory names to exclude (caches, virtual environments, VCS/editor
# metadata, dependency and build output). Presumably consulted by the code
# that walks a project tree -- confirm at the call site.
EXCLUDE_DIRS = [
    "__pycache__",
    ".venv",
    ".git",
    ".idea",
    "venv",
    "env",
    "node_modules",
    "dist",
    "build",
    ".vscode",
    ".github",
    ".gitlab",
]
# File extensions (with leading dot) that are allowed. The order of the
# entries is kept exactly as before; lines are only grouped for readability.
ALLOW_FILES = [
    # plain text
    ".txt",
    # JavaScript / TypeScript
    ".js", ".mjs", ".ts", ".tsx",
    # styles, markup and data
    ".css", ".scss", ".less", ".html", ".htm", ".json",
    # general-purpose languages
    ".py", ".java", ".c", ".cpp", ".cs", ".go", ".php", ".rb", ".rs",
    ".swift", ".kt", ".scala", ".m", ".h",
    # scripting and query languages
    ".sh", ".pl", ".pm", ".lua", ".sql",
]
# Exact file names to exclude even though their extensions appear in
# ALLOW_FILES (dependency manifests and lock files).
EXCLUDE_FILES = [
    "requirements.txt",
    "package.json",
    "package-lock.json",
    "yarn.lock",
]
# Symbolic names for the supported model back ends, mapped to the lowercase
# string identifier used for each.
MODEL_TYPES = dict(
    OPENAI="openai",
    LOCAL="local",
)
# Default model directory: <home>/.cache/gpt4all. Every backslash in the
# resulting string is doubled, which only has an effect on Windows-style
# paths (presumably so the value survives being written to a config file --
# confirm against the code that persists it).
DEFAULT_MODEL_DIRECTORY = str(
    Path.home().joinpath(".cache", "gpt4all")
).replace("\\", "\\\\")

# Default configuration values. Every numeric setting is deliberately kept
# as a string, exactly as before; consumers are expected to convert them.
DEFAULT_CONFIG = {
    "max_tokens": "2056",
    "chunk_size": "2056",
    "chunk_overlap": "256",
    "k": "2",
    "temperature": "0.7",
    "model_path": DEFAULT_MODEL_DIRECTORY,
    "n_batch": "8",
}
# Maps a file extension to the document loader class used for it, together
# with an "args" dict (empty for every entry here; presumably keyword
# arguments for the loader -- confirm at the call site). Each entry gets its
# own fresh "args" dict.
LOADER_MAPPING = {
    suffix: {"loader": loader_cls, "args": {}}
    for suffix, loader_cls in (
        (".csv", CSVLoader),
        (".doc", UnstructuredWordDocumentLoader),
        (".docx", UnstructuredWordDocumentLoader),
        (".epub", UnstructuredEPubLoader),
        (".md", UnstructuredMarkdownLoader),
        (".pdf", PDFMinerLoader),
    )
}
# Give every allowed extension that has no dedicated loader a TextLoader
# entry; extensions already present in LOADER_MAPPING are left untouched.
for ext in ALLOW_FILES:
    LOADER_MAPPING.setdefault(ext, {"loader": TextLoader, "args": {}})