mirror of https://github.com/sean1832/GPT-Brain
commit
b54071fe75
@ -0,0 +1,3 @@
|
||||
from GPT import query
|
||||
from GPT import toolkit
|
||||
from GPT import model
|
@ -1,73 +0,0 @@
|
||||
import openai
|
||||
import numpy as np
|
||||
import textwrap
|
||||
import utilities
|
||||
|
||||
openai.api_key = utilities.open_file(r'.user\API-KEYS.txt').strip()
|
||||
BRAIN_DATA = utilities.read_json_file(r'.user\brain-data.json')
|
||||
|
||||
# this function compare similarity between two vectors.
|
||||
# The higher value the dot product have, the more alike between these vectors
|
||||
def similarity(v1, v2):
    """Return the dot product of two embedding vectors.

    A larger dot product means the two vectors (and therefore the texts
    they encode) are more alike.
    """
    score = np.dot(v1, v2)
    return score
|
||||
|
||||
def search_chunks(text, data, count=1):
    """Return the `count` brain-data entries most similar to `text`.

    The query is embedded once, scored against every stored vector with
    `similarity`, and the top-scoring entries are returned in descending
    score order.
    """
    query_vector = utilities.embedding(text)

    # score every stored chunk against the query embedding
    scored = [
        {'content': item['content'],
         'point': similarity(query_vector, item['vector'])}
        for item in data
    ]

    scored.sort(key=lambda entry: entry['point'], reverse=True)
    return scored[:count]
|
||||
|
||||
def gpt3(prompt, model='text-davinci-003'):
    """Send `prompt` to the OpenAI completion endpoint and return the
    completion text with surrounding whitespace stripped.
    """
    completion = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=0.1,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return completion['choices'][0]['text'].strip()
|
||||
|
||||
def main():
    """Interactive loop: answer questions from brain data, then summarize.

    Fixes over the original: the unused `answers_count` counter is removed,
    the static my-info file is read once per question instead of once per
    result, and the "SUMMRY" banner typo is corrected.
    """
    while True:
        query = input('\n\nAsk brain: ')
        results = search_chunks(query, BRAIN_DATA)

        # static context — only needs to be read once per question
        my_info = utilities.open_file(r'prompt\my-info.txt')

        answers = []
        for result in results:
            prompt = utilities.open_file(r'prompt\question.txt')
            prompt = prompt.replace('<<INFO>>', result['content'])
            prompt = prompt.replace('<<QS>>', query)
            prompt = prompt.replace('<<MY-INFO>>', my_info)

            answers.append(gpt3(prompt, model='text-davinci-003'))

        all_answers = '\n\n'.join(answers)
        print('\n\n============ANSWER============\n\n', all_answers)

        # summarize the combined answers in chunks the model can handle
        chunks = textwrap.wrap(all_answers, 10000)
        summaries = []
        for chunk in chunks:
            prompt = utilities.open_file(r'prompt\summarize.txt').replace('<<SUM>>', chunk)
            summaries.append(gpt3(prompt, model='text-curie-001'))
        print('\n\n============SUMMARY============\n\n', '\n\n'.join(summaries))


if __name__ == '__main__':
    main()
|
@ -1,25 +0,0 @@
|
||||
import openai
|
||||
import textwrap
|
||||
import utilities
|
||||
|
||||
|
||||
openai.api_key = utilities.open_file(r'.user\API-KEYS.txt').strip()
|
||||
|
||||
def main():
    """Embed the input text in 4000-character chunks and persist the
    resulting (content, vector) records as brain-data JSON.
    """
    source_text = utilities.open_file(r'.user\input.txt')

    # split text into smaller chunks of 4000 chars each
    chunks = textwrap.wrap(source_text, 4000)

    brain_data = []
    for chunk in chunks:
        # strip non-ASCII characters before sending to the embedding API
        clean = chunk.encode(encoding='ASCII', errors='ignore').decode()
        entry = {'content': chunk, 'vector': utilities.embedding(clean)}
        print(entry, '\n\n\n')
        brain_data.append(entry)

    utilities.write_json_file(brain_data, r'.user\brain-data.json')


if __name__ == '__main__':
    main()
|
@ -1,44 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import utilities
|
||||
|
||||
file_path = r'.user\input.txt'
|
||||
temp_file = r'.user\input_last-run.temp'
|
||||
sig_file = r'.user\input_sig.temp'
|
||||
|
||||
def compare_time(t1, t2):
    """Return True when the two timestamp strings are identical."""
    same = (t1 == t2)
    return same
|
||||
|
||||
def write_sig(status):
    """Persist the update signal ('updated' / 'not updated') to the sig file.

    The parameter was previously named `bool`, shadowing the builtin; it is
    only ever passed positionally in this file, so the rename is safe.
    """
    utilities.write_file(status, sig_file)
|
||||
|
||||
def check():
    """Compare input.txt's modification time against the cached value and
    write an 'updated' / 'not updated' signal file.

    Raises:
        FileNotFoundError: if the watched input file does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f'File: {file_path} does not exist.')

    # human-readable form of the file's last-modified time
    read_mod_time = time.ctime(os.path.getmtime(file_path))

    if not os.path.exists(temp_file):
        print('Temp file not exist, writing temp file...')
        # first run: remember the current mtime and signal "no change"
        utilities.write_file(read_mod_time, temp_file)
        write_sig('not updated')
        return

    cached_time = utilities.open_file(temp_file)
    if compare_time(read_mod_time, cached_time):
        write_sig('not updated')
        print('File has not been updated.')
    else:
        print('File has been updated.')
        utilities.write_file(read_mod_time, temp_file)
        write_sig('updated')
|
||||
|
||||
def main():
    """Entry point: run the update check once."""
    check()


if __name__ == '__main__':
    main()
|
@ -1,27 +0,0 @@
|
||||
@echo off
cd..
rem typo fix: was "Virtural"
echo Activating Virtual environment...
call .\venv\Scripts\activate

rem checking if input.txt is updated
python console_app\check_update.py

setlocal enabledelayedexpansion
set "tempFile=.user\input_sig.temp"

rem read the signal value written by check_update.py
for /f "usebackq delims=" %%a in ("%tempFile%") do (
    set "tempValue=%%a"
)

if "%tempValue%" == "not updated" (
    goto end
) else (
    call batch-programs\run-build-brain.bat
    cls
    echo Brain updated!
)

:end
echo running brain...
python console_app\brain.py
|
@ -1,24 +0,0 @@
|
||||
import json
|
||||
import openai
|
||||
|
||||
def open_file(filepath):
    """Read a UTF-8 text file and return its entire contents."""
    with open(filepath, 'r', encoding='utf-8') as source:
        content = source.read()
    return content
|
||||
|
||||
def write_file(content, filepath):
    """Write `content` to `filepath`, replacing any existing file.

    UTF-8 is specified explicitly so written files round-trip with
    open_file() (which reads UTF-8) regardless of the platform's default
    encoding — previously non-ASCII content could fail or corrupt on
    systems with a non-UTF-8 locale.
    """
    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(content)
|
||||
|
||||
def write_json_file(content, filepath):
    """Serialize `content` to `filepath` as pretty-printed JSON
    (2-space indent).
    """
    with open(filepath, 'w') as out:
        json.dump(content, out, indent=2)
|
||||
|
||||
def read_json_file(filepath):
    """Load and return the JSON document stored at `filepath`."""
    with open(filepath, 'r') as source:
        document = json.load(source)
    return document
|
||||
|
||||
def embedding(content, engine='text-embedding-ada-002'):
    """Embed `content` with the given OpenAI engine and return the
    embedding vector (a list of floats) for the first input.
    """
    response = openai.Embedding.create(input=content, engine=engine)
    return response['data'][0]['embedding']
|
@ -0,0 +1,31 @@
|
||||
import streamlit as st
import time
import modules.utilities as util


st.set_page_config(
    page_title='GPT Brain'
)

# paths
USER_DIR = '.user'
LOG_PATH = '.user/log'
BRAIN_MEMO = '.user/brain-memo.json'
MANIFEST = '.core/manifest.json'
INIT_LANGUAGE = '.user/language.json'

# activate session
if 'SESSION_TIME' not in st.session_state:
    # bug fix: format was "%Y%m%d-%H%H%S", which repeated the hour and
    # dropped the minutes from the session timestamp
    st.session_state['SESSION_TIME'] = time.strftime("%Y%m%d-%H%M%S")

if 'SESSION_LANGUAGE' not in st.session_state:
    st.session_state['SESSION_LANGUAGE'] = util.read_json_at(INIT_LANGUAGE, 'SESSION_LANGUAGE')

if 'FILTER_ROW_COUNT' not in st.session_state:
    st.session_state['FILTER_ROW_COUNT'] = util.read_json_at(BRAIN_MEMO, 'filter_row_count')

SESSION_TIME = st.session_state['SESSION_TIME']

CURRENT_LOG_FILE = f'{LOG_PATH}/log_{SESSION_TIME}.log'

# models
MODELS_OPTIONS = ['text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
|
@ -0,0 +1,4 @@
|
||||
from modules import language
|
||||
from modules import utilities
|
||||
from modules import check_update
|
||||
from modules import INFO
|
@ -1,6 +1,6 @@
|
||||
import os
|
||||
import time
|
||||
from modules import utilities as util
|
||||
import modules.utilities as util
|
||||
|
||||
file_path = r'.user\input.txt'
|
||||
temp_file = r'.user\input_last-run.temp'
|
@ -0,0 +1 @@
|
||||
from streamlit_toolkit import tools
|
@ -0,0 +1,286 @@
|
||||
import os
|
||||
import time
|
||||
import streamlit as st
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
|
||||
import modules.utilities as util
|
||||
import modules.INFO as INFO
|
||||
import modules as mod
|
||||
import GPT
|
||||
|
||||
_ = mod.language.set_language()
|
||||
|
||||
|
||||
def create_log():
    """Ensure the current session's log file exists, then return its path."""
    if not os.path.exists(INFO.CURRENT_LOG_FILE):
        header = f'Session {INFO.SESSION_TIME}\n\n'
        util.write_file(header, INFO.CURRENT_LOG_FILE)
    return INFO.CURRENT_LOG_FILE
|
||||
|
||||
|
||||
def log(content, delimiter=''):
    """Append `content` to the session log file, optionally underneath a
    `==============<delimiter>==============` banner.
    """
    log_file = create_log()
    banner = f'\n\n=============={delimiter}==============\n' if delimiter != '' else ''
    util.write_file(f'\n{banner + content}', log_file, 'a')
|
||||
|
||||
|
||||
def clear_log():
    """Delete every log file in the log directory except the current
    session's.
    """
    keep_name = f'log_{INFO.SESSION_TIME}.log'
    for root, _dirs, files in os.walk(INFO.LOG_PATH):
        for name in files:
            if name != keep_name:
                os.remove(os.path.join(root, name))
|
||||
|
||||
|
||||
def download_as():
    """Offer the current session log as a plain-text download button."""
    with open(INFO.CURRENT_LOG_FILE, 'rb') as log_file:
        payload = log_file.read()
    st.download_button(
        label=_("📥download log"),
        data=payload,
        file_name=f'log_{INFO.SESSION_TIME}.txt',
        mime='text/plain'
    )
|
||||
|
||||
|
||||
def save(content, path, page='', json_value: dict = None):
    """Render a Save button; on click, write `content` to `path` and
    persist the page's UI state to the brain-memo JSON, then rerun.

    Bug fix: the `json_value` fallback was `[]`, but the value is indexed
    by string keys below, so omitting the argument on the Brain Memory
    page raised TypeError. The fallback is now an empty dict.
    """
    if json_value is None:
        json_value = {}
    save_but = st.button(_('💾Save'))
    if save_but:
        util.write_file(content, path)
        st.success(_('✅File saved!'))
        # write to json file
        if page == '💽Brain Memory':
            # persist each memo field the page collected
            for key in ('delimiter', 'append_mode', 'force_mode',
                        'advanced_mode', 'filter_info', 'filter_row_count'):
                util.update_json(INFO.BRAIN_MEMO, key, json_value[key])
        time.sleep(1)
        # refresh page
        st.experimental_rerun()
|
||||
|
||||
|
||||
def match_logic(operator, filter_val, value):
    """Evaluate one filter rule.

    String operators (IS / IS NOT / CONTAINS / NOT CONTAINS) compare the
    operands directly; numeric operators interpret the rule as
    "value <op> filter_val" after converting both to float.
    Returns False for unknown operators or non-numeric operands.

    Bug fixes: the old numeric guard used str.isnumeric(), which rejects
    decimals like "3.5" (so float filters always failed), and it never
    validated `filter_val`, so float(filter_val) could raise ValueError.
    """
    if operator == 'IS':
        return filter_val == value
    elif operator == 'IS NOT':
        return filter_val != value
    elif operator == 'CONTAINS':
        return filter_val in value
    elif operator == 'NOT CONTAINS':
        return filter_val not in value

    # numeric comparisons: both operands must parse as floats
    try:
        filter_num = float(filter_val)
        value_num = float(value)
    except (TypeError, ValueError):
        return False

    if operator == 'MORE THAN':
        return value_num > filter_num
    elif operator == 'LESS THAN':
        return value_num < filter_num
    elif operator == 'MORE THAN OR EQUAL':
        return value_num >= filter_num
    elif operator == 'LESS THAN OR EQUAL':
        return value_num <= filter_num
    return False
|
||||
|
||||
|
||||
def select_directory():
    """Open a native folder-picker dialog (kept above the app window) and
    return the chosen directory path.
    """
    picker_root = tk.Tk()
    picker_root.withdraw()
    # make sure the dialog is on top of the main window
    picker_root.attributes('-topmost', True)
    chosen = filedialog.askdirectory(initialdir=os.getcwd(),
                                     title=_('Select Note Directory'))
    return chosen
|
||||
|
||||
|
||||
def match_fields(pages: list, filter_datas: list[dict]):
    """Return the pages whose frontmatter satisfies every filter rule,
    joined into a single string (separated by blank lines).

    Each page's '---' frontmatter is parsed into key/value fields; a page
    is kept only when every filter in `filter_datas` matches a field.

    Bug fix: fields were split with split(':'), which raised ValueError
    whenever a value contained another ':' (e.g. "time: 12:30") or the
    line had no colon at all; partition() splits on the first colon only.
    """
    filtered_contents = []
    for page in pages:
        fields = util.extract_frontmatter(page, delimiter='---')

        found_data = []
        for field in fields:
            if field == '':
                continue
            # split on the FIRST colon only; a colon-less line becomes a
            # key with an empty value instead of crashing
            found_key, _sep, found_value = field.partition(':')
            found_data.append({
                'key': found_key.strip(),
                'value': found_value.strip()
            })

        found_match = []
        for data in filter_datas:
            for found in found_data:
                data_key = data['key'].lower()
                data_val = data['value'].lower()
                found_key = found['key'].lower()
                found_val = found['value'].lower()
                if data_key == found_key:
                    if match_logic(data['logic'], data_val, found_val):
                        # found single match
                        found_match.append(True)

        # keep the page only if every filter rule matched
        if found_match.count(True) == len(filter_datas):
            filtered_contents.append(page)

    combined_contents = '\n\n\n\n'.join(filtered_contents)
    return combined_contents
|
||||
|
||||
|
||||
def add_filter(num, val_filter_key, val_filter_logic, val_filter_val):
    """Render one filter row (key / logic / value inputs) pre-filled with
    the given initial values, and return the three entered values.
    """
    col1, col2, col3 = st.columns(3)
    with col1:
        filter_key = st.text_input(f'Key{num}', placeholder='Key', value=val_filter_key)
    with col2:
        options = [
            'CONTAINS',
            'NOT CONTAINS',
            'IS',
            'IS NOT',
            'MORE THAN',
            'LESS THAN',
            'MORE THAN OR EQUAL',
            'LESS THAN OR EQUAL',
        ]
        default_index = util.get_index(options, val_filter_logic, 0)
        logic_select = st.selectbox(f'Logic{num}', options, index=default_index)
    with col3:
        if isinstance(val_filter_val, int):
            # zero-pad saved ints so they render like the stored strings
            val_filter_val = "{:02}".format(val_filter_val)
        filter_val = st.text_input(f'value{num}', placeholder='Value', value=val_filter_val)
    return filter_key, logic_select, filter_val
|
||||
|
||||
|
||||
def filter_data(pages: list, add_filter_button, del_filter_button):
    """Render the dynamic filter rows, apply them to `pages`, and return
    (filtered_contents, filter_datas).

    Cleanups: the two identical `except IndexError` / `except KeyError`
    blocks are merged into one tuple clause, and the loop starts at 1
    instead of skipping index 0 after the fact (row 0 was never rendered;
    the skipped try had no side effects).
    """
    init_filter_infos = util.read_json_at(INFO.BRAIN_MEMO, 'filter_info')

    filter_datas = []
    if add_filter_button:
        st.session_state['FILTER_ROW_COUNT'] += 1
    if del_filter_button:
        st.session_state['FILTER_ROW_COUNT'] -= 1

    if st.session_state['FILTER_ROW_COUNT'] >= 1:
        for i in range(1, st.session_state['FILTER_ROW_COUNT'] + 1):
            try:
                init_info = init_filter_infos[i - 1]
                init_key = init_info['key']
                init_logic = init_info['logic']
                init_val = init_info['value']
            except (IndexError, KeyError):
                # no saved state for this row (or a malformed entry)
                init_key = ''
                init_logic = 'CONTAINS'
                init_val = ''

            # add filter
            filter_key, logic_select, filter_val = add_filter(i, init_key, init_logic, init_val)
            filter_datas.append({'key': filter_key, 'logic': logic_select, 'value': filter_val})

    # filter data
    filtered_contents = match_fields(pages, filter_datas)
    return filtered_contents, filter_datas
|
||||
|
||||
|
||||
def process_response(query, target_model, prompt_file: str, data: GPT.model.param):
    """Run one prompt file against `target_model`, then display and log
    the result under the prompt's file name.
    """
    file_name = util.get_file_name(prompt_file)
    with st.spinner(_('Thinking on ') + f"{file_name}..."):
        results = GPT.query.run(
            query, target_model, prompt_file,
            data.temp,
            data.max_tokens,
            data.top_p,
            data.frequency_penalty,
            data.present_penalty,
        )
        # show what the model produced for this prompt
        st.header(f'📃{file_name}')
        st.info(f'{results}')
        time.sleep(1)
        log(results, delimiter=f'{file_name.upper()}')
|
||||
|
||||
|
||||
def execute_brain(q, params: GPT.model.param,
                  op: GPT.model.Operation,
                  model: GPT.model.Model,
                  prompt_dictionary: dict,
                  session_language):
    """Answer question `q` end-to-end: rebuild the brain if the source
    changed, query the answer model, run any extra prompt operations on
    the answer, and persist the UI state to the brain-memo JSON.

    NOTE(review): nesting below reconstructed from a flattened source —
    confirm which statements sit inside the spinner context.
    """
    # log question
    log(f'\n\n\n\n[{str(time.ctime())}] - QUESTION: {q}')

    if mod.check_update.isUpdated():
        st.success(_('Building Brain...'))
        # if brain-info is updated, re-embed the source in chunks
        GPT.query.build(params.chunk_size)
        st.success(_('Brain rebuild!'))
        time.sleep(2)

    # thinking on answer
    with st.spinner(_('Thinking on Answer')):
        answer = GPT.query.run_answer(q, model.question_model,
                                      params.temp,
                                      params.max_tokens,
                                      params.top_p,
                                      params.frequency_penalty,
                                      params.present_penalty,
                                      chunk_count=params.chunk_count)
        if util.contains(op.operations, _('question')):
            # displaying results
            st.header(_('💬Answer'))
            st.info(f'{answer}')
            time.sleep(1)
            log(answer, delimiter='ANSWER')

    # thinking on other outputs: each extra operation feeds the answer
    # through its own prompt file and model
    if len(op.operations_no_question) > 0:
        for i in range(len(op.operations_no_question)):
            prompt_path = prompt_dictionary[op.operations_no_question[i]]
            other_model = model.other_models[i]
            process_response(answer, other_model, prompt_path, params)
    # convert param to dictionary
    param_dict = vars(params)

    # write param to json (one memo key per param attribute)
    for key in param_dict:
        value = param_dict[key]
        util.update_json(INFO.BRAIN_MEMO, key, value)

    # write operation to json (keyed per UI language)
    util.update_json(INFO.BRAIN_MEMO, f'operations_{session_language}', op.operations)

    # write question model to json
    util.update_json(INFO.BRAIN_MEMO, 'question_model', model.question_model)

    # write other models to json
    for i in range(len(op.operations_no_question)):
        util.update_json(INFO.BRAIN_MEMO, f'{op.operations_no_question[i]}_model', model.other_models[i])
|
||||
|
||||
|
||||
def message(msg, condition=None):
    """Show a warning banner for `msg`; when `condition` is given, only
    show it if the condition is truthy.
    """
    if condition is None or condition:
        st.warning("⚠️" + msg)
|
Loading…
Reference in New Issue