BREAKING CHANGE: add Experimental function for output streaming

Adds a new output-streaming feature that enables real-time response streaming from the OpenAI server. A minimal sketch of the consumption pattern follows the commit metadata below.
pull/10/head
sean1832 1 year ago
parent e8a077e026
commit 5125811dbd
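
For orientation before the diff: the streaming path added here issues a completions request with `stream: True` and reads the response as server-sent events, so partial text can be rendered as it arrives. Below is a minimal, self-contained sketch of that pattern under the same assumptions as the code in this commit (`requests` plus `sseclient-py`); the key, model name, and prompt are placeholders, not values from this repository.

# Hedged sketch of the SSE streaming pattern this commit adopts; key, model and prompt are placeholders.
import json
import requests
import sseclient

API_KEY = 'sk-...'  # placeholder; the project reads the real key from .user\API-KEYS.txt

def stream_completion(prompt, model='text-davinci-003'):
    # Ask the completions endpoint to stream, keeping the HTTP connection open.
    response = requests.post(
        'https://api.openai.com/v1/completions',
        stream=True,
        headers={'Accept': 'text/event-stream', 'Authorization': 'Bearer ' + API_KEY},
        json={'model': model, 'prompt': prompt, 'max_tokens': 256, 'stream': True},
    )
    # Each server-sent event carries one incremental chunk of generated text.
    for event in sseclient.SSEClient(response).events():
        if event.data == '[DONE]':  # the API terminates the stream with a [DONE] sentinel
            break
        yield json.loads(event.data)['choices'][0]['text']

for token in stream_completion('Say hello.'):
    print(token, end='', flush=True)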

@@ -6,7 +6,9 @@ import modules.utilities as util
import modules.language as language
import GPT
openai.api_key = util.read_file(r'.user\API-KEYS.txt').strip()
API_KEY = util.read_file(r'.user\API-KEYS.txt').strip()
openai.api_key = API_KEY
# if 'SESSION_LANGUAGE' not in st.session_state:
# st.session_state['SESSION_LANGUAGE'] = util.read_json_at('.user/language.json', 'SESSION_LANGUAGE', 'en_US')
@@ -54,6 +56,20 @@ def run_answer(query, model, temp, max_tokens, top_p, freq_penl, pres_penl, chun
    return all_answers


def run_answer_stream(query, model, temp, max_tokens, top_p, freq_penl, pres_penl):
    brain_data = util.read_json(r'.user\brain-data.json')
    results = GPT.toolkit.search_chunks(query, brain_data, count=1)
    for result in results:
        my_info = util.read_file(f'{prompt_dir}/' + _('my-info') + '.txt')
        prompt = util.read_file(f'{prompt_dir}/' + _('question') + '.txt')
        prompt = prompt.replace('<<INFO>>', result['content'])
        prompt = prompt.replace('<<QS>>', query)
        prompt = prompt.replace('<<MY-INFO>>', my_info)
        answer_client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
    return answer_client


def run(query, model, prompt_file, temp, max_tokens, top_p, freq_penl, pres_penl):
    chunks = textwrap.wrap(query, 10000)
    responses = []
@@ -63,3 +79,10 @@ def run(query, model, prompt_file, temp, max_tokens, top_p, freq_penl, pres_penl
        responses.append(response)
    all_response = '\n\n'.join(responses)
    return all_response


def run_stream(query, model, prompt_file, temp, max_tokens, top_p, freq_penl, pres_penl):
    chunk = textwrap.wrap(query, 10000)[0]
    prompt = util.read_file(prompt_file).replace('<<DATA>>', chunk)
    client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
    return client
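
As context for how these helpers are consumed: both run_answer_stream and run_stream return the raw SSE client from GPT.toolkit.gpt3_stream rather than finished text, so the caller iterates the events and stitches the partial tokens together (the Streamlit tools later in this commit do exactly that). A rough usage sketch, in which the model name and prompt-file path are made up for illustration:

# Illustrative caller only; the model name and prompt-file path are placeholders.
import json
import GPT

client = GPT.query.run_stream('Summarise the attached notes.', 'text-davinci-003',
                              'prompts/summary.txt', 0.7, 512, 1.0, 0.0, 0.0)
answer = ''
for event in client.events():
    if event.data == '[DONE]':
        break
    answer += json.loads(event.data)['choices'][0]['text']
print(answer)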

@@ -1,5 +1,8 @@
import openai
import numpy as np
import requests
import sseclient
import json
# this function compares the similarity between two vectors.
@@ -44,3 +47,27 @@ def gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl):
    )
    text = response['choices'][0]['text'].strip()
    return text
def gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl):
    url = 'https://api.openai.com/v1/completions'
    headers = {
        'Accept': 'text/event-stream',
        'Authorization': 'Bearer ' + API_KEY
    }
    body = {
        'model': model,
        'prompt': prompt,
        'max_tokens': max_tokens,
        'temperature': temp,
        'top_p': top_p,
        'frequency_penalty': freq_penl,
        'presence_penalty': pres_penl,
        'stream': True,
    }
    req = requests.post(url, stream=True, headers=headers, json=body)
    client = sseclient.SSEClient(req)
    return client
    # print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)
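
The commented-out print above hints at how the returned client is meant to be drained from outside this module. A short hedged example of that loop, where the key, prompt, model, and parameter values are made up:

# Illustrative consumption of the client returned by gpt3_stream; all values are placeholders.
import json
import GPT

API_KEY = 'sk-...'  # placeholder key
client = GPT.toolkit.gpt3_stream(API_KEY, 'Write a haiku about rain.', 'text-davinci-003',
                                 temp=0.7, max_tokens=64, top_p=1.0, freq_penl=0.0, pres_penl=0.0)
for event in client.events():
    if event.data == '[DONE]':  # stream terminator
        break
    print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)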

@@ -2,6 +2,7 @@ import os
import time
import streamlit as st
import streamlit_toggle as st_toggle
import modules.INFO as INFO
import modules as mod
@@ -89,10 +90,16 @@ with st.sidebar:
                            help=_("The number of tokens to consider at each step. The larger this is, the more "
                                   "context the model has to work with, but the slower and more expensive "
                                   "generation will be."))
    chunk_count = st.slider(_('Answer count'), 1, 5, value=util.read_json_at(INFO.BRAIN_MEMO, 'chunk_count', 1),
                            help=_("The number of answers to generate. The model will continue to iteratively "
                                   "generate answers until it reaches the answer count."))
    enable_stream = st_toggle.st_toggle_switch(_('Stream (experimental)'),
                                               default_value=util.read_json_at(INFO.BRAIN_MEMO, 'enable_stream', True))
    if not enable_stream:
        chunk_count = st.slider(_('Answer count'), 1, 5, value=util.read_json_at(INFO.BRAIN_MEMO, 'chunk_count', 1),
                                help=_("The number of answers to generate. The model will continue to iteratively "
                                       "generate answers until it reaches the answer count."
                                       "\n\nNote that this function does not support `stream` mode."))
    else:
        chunk_count = 1
    param = GPT.model.param(temp=temp,
                            max_tokens=max_tokens,
                            top_p=top_p,
@@ -136,4 +143,4 @@ with body:
        st_tool.download_as(_("📥download log"))
    # execute brain calculation
    if not question == '' and send:
        st_tool.execute_brain(question, param, op, models, prompt_dictionary, _('question'), SESSION_LANG)
        st_tool.execute_brain(question, param, op, models, prompt_dictionary, _('question'), enable_stream, SESSION_LANG)

@@ -5,3 +5,5 @@ streamlit_tags==1.2.8
streamlit_toggle==0.1.3
streamlit_toggle_switch==1.0.2
streamlit==1.18.1
requests~=2.28.2
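
One dependency note: the new GPT.toolkit code also imports sseclient, which is provided by the sseclient-py package; if it is not already pinned elsewhere in the project, a line such as sseclient-py~=1.7.2 (the exact version is an assumption) would presumably belong here alongside requests.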

@@ -1,5 +1,6 @@ import os
import os
import time
import json
import streamlit as st
import tkinter as tk
from tkinter import filedialog
@@ -228,11 +229,37 @@ def process_response(query, target_model, prompt_file: str, data: GPT.model.para
    log(results, delimiter=f'{file_name.upper()}')


def process_response_stream(query, target_model, prompt_file: str, data: GPT.model.param):
    # check if exclude model is not target model
    file_name = util.get_file_name(prompt_file)
    with st.spinner(_('Thinking on ') + f"{file_name}..."):
        client = GPT.query.run_stream(query, target_model, prompt_file,
                                      data.temp,
                                      data.max_tokens,
                                      data.top_p,
                                      data.frequency_penalty,
                                      data.present_penalty)
    # displaying results
    st.header(f'📃{file_name}')
    response_panel = st.empty()
    previous_chars = ''
    for event in client.events():
        if event.data != '[DONE]':
            char = json.loads(event.data)['choices'][0]['text']
            response = previous_chars + char
            response_panel.info(f'{response}')
            previous_chars += char
    time.sleep(1)
    log(previous_chars, delimiter=f'{file_name.upper()}')


def execute_brain(q, params: GPT.model.param,
                  op: GPT.model.Operation,
                  model: GPT.model.Model,
                  prompt_dictionary: dict,
                  question_prompt: str,
                  stream: bool,
                  session_language,
                  ):
    # log question
@@ -246,28 +273,62 @@ def execute_brain(q, params: GPT.model.param,
        msg.success(_('Brain Updated!'), icon="👍")
        time.sleep(2)
    # thinking on answer
    with st.spinner(_('Thinking on Answer')):
        answer = GPT.query.run_answer(q, model.question_model,
                                      params.temp,
                                      params.max_tokens,
                                      params.top_p,
                                      params.frequency_penalty,
                                      params.present_penalty,
                                      chunk_count=params.chunk_count)
    if util.contains(op.operations, question_prompt):
    # =================stream=================
    if stream:
        previous_chars = ''
        is_question_selected = util.contains(op.operations, question_prompt)
        with st.spinner(_('Thinking on Answer')):
            answer_clients = GPT.query.run_answer_stream(q, model.question_model,
                                                         params.temp,
                                                         params.max_tokens,
                                                         params.top_p,
                                                         params.frequency_penalty,
                                                         params.present_penalty)
        if is_question_selected:
            # displaying results
            st.header(_('💬Answer'))
        st.info(f'{answer}')
        time.sleep(1.5)
        log(answer, delimiter='ANSWER')
    # thinking on other outputs
    if len(op.operations_no_question) > 0:
        for i in range(len(op.operations_no_question)):
            prompt_path = prompt_dictionary[op.operations_no_question[i]]
            other_model = model.other_models[i]
            process_response(answer, other_model, prompt_path, params)
            answer_panel = st.empty()
        for event in answer_clients.events():
            if event.data != '[DONE]':
                char = json.loads(event.data)['choices'][0]['text']
                answer = previous_chars + char
                if is_question_selected:
                    answer_panel.info(f'{answer}')
                previous_chars += char
                time.sleep(0.1)
        log(previous_chars, delimiter='ANSWER')
        if len(op.operations_no_question) > 0:
            for i in range(len(op.operations_no_question)):
                prompt_path = prompt_dictionary[op.operations_no_question[i]]
                other_model = model.other_models[i]
                process_response_stream(previous_chars, other_model, prompt_path, params)
    # =================stream=================
    else:
        # thinking on answer
        with st.spinner(_('Thinking on Answer')):
            answer = GPT.query.run_answer(q, model.question_model,
                                          params.temp,
                                          params.max_tokens,
                                          params.top_p,
                                          params.frequency_penalty,
                                          params.present_penalty,
                                          chunk_count=params.chunk_count)
        if util.contains(op.operations, question_prompt):
            # displaying results
            st.header(_('💬Answer'))
            st.info(f'{answer}')
            time.sleep(1.5)
            log(answer, delimiter='ANSWER')
        # thinking on other outputs
        if len(op.operations_no_question) > 0:
            for i in range(len(op.operations_no_question)):
                prompt_path = prompt_dictionary[op.operations_no_question[i]]
                other_model = model.other_models[i]
                process_response(answer, other_model, prompt_path, params)
    # convert param to dictionary
    param_dict = vars(params)
