NBs and README

pull/13/head
Beck LaBash 1 year ago
parent e531a5c0d6
commit c2159d4b93

@ -36,11 +36,50 @@ Run the trial
The logs will be sent to `./root/<run_name>`.
### To Run: reasoning (HotPotQA)
Clone this repo and move to the HotPotQA directory
We provide a set of notebooks to easily run, explore, and interact with the results of the reasoning experiments. Each experiment consists of a random sample of 100 questions from the HotPotQA distractor dataset. Each question in the sample is attempted by an agent with a specific type and reflexion strategy.
#### Setup
To get started:
1. Clone this repo and move to the HotPotQA directory:
```bash
git clone https://github.com/noahshinn024/reflexion && cd ./reflexion/hotpotqa_runs
```
2. Install the module dependencies into your environment:
```bash
pip install -r requirements.txt
```
3. Set the `OPENAI_API_KEY` environment variable to your OpenAI API key:
```bash
export OPENAI_API_KEY=<your key>
```
#### Agent Types
Agent type is determined by the notebook you choose to run. The available agent types include:
- `ReAct` - ReAct Agent
- `CoT_context` - CoT Agent given supporting context about the question
- `CoT_no_context` - CoT Agent given no supporting context about the question
The notebook for each agent type is located in the `./hotpotqa_runs/notebooks` directory.
#### Reflexion Strategies
Each notebook allows you to specify the reflexion strategy to be used by the agents. The available reflexion strategies, which are defined in an `Enum`, include:
- `ReflexionStrategy.NONE` - The agent is not given any information about its last attempt.
- `ReflexionStrategy.LAST_ATTEMPT` - The agent is given its reasoning trace from its last attempt on the question as context.
- `ReflexionStrategy.REFLEXION` - The agent is given its self-reflection on the last attempt as context.
- `ReflexionStrategy.LAST_ATTEMPT_AND_REFLEXION` - The agent is given both its reasoning trace and self-reflection on the last attempt as context.
### Another Note
Due to the nature of these experiments, it may not be feasible for individual developers to rerun the results, because access to GPT-4 is limited and its API charges are significant. All runs from the paper and additional results are logged in `./alfworld_runs/root` for decision-making and `./hotpotqa_runs/root` for reasoning.
@ -64,4 +103,4 @@ For all questions, contact [noahshinn024@gmail.com](noahshinn024@gmail.com)
journal={arXiv preprint arXiv:2303.11366},
year={2023}
}
```
```

@ -1,6 +1,6 @@
import re, string, os
from typing import List, Union, Literal
from enum import Enum
import tiktoken
from langchain import OpenAI, Wikipedia
from langchain.llms.base import BaseLLM
@ -11,6 +11,19 @@ from prompts import reflect_prompt, react_agent_prompt, react_reflect_agent_prom
from prompts import cot_agent_prompt, cot_reflect_agent_prompt, cot_reflect_prompt, COT_INSTRUCTION, COT_REFLECT_INSTRUCTION
from fewshots import WEBTHINK_SIMPLE6, REFLECTIONS, COT, COT_REFLECT
class ReflexionStrategy(Enum):
    """
    NONE: No reflection
    LAST_ATTEMPT: Use last reasoning trace in context
    REFLEXION: Apply reflexion to the next reasoning trace
    LAST_ATTEMPT_AND_REFLEXION: Use last reasoning trace in context and apply reflexion to the next reasoning trace
    """
    # NOTE: the docstring above is printed verbatim by the experiment notebooks
    # (print(ReflexionStrategy.__doc__)), so treat its wording as user-facing text.
    # The member values are also used as directory names when the notebooks save
    # their logs (os.path.join(root, ..., strategy.value, ...)); renaming a value
    # changes where results are written.
    NONE = 'base'  # no reflection step is run between attempts
    LAST_ATTEMPT = 'last_trial'  # previous scratchpad is fed back as context
    REFLEXION = 'reflexion'  # a self-reflection on the previous attempt is fed back
    LAST_ATTEMPT_AND_REFLEXION = 'last_trial_and_reflexion'  # previous scratchpad plus a self-reflection
class CoTAgent:
def __init__(self,
question: str,
@ -18,7 +31,6 @@ class CoTAgent:
key: str,
agent_prompt: PromptTemplate = cot_reflect_agent_prompt,
reflect_prompt: PromptTemplate = cot_reflect_prompt,
reflect_header: str = REFLECTION_HEADER,
cot_examples: str = COT,
reflect_examples: str = COT_REFLECT,
self_reflect_llm: BaseLLM = OpenAI(
@ -34,13 +46,11 @@ class CoTAgent:
model_kwargs={"stop": "\n"},
openai_api_key=os.environ['OPENAI_API_KEY']),
) -> None:
self.question = question
self.context = context
self.key = key
self.agent_prompt = agent_prompt
self.reflect_prompt = reflect_prompt
self.reflect_header = reflect_header
self.cot_examples = cot_examples
self.reflect_examples = reflect_examples
self.self_reflect_llm = self_reflect_llm
@ -51,12 +61,10 @@ class CoTAgent:
self.step_n: int = 0
self.reset()
def run(self, reflect: bool = True,
reflect_strategy: Union[Literal['last_attempt'],
Literal['reflexion'],
Literal['last_attempt + reflexion']] = 'reflexion') -> None:
if self.step_n > 0 and not self.is_correct() and reflect:
self.reflect(reflect_strategy)
def run(self,
reflexion_strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION) -> None:
if self.step_n > 0 and not self.is_correct() and reflexion_strategy != ReflexionStrategy.NONE:
self.reflect(reflexion_strategy)
self.reset()
self.step()
self.step_n += 1
@ -87,17 +95,15 @@ class CoTAgent:
print('Invalid action type, please try again.')
def reflect(self,
strategy: Union[Literal['last_attempt'],
Literal['reflexion'],
Literal['last_attempt + reflexion']]) -> None:
print('Reflecting...')
if strategy == 'last_attempt':
strategy: ReflexionStrategy) -> None:
print('Running Reflexion strategy...')
if strategy == ReflexionStrategy.LAST_ATTEMPT:
self.reflections = [self.scratchpad]
self.reflections_str = format_last_attempt(self.question , self.reflections[0])
elif strategy == 'reflexion':
elif strategy == ReflexionStrategy.REFLEXION:
self.reflections += [self.prompt_reflection()]
self.reflections_str = format_reflections(self.reflections)
elif strategy == 'last_attempt + reflexion':
elif strategy == ReflexionStrategy.LAST_ATTEMPT_AND_REFLEXION:
self.reflections_str = format_last_attempt(self.question , self.scratchpad)
self.reflections = [self.prompt_reflection()]
self.reflections_str += '\n'+ format_reflections(self.reflections, header = REFLECTION_AFTER_LAST_TRIAL_HEADER)
@ -253,7 +259,6 @@ class ReactReflectAgent(ReactAgent):
max_steps: int = 6,
agent_prompt: PromptTemplate = react_reflect_agent_prompt,
reflect_prompt: PromptTemplate = reflect_prompt,
reflect_header: str = REFLECTION_HEADER,
docstore: Docstore = Wikipedia(),
react_llm: BaseLLM = OpenAI(
temperature=0,
@ -269,29 +274,28 @@ class ReactReflectAgent(ReactAgent):
) -> None:
super().__init__(question, key, max_steps, agent_prompt, docstore, react_llm)
self.reflect_header = reflect_header
self.reflect_llm = reflect_llm
self.reflect_prompt = reflect_prompt
self.reflect_examples = REFLECTIONS
self.reflections: List[str] = []
self.reflections_str: str = ''
def run(self, reset = True, reflect_strategy: Union[Literal['last_attempt'], Literal['reflexion'], Literal['last_attempt + reflexion']] = 'reflexion') -> None:
def run(self, reset = True, reflect_strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION) -> None:
if (self.is_finished() or self.is_halted()) and not self.is_correct():
self.reflect(reflect_strategy)
ReactAgent.run(self, reset)
def reflect(self,
strategy: Union[Literal['last_attempt'], Literal['reflexion'], Literal['last_attempt + reflexion']]) -> None:
strategy: ReflexionStrategy) -> None:
print('Reflecting...')
if strategy == 'last_attempt':
if strategy == ReflexionStrategy.LAST_ATTEMPT:
self.reflections = [self.scratchpad]
self.reflections_str = format_last_attempt(self.question, self.reflections[0])
elif strategy == 'reflexion':
elif strategy == ReflexionStrategy.REFLEXION:
self.reflections += [self.prompt_reflection()]
self.reflections_str = format_reflections(self.reflections)
elif strategy == 'last_attempt + reflexion':
elif strategy == ReflexionStrategy.LAST_ATTEMPT_AND_REFLEXION:
self.reflections_str = format_last_attempt(self.question, self.scratchpad)
self.reflections = [self.prompt_reflection()]
self.reflections_str += format_reflections(self.reflections, header = REFLECTION_AFTER_LAST_TRIAL_HEADER)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,210 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Notebook for running Chain-of-Thought with supporting context experiments "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import sys, os\n",
"sys.path.append('..')\n",
"root = '../root/'"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"import joblib\n",
"import numpy as np\n",
"from agents import CoTAgent, ReflexionStrategy\n",
"from util import summarize_trial, log_trial, save_agents"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load the HotPotQA Sample"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"hotpot = joblib.load('../data/hotpot-qa-distractor-sample.joblib').reset_index(drop = True)\n",
"\n",
"hotpot['supporting_paragraphs'] = None\n",
"for ind, row in hotpot.iterrows():\n",
" supporting_articles = row['supporting_facts']['title']\n",
" articles = row['context']['title']\n",
" sentences = row['context']['sentences'] \n",
" supporting_paragraphs = []\n",
" for article in supporting_articles:\n",
" supporting_paragraph = ''.join(sentences[np.where(articles == article)][0])\n",
" supporting_paragraphs.append(supporting_paragraph)\n",
" supporting_paragraphs = '\\n\\n'.join(supporting_paragraphs)\n",
" hotpot.at[ind, 'supporting_paragraphs'] = supporting_paragraphs"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Define the Reflexion Strategy"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" NONE: No reflection\n",
" LAST_ATTEMPT: Use last reasoning trace in context \n",
" REFLEXION: Apply reflexion to the next reasoning trace \n",
" LAST_ATTEMPT_AND_REFLEXION: Use last reasoning trace in context and apply reflexion to the next reasoning trace \n",
" \n"
]
}
],
"source": [
"print(ReflexionStrategy.__doc__)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Initialize a CoTAgent for each question"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"from prompts import cot_agent_prompt, cot_reflect_agent_prompt, cot_reflect_prompt\n",
"from fewshots import COT, COT_REFLECT\n",
"agents = [CoTAgent(row['question'],\n",
" row['supporting_paragraphs'],\n",
" row['answer'],\n",
" agent_prompt=cot_agent_prompt if strategy == ReflexionStrategy.NONE else cot_reflect_agent_prompt,\n",
" cot_examples=COT,\n",
" reflect_prompt=cot_reflect_prompt,\n",
" reflect_examples=COT_REFLECT,\n",
" ) for _, row in hotpot.iterrows()]"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Run `n` trials"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"n = 5\n",
"trial = 0\n",
"log = ''"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(n):\n",
" for agent in [a for a in agents if not a.is_correct()]:\n",
" agent.run(reflexion_strategy = strategy)\n",
" print(f'Answer: {agent.key}')\n",
" trial += 1\n",
" log += log_trial(agents, trial)\n",
" correct, incorrect = summarize_trial(agents)\n",
" print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Save the result log"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join(root, 'CoT', 'context', strategy.value, f'{len(agents)}_questions_{trial}_trials.txt'), 'w') as f:\n",
" f.write(log)\n",
"save_agents(agents, os.path.join(root, 'CoT', 'context', strategy.value, 'agents'))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "e23f799cbd2581634725fbf6ce3480ae26192d78438dfafc8efe944acd6490d5"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -0,0 +1,190 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Notebook for running Chain-of-Thought with no supporting context experiments"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import sys, os\n",
"sys.path.append('..')\n",
"root = '../root/'"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from util import summarize_trial, log_trial, save_agents\n",
"import joblib\n",
"from agents import CoTAgent, ReflexionStrategy"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load the HotPotQA Sample"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"hotpot = joblib.load('../data/hotpot-qa-distractor-sample.joblib').reset_index(drop = True)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Define the Reflexion Strategy"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" NONE: No reflection\n",
" LAST_ATTEMPT: Use last reasoning trace in context \n",
" REFLEXION: Apply reflexion to the next reasoning trace \n",
" LAST_ATTEMPT_AND_REFLEXION: Use last reasoning trace in context and apply reflexion to the next reasoning trace \n",
" \n"
]
}
],
"source": [
"print(ReflexionStrategy.__doc__)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Initialize a CoTAgent for each question"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from prompts import cot_simple_reflect_agent_prompt, cot_simple_reflect_prompt, cot_simple_agent_prompt\n",
"from fewshots import COTQA_SIMPLE6, COT_SIMPLE_REFLECTION\n",
"\n",
"agents = [CoTAgent(question = row['question'],\n",
" context = '',\n",
" key = row['answer'],\n",
" agent_prompt=cot_simple_agent_prompt if strategy == ReflexionStrategy.NONE else cot_simple_reflect_agent_prompt,\n",
" cot_examples = COTQA_SIMPLE6,\n",
" reflect_prompt = cot_simple_reflect_prompt,\n",
" reflect_examples = COT_SIMPLE_REFLECTION,\n",
" ) for _, row in hotpot.iterrows()]"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Run `n` trials"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n = 5\n",
"trial = 0\n",
"log = ''\n",
"for i in range(n):\n",
" for agent in [a for a in agents if not a.is_correct()]:\n",
" agent.run(reflexion_strategy = strategy)\n",
" print(f'Answer: {agent.key}')\n",
" trial += 1\n",
" log += log_trial(agents, trial)\n",
" correct, incorrect = summarize_trial(agents)\n",
" print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Save the result log"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join(root, 'CoT', 'no_context', strategy.value, f'{len(agents)}_questions_{trial}_trials.txt'), 'w') as f:\n",
" f.write(log)\n",
"save_agents(agents, os.path.join(root, 'CoT', 'no_context', strategy.value, 'agents'))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "e23f799cbd2581634725fbf6ce3480ae26192d78438dfafc8efe944acd6490d5"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -1,245 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import joblib\n",
"from react_cls import ReactAgent\n",
"from mocks import DocStoreExplorerMock, LLMMock"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def summarize_trial(agents):\n",
" correct = [a for a in agents if a.is_correct()]\n",
" halted = [a for a in agents if a.is_halted()]\n",
" incorrect = [a for a in agents if a.is_finished() and not a.is_correct()]\n",
" return correct, incorrect, halted\n",
"\n",
"def log_trial(agents, trial_n):\n",
" correct, incorrect, halted = summarize_trial(agents)\n",
"\n",
" log = f\"\"\"\n",
"########################################\n",
"BEGIN TRIAL {trial_n}\n",
"Trial summary: Correct: {len(correct)}, Incorrect: {len(incorrect)}, Halted: {len(halted)}\n",
"#######################################\n",
"\"\"\"\n",
"\n",
" log += '------------- BEGIN CORRECT AGENTS -------------\\n\\n'\n",
" for agent in correct:\n",
" log += f'Question: {agent.question}{agent.scratchpad}\\nCorrect answer: {agent.key}\\n\\n'\n",
"\n",
" log += '------------- BEGIN INCORRECT AGENTS -----------\\n\\n'\n",
" for agent in incorrect:\n",
" log += f'Question: {agent.question}{agent.scratchpad}\\nCorrect answer: {agent.key}\\n\\n'\n",
"\n",
" log += '------------- BEGIN HALTED AGENTS --------------\\n\\n'\n",
" for agent in halted:\n",
" log += f'Question: {agent.question}{agent.scratchpad}\\nCorrect answer: {agent.key}\\n\\n'\n",
"\n",
" return log"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"hotpot = joblib.load('data/hotpot-qa-distractor-sample.joblib').reset_index(drop = True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"agents = [ReactAgent(row['question'], row['answer']) for _, row in hotpot.iterrows()]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"trial = 0\n",
"log = ''"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"q = 0"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Trial: 4 (0/66)\n",
"Trial: 4 (1/66)\n",
"Trial: 4 (2/66)\n",
"Trial: 4 (3/66)\n",
"Trial: 4 (4/66)\n",
"Trial: 4 (5/66)\n",
"Trial: 4 (6/66)\n",
"Trial: 4 (7/66)\n",
"Trial: 4 (8/66)\n",
"Trial: 4 (9/66)\n",
"Trial: 4 (10/66)\n",
"Trial: 4 (11/66)\n",
"Trial: 4 (12/66)\n",
"Trial: 4 (13/66)\n",
"Trial: 4 (14/66)\n",
"Trial: 4 (15/66)\n",
"Trial: 4 (16/66)\n",
"Trial: 4 (17/66)\n",
"Trial: 4 (18/66)\n",
"Trial: 4 (19/66)\n",
"Trial: 4 (20/66)\n",
"Trial: 4 (21/66)\n",
"Trial: 4 (22/66)\n",
"Trial: 4 (23/66)\n",
"Trial: 4 (24/66)\n",
"Trial: 4 (25/66)\n",
"Trial: 4 (26/66)\n",
"Trial: 4 (27/66)\n",
"Trial: 4 (28/66)\n",
"Trial: 4 (29/66)\n",
"Trial: 4 (30/66)\n",
"Trial: 4 (31/66)\n",
"Trial: 4 (32/66)\n",
"Trial: 4 (33/66)\n",
"Trial: 4 (34/66)\n",
"Trial: 4 (35/66)\n",
"Trial: 4 (36/66)\n",
"Trial: 4 (37/66)\n",
"Trial: 4 (38/66)\n",
"Trial: 4 (39/66)\n",
"Trial: 4 (40/66)\n",
"Trial: 4 (41/66)\n",
"Trial: 4 (42/66)\n",
"Trial: 4 (43/66)\n",
"Trial: 4 (44/66)\n",
"Trial: 4 (45/66)\n",
"Trial: 4 (46/66)\n",
"Trial: 4 (47/66)\n",
"Trial: 4 (48/66)\n",
"Trial: 4 (49/66)\n",
"Trial: 4 (50/66)\n",
"Trial: 4 (51/66)\n",
"Trial: 4 (52/66)\n",
"Trial: 4 (53/66)\n",
"Trial: 4 (54/66)\n",
"Trial: 4 (55/66)\n",
"Trial: 4 (56/66)\n",
"Trial: 4 (57/66)\n",
"Trial: 4 (58/66)\n",
"Trial: 4 (59/66)\n",
"Trial: 4 (60/66)\n",
"Trial: 4 (61/66)\n",
"Trial: 4 (62/66)\n",
"Trial: 4 (63/66)\n",
"Trial: 4 (64/66)\n",
"Trial: 4 (65/66)\n",
"Finished Trial 5, Correct: 34, Incorrect: 56, Halted: 12\n"
]
}
],
"source": [
"agents_to_run = [a for a in agents if not a.is_correct()]\n",
"\n",
"while q < len(agents_to_run):\n",
" print(f'Trial: {trial} ({q}/{len(agents_to_run)})')\n",
" agents_to_run[q].run()\n",
" q += 1\n",
"\n",
"trial += 1\n",
"\n",
"log += log_trial(agents, trial)\n",
"correct, incorrect, halted = summarize_trial(agents)\n",
"print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}, Halted: {len(halted)}')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"with open('output/base_react/100_questions_5_trials.txt', 'w') as f:\n",
" f.write(log)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['output/base_react_dicts.joblib']"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dicts = [dict(a.__dict__) for a in agents]\n",
"for d in dicts:\n",
" for k, v in d.items():\n",
" d[k] = str(v)\n",
"\n",
"joblib.dump(dicts, 'output/base_react_dicts.joblib')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.9"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "e23f799cbd2581634725fbf6ce3480ae26192d78438dfafc8efe944acd6490d5"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -1,215 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import joblib\n",
"from react_cls import ReactReflectAgent, format_reflections\n",
"from mocks import DocStoreExplorerMock, LLMMock"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def summarize_trial(agents):\n",
" correct = [a for a in agents if a.is_correct()]\n",
" incorrect = [a for a in agents if a.is_finished() and not a.is_correct()]\n",
" return correct, incorrect\n",
"\n",
"def remove_fewshot(prompt: str) -> str:\n",
" prefix = prompt.split('Here are some examples:')[0]\n",
" suffix = prompt.split('(END OF EXAMPLES)')[1]\n",
" return prefix.strip('\\n').strip() +'\\n' + suffix.strip('\\n').strip()\n",
"\n",
"def log_trial(agents, trial_n):\n",
" correct, incorrect = summarize_trial(agents)\n",
"\n",
" log = f\"\"\"\n",
"########################################\n",
"BEGIN TRIAL {trial_n}\n",
"Trial summary: Correct: {len(correct)}, Incorrect: {len(incorrect)}\n",
"#######################################\n",
"\"\"\"\n",
"\n",
" log += '------------- BEGIN CORRECT AGENTS -------------\\n\\n'\n",
" for agent in correct:\n",
" log += remove_fewshot(agent._build_agent_prompt()) + f'\\nCorrect answer: {agent.key}\\n\\n'\n",
"\n",
" log += '------------- BEGIN INCORRECT AGENTS -----------\\n\\n'\n",
" for agent in incorrect:\n",
" log += remove_fewshot(agent._build_agent_prompt()) + f'\\nCorrect answer: {agent.key}\\n\\n'\n",
"\n",
" return log\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"hotpot = joblib.load('data/hotpot-qa-distractor-sample.joblib').reset_index(drop = True)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"agents = [ReactReflectAgent(row['question'], row['answer']) for _, row in hotpot.iterrows()]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"trial = 0\n",
"log = ''\n",
"last_correct = 0 "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for agent in [a for a in agents if not a.is_correct()]:\n",
" agent.run(reflect_strategy='last_attempt')\n",
" print(f'Answer: {agent.key}')\n",
"trial += 1\n",
"log += log_trial(agents, trial)\n",
"correct, incorrect = summarize_trial(agents)\n",
"print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['output/last_trial_react/react_incorrect_dicts_trial_0.joblib']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dicts = [dict(a.__dict__) for a in incorrect]\n",
"for d in dicts:\n",
" for k, v in d.items():\n",
" d[k] = str(v)\n",
"\n",
"joblib.dump(dicts, 'output/last_trial_react/react_incorrect_dicts_trial_0.joblib')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"while last_correct != correct:\n",
" last_correct, _ = summarize_trial(agents)\n",
" for agent in [a for a in agents if not a.is_correct()]:\n",
" agent.run(reflect_strategy='last_attempt')\n",
" print(f'Answer: {agent.key}')\n",
" trial += 1\n",
" log += log_trial(agents, trial)\n",
" correct, incorrect = summarize_trial(agents)\n",
" print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for agent in [a for a in agents if not a.is_correct()]:\n",
" agent.run(reflect_strategy='last_attempt + reflexion')\n",
" print(f'Answer: {agent.key}')\n",
"trial += 1\n",
"log += log_trial(agents, trial)\n",
"correct, incorrect = summarize_trial(agents)\n",
"print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"with open('output/last_trial_react/100_questions_5_trials.txt', 'w') as f:\n",
" f.write(log)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['output/reflect/react_reflect_50_correct_dicts.joblib']"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dicts = [dict(a.__dict__) for a in correct]\n",
"for d in dicts:\n",
" for k, v in d.items():\n",
" d[k] = str(v)\n",
"\n",
"joblib.dump(dicts, 'output/reflect/react_reflect_50_correct_dicts.joblib')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.9"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "e23f799cbd2581634725fbf6ce3480ae26192d78438dfafc8efe944acd6490d5"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -0,0 +1,249 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Notebook for running ReAct experiments"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import sys, os\n",
"sys.path.append('..')\n",
"root = '../root/'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import joblib\n",
"from util import summarize_react_trial, log_react_trial, save_agents\n",
"from agents import ReactReflectAgent, ReactAgent, ReflexionStrategy"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load the HotPotQA Sample"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"hotpot = joblib.load('../data/hotpot-qa-distractor-sample.joblib').reset_index(drop = True)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Define the Reflexion Strategy"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" NONE: No reflection\n",
" LAST_ATTEMPT: Use last reasoning trace in context \n",
" REFLEXION: Apply reflexion to the next reasoning trace \n",
" LAST_ATTEMPT_AND_REFLEXION: Use last reasoning trace in context and apply reflexion to the next reasoning trace \n",
" \n"
]
}
],
"source": [
"print(ReflexionStrategy.__doc__)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Initialize a React Agent for each question"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"agent_cls = ReactReflectAgent if strategy != ReflexionStrategy.NONE else ReactAgent\n",
"agents = [agent_cls(row['question'], row['answer']) for _, row in hotpot.iterrows()]"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Run `n` trials"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"n = 5\n",
"trial = 0\n",
"log = ''"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reflecting...\n",
"You have attempted to answer following question before and failed. The following reflection(s) give a plan to avoid failing to answer the question in the same way you did previously. Use them to improve your strategy of correctly answering the given question.\n",
"Reflections:\n",
"- I got stuck in a loop where I kept trying to search 'VIVA Media AG 2004 name change acronym stands for' but the page could not be found. Instead I should have tried to search the similar results that had a similar name to see if they had the answer I was looking for.\n",
"Thought 1: I need to search VIVA Media AG and find what their new acronym stands for after their name change in 2004.\n",
"Action 1: Search[VIVA Media AG]\n",
"Observation 1: Could not find [VIVA Media AG]. Similar: ['MTV Music (Polish TV channel)', 'Paramount International Networks', 'VIVA Plus', 'Viacom (19522006)', 'Vauxhall Viva', 'Sartorius AG', 'GfK Entertainment charts', 'Viva Rapid Transit', 'Aivita Muze', 'Spellbound Entertainment']\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[16], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[39mfor\u001b[39;00m agent \u001b[39min\u001b[39;00m [a \u001b[39mfor\u001b[39;00m a \u001b[39min\u001b[39;00m agents \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m a\u001b[39m.\u001b[39mis_correct()]:\n\u001b[1;32m 3\u001b[0m \u001b[39mif\u001b[39;00m strategy \u001b[39m!=\u001b[39m ReflexionStrategy\u001b[39m.\u001b[39mNONE:\n\u001b[0;32m----> 4\u001b[0m agent\u001b[39m.\u001b[39;49mrun(reflect_strategy \u001b[39m=\u001b[39;49m strategy)\n\u001b[1;32m 5\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 6\u001b[0m agent\u001b[39m.\u001b[39mrun()\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/hotpotqa_runs/notebooks/../agents.py:287\u001b[0m, in \u001b[0;36mReactReflectAgent.run\u001b[0;34m(self, reset, reflect_strategy)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[39mif\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_finished() \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_halted()) \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_correct():\n\u001b[1;32m 285\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreflect(reflect_strategy)\n\u001b[0;32m--> 287\u001b[0m ReactAgent\u001b[39m.\u001b[39;49mrun(\u001b[39mself\u001b[39;49m, reset)\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/hotpotqa_runs/notebooks/../agents.py:180\u001b[0m, in \u001b[0;36mReactAgent.run\u001b[0;34m(self, reset)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m__reset_agent()\n\u001b[1;32m 179\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_halted() \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_finished():\n\u001b[0;32m--> 180\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mstep()\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/hotpotqa_runs/notebooks/../agents.py:185\u001b[0m, in \u001b[0;36mReactAgent.step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mstep\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 183\u001b[0m \u001b[39m# Think\u001b[39;00m\n\u001b[1;32m 184\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscratchpad \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mThought \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstep_n\u001b[39m}\u001b[39;00m\u001b[39m:\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m--> 185\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscratchpad \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m \u001b[39m+\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mprompt_agent()\n\u001b[1;32m 186\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscratchpad\u001b[39m.\u001b[39msplit(\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m)[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m])\n\u001b[1;32m 188\u001b[0m \u001b[39m# Act\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/hotpotqa_runs/notebooks/../agents.py:229\u001b[0m, in \u001b[0;36mReactAgent.prompt_agent\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mprompt_agent\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[0;32m--> 229\u001b[0m \u001b[39mreturn\u001b[39;00m format_step(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mllm(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_build_agent_prompt()))\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/langchain/llms/base.py:281\u001b[0m, in \u001b[0;36mBaseLLM.__call__\u001b[0;34m(self, prompt, stop, callbacks)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\n\u001b[1;32m 277\u001b[0m \u001b[39mself\u001b[39m, prompt: \u001b[39mstr\u001b[39m, stop: Optional[List[\u001b[39mstr\u001b[39m]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m, callbacks: Callbacks \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 278\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m 279\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Check Cache and run the LLM on the given prompt and input.\"\"\"\u001b[39;00m\n\u001b[1;32m 280\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 281\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgenerate([prompt], stop\u001b[39m=\u001b[39;49mstop, callbacks\u001b[39m=\u001b[39;49mcallbacks)\n\u001b[1;32m 282\u001b[0m \u001b[39m.\u001b[39mgenerations[\u001b[39m0\u001b[39m][\u001b[39m0\u001b[39m]\n\u001b[1;32m 283\u001b[0m \u001b[39m.\u001b[39mtext\n\u001b[1;32m 284\u001b[0m )\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/langchain/llms/base.py:176\u001b[0m, in \u001b[0;36mBaseLLM.generate\u001b[0;34m(self, prompts, stop, callbacks)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 175\u001b[0m run_manager\u001b[39m.\u001b[39mon_llm_error(e)\n\u001b[0;32m--> 176\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 177\u001b[0m run_manager\u001b[39m.\u001b[39mon_llm_end(output)\n\u001b[1;32m 178\u001b[0m \u001b[39mreturn\u001b[39;00m output\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/langchain/llms/base.py:170\u001b[0m, in \u001b[0;36mBaseLLM.generate\u001b[0;34m(self, prompts, stop, callbacks)\u001b[0m\n\u001b[1;32m 165\u001b[0m run_manager \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_llm_start(\n\u001b[1;32m 166\u001b[0m {\u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m}, prompts\n\u001b[1;32m 167\u001b[0m )\n\u001b[1;32m 168\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 169\u001b[0m output \u001b[39m=\u001b[39m (\n\u001b[0;32m--> 170\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_generate(prompts, stop\u001b[39m=\u001b[39;49mstop, run_manager\u001b[39m=\u001b[39;49mrun_manager)\n\u001b[1;32m 171\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported\n\u001b[1;32m 172\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_generate(prompts, stop\u001b[39m=\u001b[39mstop)\n\u001b[1;32m 173\u001b[0m )\n\u001b[1;32m 174\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 175\u001b[0m run_manager\u001b[39m.\u001b[39mon_llm_error(e)\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/langchain/llms/openai.py:306\u001b[0m, in \u001b[0;36mBaseOpenAI._generate\u001b[0;34m(self, prompts, stop, run_manager)\u001b[0m\n\u001b[1;32m 304\u001b[0m choices\u001b[39m.\u001b[39mextend(response[\u001b[39m\"\u001b[39m\u001b[39mchoices\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 305\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 306\u001b[0m response \u001b[39m=\u001b[39m completion_with_retry(\u001b[39mself\u001b[39;49m, prompt\u001b[39m=\u001b[39;49m_prompts, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mparams)\n\u001b[1;32m 307\u001b[0m choices\u001b[39m.\u001b[39mextend(response[\u001b[39m\"\u001b[39m\u001b[39mchoices\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 308\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstreaming:\n\u001b[1;32m 309\u001b[0m \u001b[39m# Can't update token usage if streaming\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/langchain/llms/openai.py:106\u001b[0m, in \u001b[0;36mcompletion_with_retry\u001b[0;34m(llm, **kwargs)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[39m@retry_decorator\u001b[39m\n\u001b[1;32m 103\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_completion_with_retry\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Any:\n\u001b[1;32m 104\u001b[0m \u001b[39mreturn\u001b[39;00m llm\u001b[39m.\u001b[39mclient\u001b[39m.\u001b[39mcreate(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m--> 106\u001b[0m \u001b[39mreturn\u001b[39;00m _completion_with_retry(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/tenacity/__init__.py:289\u001b[0m, in \u001b[0;36mBaseRetrying.wraps.<locals>.wrapped_f\u001b[0;34m(*args, **kw)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[39m@functools\u001b[39m\u001b[39m.\u001b[39mwraps(f)\n\u001b[1;32m 288\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrapped_f\u001b[39m(\u001b[39m*\u001b[39margs: t\u001b[39m.\u001b[39mAny, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkw: t\u001b[39m.\u001b[39mAny) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m t\u001b[39m.\u001b[39mAny:\n\u001b[0;32m--> 289\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m(f, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkw)\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/tenacity/__init__.py:379\u001b[0m, in \u001b[0;36mRetrying.__call__\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 377\u001b[0m retry_state \u001b[39m=\u001b[39m RetryCallState(retry_object\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m, fn\u001b[39m=\u001b[39mfn, args\u001b[39m=\u001b[39margs, kwargs\u001b[39m=\u001b[39mkwargs)\n\u001b[1;32m 378\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 379\u001b[0m do \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49miter(retry_state\u001b[39m=\u001b[39;49mretry_state)\n\u001b[1;32m 380\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(do, DoAttempt):\n\u001b[1;32m 381\u001b[0m \u001b[39mtry\u001b[39;00m:\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/tenacity/__init__.py:314\u001b[0m, in \u001b[0;36mBaseRetrying.iter\u001b[0;34m(self, retry_state)\u001b[0m\n\u001b[1;32m 312\u001b[0m is_explicit_retry \u001b[39m=\u001b[39m fut\u001b[39m.\u001b[39mfailed \u001b[39mand\u001b[39;00m \u001b[39misinstance\u001b[39m(fut\u001b[39m.\u001b[39mexception(), TryAgain)\n\u001b[1;32m 313\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (is_explicit_retry \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mretry(retry_state)):\n\u001b[0;32m--> 314\u001b[0m \u001b[39mreturn\u001b[39;00m fut\u001b[39m.\u001b[39;49mresult()\n\u001b[1;32m 316\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mafter \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 317\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mafter(retry_state)\n",
"File \u001b[0;32m/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py:439\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[39mraise\u001b[39;00m CancelledError()\n\u001b[1;32m 438\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_state \u001b[39m==\u001b[39m FINISHED:\n\u001b[0;32m--> 439\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__get_result()\n\u001b[1;32m 441\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_condition\u001b[39m.\u001b[39mwait(timeout)\n\u001b[1;32m 443\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_state \u001b[39min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n",
"File \u001b[0;32m/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py:391\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 389\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception:\n\u001b[1;32m 390\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 391\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception\n\u001b[1;32m 392\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 393\u001b[0m \u001b[39m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[1;32m 394\u001b[0m \u001b[39mself\u001b[39m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/tenacity/__init__.py:382\u001b[0m, in \u001b[0;36mRetrying.__call__\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(do, DoAttempt):\n\u001b[1;32m 381\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 382\u001b[0m result \u001b[39m=\u001b[39m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 383\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m: \u001b[39m# noqa: B902\u001b[39;00m\n\u001b[1;32m 384\u001b[0m retry_state\u001b[39m.\u001b[39mset_exception(sys\u001b[39m.\u001b[39mexc_info()) \u001b[39m# type: ignore[arg-type]\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/langchain/llms/openai.py:104\u001b[0m, in \u001b[0;36mcompletion_with_retry.<locals>._completion_with_retry\u001b[0;34m(**kwargs)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[39m@retry_decorator\u001b[39m\n\u001b[1;32m 103\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_completion_with_retry\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Any:\n\u001b[0;32m--> 104\u001b[0m \u001b[39mreturn\u001b[39;00m llm\u001b[39m.\u001b[39;49mclient\u001b[39m.\u001b[39;49mcreate(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/openai/api_resources/completion.py:25\u001b[0m, in \u001b[0;36mCompletion.create\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 24\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 25\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mcreate(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 26\u001b[0m \u001b[39mexcept\u001b[39;00m TryAgain \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 27\u001b[0m \u001b[39mif\u001b[39;00m timeout \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m time\u001b[39m.\u001b[39mtime() \u001b[39m>\u001b[39m start \u001b[39m+\u001b[39m timeout:\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py:153\u001b[0m, in \u001b[0;36mEngineAPIResource.create\u001b[0;34m(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 128\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate\u001b[39m(\n\u001b[1;32m 129\u001b[0m \u001b[39mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams,\n\u001b[1;32m 137\u001b[0m ):\n\u001b[1;32m 138\u001b[0m (\n\u001b[1;32m 139\u001b[0m deployment_id,\n\u001b[1;32m 140\u001b[0m engine,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 150\u001b[0m api_key, api_base, api_type, api_version, organization, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[1;32m 151\u001b[0m )\n\u001b[0;32m--> 153\u001b[0m response, _, api_key \u001b[39m=\u001b[39m requestor\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 154\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 155\u001b[0m url,\n\u001b[1;32m 156\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 157\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 158\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 159\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[1;32m 160\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 161\u001b[0m )\n\u001b[1;32m 163\u001b[0m \u001b[39mif\u001b[39;00m stream:\n\u001b[1;32m 164\u001b[0m \u001b[39m# must be an iterator\u001b[39;00m\n\u001b[1;32m 165\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(response, OpenAIResponse)\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/openai/api_requestor.py:216\u001b[0m, in \u001b[0;36mAPIRequestor.request\u001b[0;34m(self, method, url, params, headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrequest\u001b[39m(\n\u001b[1;32m 206\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 207\u001b[0m method,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 214\u001b[0m request_timeout: Optional[Union[\u001b[39mfloat\u001b[39m, Tuple[\u001b[39mfloat\u001b[39m, \u001b[39mfloat\u001b[39m]]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 215\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], \u001b[39mbool\u001b[39m, \u001b[39mstr\u001b[39m]:\n\u001b[0;32m--> 216\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest_raw(\n\u001b[1;32m 217\u001b[0m method\u001b[39m.\u001b[39;49mlower(),\n\u001b[1;32m 218\u001b[0m url,\n\u001b[1;32m 219\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 220\u001b[0m supplied_headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 221\u001b[0m files\u001b[39m=\u001b[39;49mfiles,\n\u001b[1;32m 222\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 223\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[1;32m 224\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 225\u001b[0m )\n\u001b[1;32m 226\u001b[0m resp, got_stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_interpret_response(result, stream)\n\u001b[1;32m 227\u001b[0m \u001b[39mreturn\u001b[39;00m resp, got_stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapi_key\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/openai/api_requestor.py:516\u001b[0m, in \u001b[0;36mAPIRequestor.request_raw\u001b[0;34m(self, method, url, params, supplied_headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 514\u001b[0m _thread_context\u001b[39m.\u001b[39msession \u001b[39m=\u001b[39m _make_session()\n\u001b[1;32m 515\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 516\u001b[0m result \u001b[39m=\u001b[39m _thread_context\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 517\u001b[0m method,\n\u001b[1;32m 518\u001b[0m abs_url,\n\u001b[1;32m 519\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 520\u001b[0m data\u001b[39m=\u001b[39;49mdata,\n\u001b[1;32m 521\u001b[0m files\u001b[39m=\u001b[39;49mfiles,\n\u001b[1;32m 522\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 523\u001b[0m timeout\u001b[39m=\u001b[39;49mrequest_timeout \u001b[39mif\u001b[39;49;00m request_timeout \u001b[39melse\u001b[39;49;00m TIMEOUT_SECS,\n\u001b[1;32m 524\u001b[0m proxies\u001b[39m=\u001b[39;49m_thread_context\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mproxies,\n\u001b[1;32m 525\u001b[0m )\n\u001b[1;32m 526\u001b[0m \u001b[39mexcept\u001b[39;00m requests\u001b[39m.\u001b[39mexceptions\u001b[39m.\u001b[39mTimeout \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 527\u001b[0m \u001b[39mraise\u001b[39;00m error\u001b[39m.\u001b[39mTimeout(\u001b[39m\"\u001b[39m\u001b[39mRequest timed out: \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(e)) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/requests/sessions.py:587\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 582\u001b[0m send_kwargs \u001b[39m=\u001b[39m {\n\u001b[1;32m 583\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtimeout\u001b[39m\u001b[39m\"\u001b[39m: timeout,\n\u001b[1;32m 584\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mallow_redirects\u001b[39m\u001b[39m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 585\u001b[0m }\n\u001b[1;32m 586\u001b[0m send_kwargs\u001b[39m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 587\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(prep, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49msend_kwargs)\n\u001b[1;32m 589\u001b[0m \u001b[39mreturn\u001b[39;00m resp\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/requests/sessions.py:701\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 698\u001b[0m start \u001b[39m=\u001b[39m preferred_clock()\n\u001b[1;32m 700\u001b[0m \u001b[39m# Send the request\u001b[39;00m\n\u001b[0;32m--> 701\u001b[0m r \u001b[39m=\u001b[39m adapter\u001b[39m.\u001b[39;49msend(request, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 703\u001b[0m \u001b[39m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 704\u001b[0m elapsed \u001b[39m=\u001b[39m preferred_clock() \u001b[39m-\u001b[39m start\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[39m=\u001b[39m TimeoutSauce(connect\u001b[39m=\u001b[39mtimeout, read\u001b[39m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49murlopen(\n\u001b[1;32m 487\u001b[0m method\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mmethod,\n\u001b[1;32m 488\u001b[0m url\u001b[39m=\u001b[39;49murl,\n\u001b[1;32m 489\u001b[0m body\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mbody,\n\u001b[1;32m 490\u001b[0m headers\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mheaders,\n\u001b[1;32m 491\u001b[0m redirect\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 492\u001b[0m assert_same_host\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 493\u001b[0m preload_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 494\u001b[0m decode_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 495\u001b[0m retries\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_retries,\n\u001b[1;32m 496\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 497\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 498\u001b[0m )\n\u001b[1;32m 500\u001b[0m \u001b[39mexcept\u001b[39;00m (ProtocolError, \u001b[39mOSError\u001b[39;00m) \u001b[39mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m(err, request\u001b[39m=\u001b[39mrequest)\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/urllib3/connectionpool.py:790\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 787\u001b[0m response_conn \u001b[39m=\u001b[39m conn \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m release_conn \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[39m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 790\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_request(\n\u001b[1;32m 791\u001b[0m conn,\n\u001b[1;32m 792\u001b[0m method,\n\u001b[1;32m 793\u001b[0m url,\n\u001b[1;32m 794\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout_obj,\n\u001b[1;32m 795\u001b[0m body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 796\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 797\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 798\u001b[0m retries\u001b[39m=\u001b[39;49mretries,\n\u001b[1;32m 799\u001b[0m response_conn\u001b[39m=\u001b[39;49mresponse_conn,\n\u001b[1;32m 800\u001b[0m preload_content\u001b[39m=\u001b[39;49mpreload_content,\n\u001b[1;32m 801\u001b[0m decode_content\u001b[39m=\u001b[39;49mdecode_content,\n\u001b[1;32m 802\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mresponse_kw,\n\u001b[1;32m 803\u001b[0m )\n\u001b[1;32m 805\u001b[0m \u001b[39m# Everything went great!\u001b[39;00m\n\u001b[1;32m 806\u001b[0m clean_exit \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[39m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 537\u001b[0m \u001b[39mexcept\u001b[39;00m (BaseSSLError, \u001b[39mOSError\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_raise_timeout(err\u001b[39m=\u001b[39me, url\u001b[39m=\u001b[39murl, timeout_value\u001b[39m=\u001b[39mread_timeout)\n",
"File \u001b[0;32m~/Desktop/Projects/reflexion/.venv/lib/python3.9/site-packages/urllib3/connection.py:454\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mresponse\u001b[39;00m \u001b[39mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 453\u001b[0m \u001b[39m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m httplib_response \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 456\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 457\u001b[0m assert_header_parsing(httplib_response\u001b[39m.\u001b[39mmsg)\n",
"File \u001b[0;32m/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py:1377\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1375\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1376\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1377\u001b[0m response\u001b[39m.\u001b[39;49mbegin()\n\u001b[1;32m 1378\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m:\n\u001b[1;32m 1379\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n",
"File \u001b[0;32m/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py:320\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[39m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 319\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 320\u001b[0m version, status, reason \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_read_status()\n\u001b[1;32m 321\u001b[0m \u001b[39mif\u001b[39;00m status \u001b[39m!=\u001b[39m CONTINUE:\n\u001b[1;32m 322\u001b[0m \u001b[39mbreak\u001b[39;00m\n",
"File \u001b[0;32m/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py:281\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_read_status\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 281\u001b[0m line \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfp\u001b[39m.\u001b[39;49mreadline(_MAXLINE \u001b[39m+\u001b[39;49m \u001b[39m1\u001b[39;49m), \u001b[39m\"\u001b[39m\u001b[39miso-8859-1\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 282\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(line) \u001b[39m>\u001b[39m _MAXLINE:\n\u001b[1;32m 283\u001b[0m \u001b[39mraise\u001b[39;00m LineTooLong(\u001b[39m\"\u001b[39m\u001b[39mstatus line\u001b[39m\u001b[39m\"\u001b[39m)\n",
"File \u001b[0;32m/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/socket.py:704\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 703\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 704\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv_into(b)\n\u001b[1;32m 705\u001b[0m \u001b[39mexcept\u001b[39;00m timeout:\n\u001b[1;32m 706\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_timeout_occurred \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n",
"File \u001b[0;32m/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py:1242\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1238\u001b[0m \u001b[39mif\u001b[39;00m flags \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 1239\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 1240\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[1;32m 1241\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[0;32m-> 1242\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(nbytes, buffer)\n\u001b[1;32m 1243\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1244\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
"File \u001b[0;32m/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py:1100\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1098\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1099\u001b[0m \u001b[39mif\u001b[39;00m buffer \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1100\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m, buffer)\n\u001b[1;32m 1101\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1102\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sslobj\u001b[39m.\u001b[39mread(\u001b[39mlen\u001b[39m)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"for i in range(n):\n",
" for agent in [a for a in agents if not a.is_correct()]:\n",
" if strategy != ReflexionStrategy.NONE:\n",
" agent.run(reflect_strategy = strategy)\n",
" else:\n",
" agent.run()\n",
" print(f'Answer: {agent.key}')\n",
" trial += 1\n",
" log += log_react_trial(agents, trial)\n",
" correct, incorrect, halted = summarize_react_trial(agents)\n",
" print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}, Halted: {len(halted)}')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Save the result log"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join(root, 'ReAct', strategy.value, f'{len(agents)}_questions_{trial}_trials.txt'), 'w') as f:\n",
" f.write(log)\n",
"save_agents(agents, os.path.join('ReAct', strategy.value, 'agents'))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "e23f799cbd2581634725fbf6ce3480ae26192d78438dfafc8efe944acd6490d5"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -1,4 +1,3 @@
EdgeGPT==0.3.6
gym==0.26.2
joblib==1.2.0
langchain==0.0.162
@ -9,4 +8,5 @@ tenacity==8.2.2
tiktoken==0.4.0
transformers==4.28.1
pandas==1.5.3
scikit-learn
scikit-learn
wikipedia

@ -31,7 +31,37 @@ Trial summary: Correct: {len(correct)}, Incorrect: {len(incorrect)}
return log
def summarize_react_trial(agents):
    """Group ReAct agents by the outcome of their most recent run.

    Args:
        agents: Collection of agent objects exposing ``is_correct()``,
            ``is_halted()`` and ``is_finished()``. It is traversed three
            times, once per category.

    Returns:
        A ``(correct, incorrect, halted)`` tuple of lists:
        ``correct`` holds agents whose ``is_correct()`` is truthy,
        ``incorrect`` holds agents that finished without being correct, and
        ``halted`` holds agents whose ``is_halted()`` is truthy.
    """
    def finished_but_wrong(candidate):
        return candidate.is_finished() and not candidate.is_correct()

    # One pass per category, in the same order as the returned names are
    # computed: correct first, then halted, then incorrect.
    correct = list(filter(lambda candidate: candidate.is_correct(), agents))
    halted = list(filter(lambda candidate: candidate.is_halted(), agents))
    incorrect = list(filter(finished_but_wrong, agents))
    return correct, incorrect, halted
def log_react_trial(agents, trial_n):
    """Build the plain-text log for one ReAct trial.

    The log starts with a header giving the trial number and the
    correct / incorrect / halted counts, followed by one section per
    category. Each agent entry is its prompt (as returned by
    ``_build_agent_prompt()`` and passed through ``remove_fewshot``)
    followed by the agent's ``key`` labelled as the correct answer.

    Args:
        agents: Agents to report on; categorised via ``summarize_react_trial``.
        trial_n: Trial number shown in the header.

    Returns:
        The complete log as a single string.
    """
    correct, incorrect, halted = summarize_react_trial(agents)

    header = f"""
########################################
BEGIN TRIAL {trial_n}
Trial summary: Correct: {len(correct)}, Incorrect: {len(incorrect)}, Halted: {len(halted)}
#######################################
"""

    # (section banner, agents in that section), emitted in this order.
    sections = (
        ('------------- BEGIN CORRECT AGENTS -------------\n\n', correct),
        ('------------- BEGIN INCORRECT AGENTS -----------\n\n', incorrect),
        ('------------- BEGIN HALTED AGENTS -----------\n\n', halted),
    )

    pieces = [header]
    for banner, members in sections:
        pieces.append(banner)
        for member in members:
            prompt = remove_fewshot(member._build_agent_prompt())
            pieces.append(prompt + f'\nCorrect answer: {member.key}\n\n')

    return ''.join(pieces)
def save_agents(agents, dir: str) -> None:
    """Serialize every agent to ``<dir>/<index>.joblib``.

    Args:
        agents: Iterable of agent objects. Each one is written with
            ``joblib.dump`` to a file named after its position in the
            iteration (``0.joblib``, ``1.joblib``, ...).
        dir: Destination directory; created if it does not exist yet.
            The name shadows the ``dir`` builtin but is kept unchanged so
            existing keyword callers keep working.
    """
    os.makedirs(dir, exist_ok=True)
    for i, agent in enumerate(agents):
        # Dump each agent exactly once. The path is assembled with
        # os.path.join instead of a hand-written '/' separator; the earlier
        # f'{dir}/{i}.joblib' dump targeted the same file and was redundant.
        joblib.dump(agent, os.path.join(dir, f'{i}.joblib'))
Loading…
Cancel
Save