Repository: samuelschmidgall/agentlaboratory.git Files analyzed: 30 Estimated tokens: 117.9k Directory structure: └── samuelschmidgall-agentlaboratory.git/ ├── README.md ├── agents.py ├── ai_lab_repo.py ├── app.py ├── common_imports.py ├── inference.py ├── LICENSE ├── mlesolver.py ├── papersolver.py ├── requirements.txt ├── tools.py ├── utils.py ├── experiment_configs/ │ ├── MATH_agentlab.yaml │ └── MATH_agentrxiv.yaml ├── media/ └── readme/ ├── README-arabic.md ├── README-bengali.md ├── README-chinese.md ├── README-farsi.md ├── README-filipino.md ├── README-french.md ├── README-hindi.md ├── README-italian.md ├── README-japanese.md ├── README-korean.md ├── README-portugues.md ├── README-russian.md ├── README-slovak.md ├── README-spanish.md ├── README-turkish.md └── README-vietnamese.md ================================================ FILE: README.md ================================================ # Agent Laboratory: Using LLM Agents as Research Assistants
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【📝 Paper | 🌐 Website | 🌐 AgentRxiv Website | 💻 Software | 📰 Citation】
### News * [March/24/2025] 🎉 🎊 🎉 Now introducing **AgentRxiv**, a framework where autonomous research agents can upload, retrieve, and build on each other’s research. This allows agents to make cumulative progress on their research. ## 📖 Overview - **Agent Laboratory** is an end-to-end autonomous research workflow meant to assist **you** as the human researcher toward **implementing your research ideas**. Agent Laboratory consists of specialized agents driven by large language models to support you through the entire research workflow—from conducting literature reviews and formulating plans to executing experiments and writing comprehensive reports. - This system is not designed to replace your creativity but to complement it, enabling you to focus on ideation and critical thinking while automating repetitive and time-intensive tasks like coding and documentation. By accommodating varying levels of computational resources and human involvement, Agent Laboratory aims to accelerate scientific discovery and optimize your research productivity.
\n```, where REPLACE is the word REPLACE and will be the new code that is replacing the entire set of old code. This tool is useful if you want to make very significant changes, such as entirely changing the model, or the learning process. Before changing the existing code to be your new code, your new code will be tested and if it returns an error it will not replace the existing code. Try limiting the use of rewriting and aim for editing the code more."
)
def execute_command(self, *args) -> str:
    """
    Hand back the replacement code carried by an already-parsed REPLACE payload
    @param args: a single positional argument, the payload sequence whose first element is the new code
    @return: the first element of that payload
    """
    payload = args[0]
    new_code = payload[0]
    return new_code
def matches_command(self, cmd_str) -> bool:
    """
    Check whether the text contains the opening marker of a REPLACE command
    @param cmd_str: (str) raw model output
    @return: (bool) True when "```REPLACE" occurs anywhere in cmd_str
    """
    return "```REPLACE" in cmd_str
def parse_command(self, *args) -> tuple:
    """
    Pull the REPLACE payload out of the command text and trial-run it
    @param args: args[0] is the raw command text, args[1] is the code placed in front of the new code before execution
    @return: (tuple) (False, (None, execution output)) when the output contains "[CODE EXECUTION ERROR]",
             otherwise (True, (new code split into lines, execution output))
    """
    replacement = extract_prompt(args[0], "REPLACE")
    exec_output = execute_code(f"{args[1]}\n{replacement}")
    if "[CODE EXECUTION ERROR]" not in exec_output:
        return True, (replacement.split("\n"), exec_output)
    return False, (None, exec_output,)
class Edit(Command):
    """
    Command that swaps an inclusive range of lines of the current code for new lines.

    parse_command turns an ```EDIT N M block into an argument tuple; execute_command
    applies that tuple to the line list, executes the edited program and reports the outcome.
    """

    def __init__(self):
        super().__init__()
        self.cmd_type = "CODE-edit"

    def docstring(self) -> str:
        """
        Usage instructions for the EDIT tool, meant to be embedded in the model prompt
        @return: (str) tool description
        """
        return (
            "============= CODE EDITING TOOL =============\n"
            "You also have access to a code editing tool. \n"
            "This tool allows you to replace lines indexed n through m (n:m) of the current code with as many lines of new code as you want to add. This removal is inclusive meaning that line n and m and everything between n and m is removed. This will be the primary way that you interact with code. \n"
            "You can edit code using the following command: ```EDIT N M\n\n``` EDIT is the word EDIT, N is the first line index you want to replace and M the the last line index you want to replace (everything inbetween will also be removed), and will be the new code that is replacing the old code. Before changing the existing code to be your new code, your new code will be tested and if it returns an error it will not replace the existing code. Your changes should significantly change the functionality of the code."
        )

    def execute_command(self, *args) -> tuple:
        """
        Apply an edit to the line list, execute the edited program and report the result
        @param args: one positional argument, the 5-tuple produced by parse_command:
            [0] -> N (int) first line index to remove
            [1] -> M (int) last line index to remove (inclusive)
            [2] -> old code as a list of lines (edited in place)
            [3] -> new lines inserted at index N
            [4] -> code placed in front of the edited code before it is executed
        @return: (tuple) (True, edited line list, execution output) on success;
                 (False, None, message) when the range is invalid, an exception is raised,
                 or the execution output contains "CODE EXECUTION ERROR"
        """
        try:
            edit = args[0]
            first, last = edit[0], edit[1]
            current_code = edit[2]
            new_lines = edit[3]
            # list.pop accepts negative indices and would silently delete lines
            # counted from the END of the file, so reject them up front.
            if first < 0 or last < 0:
                return (False, None, f"Invalid EDIT range {first} {last}: line indices must not be negative.")
            # Delete bottom-up so the remaining indices stay valid; an index past
            # the end raises IndexError, which is reported as a failed edit below.
            for line_no in range(last, first - 1, -1):
                current_code.pop(line_no)
            # Empty-slice assignment inserts all new lines, in order, at `first`.
            current_code[first:first] = new_lines
            new_code = "\n".join(current_code)
            code_exec = f"{edit[4]}\n{new_code}"
            code_ret = execute_code(code_exec)
            if "CODE EXECUTION ERROR" in code_ret:
                return (False, None, code_ret)
            return (True, current_code, code_ret)
        except Exception as e:
            # Any failure while editing or executing is surfaced as a failed edit message.
            return (False, None, str(e))

    def matches_command(self, cmd_str) -> bool:
        """
        @param cmd_str: (str) raw model output
        @return: (bool) True when "```EDIT" occurs anywhere in cmd_str
        """
        return "```EDIT" in cmd_str

    def parse_command(self, *args) -> tuple:
        """
        Parse an ```EDIT N M block into the argument tuple used by execute_command
        @param args: args[0] raw command text, args[1] current code lines, args[2] dataset code
        @return: (tuple) (True, (N, M, code lines, new lines, dataset code)) on success;
                 (False, None) when the header is not exactly two tokens or no new lines follow it;
                 (False, (None, None, None, None, None)) when parsing raises
        """
        cmd_str, codelines, datasetcode = args[0], args[1], args[2]
        try:
            text = extract_prompt(cmd_str, "EDIT").split("\n")
            # split() without a separator tolerates repeated or trailing blanks in "N M".
            bounds = text[0].split()
            if len(bounds) != 2:
                return False, None
            first, last = int(bounds[0]), int(bounds[1])
            new_lines = text[1:]
            if not new_lines:
                return False, None
            return True, (first, last, codelines, new_lines, datasetcode)
        except Exception:
            # e.g. non-integer bounds; reported to the caller as a parse failure
            return False, (None, None, None, None, None)
def get_score(outlined_plan, code, code_return, REWARD_MODEL_LLM, attempts=3, openai_api_key=None):
    """
    Ask a reward-model LLM to grade how well the code and its output follow the plan
    @param outlined_plan: research plan text the code was supposed to implement
    @param code: (str) the produced code
    @param code_return: output obtained from running the code
    @param REWARD_MODEL_LLM: model identifier passed to query_model
    @param attempts: (int) maximum number of tries; a failed query or an unparsable score triggers the next try
    @param openai_api_key: API key forwarded to query_model
    @return: (tuple) (score as float, message, True) on success,
             (None, last error message, False) once every attempt has failed
             (also when attempts is 0, with an empty message)
    """
    # todo: have a reward function here
    reward_sys_prompt = (
        "You are a professor agent who is serving as an expert reward model that can read a research plan, research code, and code output and are able to determine how well a model followed the plan, built the code, and got the proper output scored from 0 to 1 as a float.\n\n"
        "You must structure your score exactly in the following way: ```SCORE\n\n``` where SCORE is just the word score, is a floating point number between 0 and 1 representing how well the model followed the plan, built the code, and got the proper output."
    )
    last_error = str()
    for _attempt in range(attempts):
        try:
            scoring = query_model(
                model_str=f"{REWARD_MODEL_LLM}",
                system_prompt=reward_sys_prompt,
                openai_api_key=openai_api_key,
                prompt=(
                    f"Outlined in the following text is the research plan that the machine learning engineer was tasked with building: {outlined_plan}\n\n"
                    f"The following text is the research code that the model produced: \n{code}\n\n"
                    f"The following is the output from the model: {code_return}\n\n"), temp=0.6)
            performance = float(extract_prompt(text=scoring, word="SCORE"))
            return performance, f"The performance of your submission is: {performance}", True
        except Exception as err:
            # Remember the failure and fall through to the next attempt instead of
            # giving up immediately; the name bound by `except` is cleared on exit,
            # hence the separate variable.
            last_error = str(err)
    # Always a 3-tuple so callers can unpack (score, message, is_valid).
    return None, last_error, False
def code_repair(code, error, ctype, REPAIR_LLM, openai_api_key=None):
    """
    Ask an LLM to repair failing code, either as a full rewrite or as an EDIT command
    @param code: text handed to the repair model as the code to fix
    @param error: error text shown to the repair model
    @param ctype: (str) "replace" -> return the contents of the ```python block of the reply;
                  "edit" -> return the raw reply; anything else -> None without querying
    @param REPAIR_LLM: model identifier passed to query_model
    @param openai_api_key: API key forwarded to query_model
    @return: (str) repaired code or repair command, or None for an unknown ctype
    """
    if ctype not in ("replace", "edit"):
        return None

    shared_intro = (
        "You are an automated code repair tool.\n"
        "Your goal is to take in code and an error and repair the code to make sure the same error does not repeat itself, and also to remove any other potential errors from the code without affecting the code output.\n"
        "Your output should match the original code as closely as possible.\n"
    )
    user_prompt = f"Provided here is the error: {error}\n\nProvided below is the code:\n\n{code}"

    if ctype == "replace":
        repair_sys = shared_intro + (
            "You must wrap the code in the following ```python\n\n```\n"
            "Do not forget the opening ```python and the closing ```."
        )
        reply = query_model(
            openai_api_key=openai_api_key,
            model_str=f"{REPAIR_LLM}",
            system_prompt=repair_sys,
            prompt=user_prompt, temp=0.8)
        return extract_prompt(reply, "python")

    repair_sys = shared_intro + (
        "============= CODE EDITING TOOL =============\n"
        "You have access to a code editing tool. \n"
        "This tool allows you to replace lines indexed n through m (n:m) of the current code with as many lines of new code as you want to add. This removal is inclusive meaning that line n and m and everything between n and m is removed. This will be the primary way that you interact with code. \n"
        "You can edit code using the following command: ```EDIT N M\n\n``` EDIT is the word EDIT, N is the first line index you want to replace and M the the last line index you want to replace (everything inbetween will also be removed), and will be the new code that is replacing the old code. Before changing the existing code to be your new code, your new code will be tested and if it returns an error it will not replace the existing code.\n"
        "Please use the code editing tool to fix this code."
        "Do not forget the opening ```EDIT N M and the closing ```."
        "Your output should look like the following\n\n```EDIT N M\n\n```"
    )
    return query_model(
        openai_api_key=openai_api_key,
        model_str=f"{REPAIR_LLM}",
        system_prompt=repair_sys,
        prompt=user_prompt, temp=0.2)
class MLESolver:
def __init__(self, dataset_code, openai_api_key=None, notes=None, max_steps=10, insights=None, plan=None, llm_str=None):
self.supress_print = False
if notes is None: self.notes = []
else: self.notes = notes
self.dataset_code = dataset_code
if plan is None: self.plan = ""
else: self.plan = plan
self.llm_str = llm_str
self.verbose = False
self.max_codes = 1
self.st_hist_len = 2
self.min_gen_trials = 1
self.code_lines = str()
self.st_history = list()
self.insights = insights
self.code_reflect = str()
self.max_steps = max_steps
self.prev_code_ret = str()
self.should_execute_code = True
self.openai_api_key = openai_api_key
def initial_solve(self):
    """
    Bootstrap the solver: obtain a first scored program and seed the best-code pool with it
    @return: None
    """
    # Phase 1: only the REPLACE command is offered while the first program is generated
    self.best_score = None
    self.commands = [Replace()]
    self.model = f"{self.llm_str}"
    first_code, first_output, self.best_score = self.gen_initial_code()

    # Phase 2: record the result and open up both EDIT and REPLACE
    self.best_codes = [(copy(first_code), self.best_score, first_output)]
    self.code_lines = first_code
    self.model = f"{self.llm_str}"
    self.commands = [Edit(), Replace()]
    self.prev_working_code = copy(self.code_lines)
@staticmethod
def clean_text(text):
text = text.replace("```\n", "```")
text = text.replace("```python\n", "```REPLACE\n")
return text
def gen_initial_code(self):
    """
    Keep requesting a ```REPLACE command from the model until one attempt receives a score
    @return: (tuple) (code lines, execution output, score) of the first scored attempt
    """
    attempt = 0
    past_errors = []
    while True:
        if attempt > 0:
            # Feed the previous failure back to the model; the window never holds more than four entries
            past_errors.append(
                f"The following was the previous command generated: {model_resp}. This was the error return {cmd_str}. You should make sure not to repeat this error and to solve the presented problem.")
            if len(past_errors) == 5:
                past_errors.pop(0)
            err_hist = "The following is a history of your previous errors\n" + "\n".join(past_errors) + "\nDO NOT REPEAT THESE."
        else:
            err_hist = ""
        raw_resp = query_model(
            openai_api_key=self.openai_api_key,
            model_str=self.model,
            system_prompt=self.system_prompt(),
            prompt=f"{err_hist}\nYou should now use ```REPLACE to create initial code to solve the challenge. Now please enter the ```REPLACE command below:\n ", temp=1.0)
        model_resp = self.clean_text(raw_resp)
        cmd_str, code_lines, prev_code_ret, _should_execute, score = self.process_command(model_resp)
        if not self.supress_print:
            print(f"@@@ INIT ATTEMPT: Command Exec // Attempt {attempt}: ", str(cmd_str).replace("\n", " | "))
        if not self.supress_print:
            print(f"$$$ Score: {score}")
        if score is not None:
            return code_lines, prev_code_ret, score
        attempt += 1
def solve(self):
    """
    Run one improvement round: query the model for commands until at least one attempt
    has been scored, keep the best-scoring attempt and possibly add it to best_codes
    @return: (tuple) (model_resp, cmd_str) belonging to the best-scoring attempt
    """
    num_attempts = 0
    best_pkg = None
    top_score = None
    self.prev_code_ret = None
    self.should_execute_code = False
    while True:
        # the "immediately" nudge is only added when exactly two commands are registered
        if len(self.commands) == 2: cmd_app_str = "You must output either the ```EDIT or ```REPLACE command immediately. "
        else: cmd_app_str = ""
        model_resp = query_model(
            openai_api_key=self.openai_api_key,
            model_str=self.model,
            system_prompt=self.system_prompt(),
            prompt=f"The following is your history:{self.history_str()}\n\n{cmd_app_str}Now please enter a command: ", temp=1.0)
        model_resp = self.clean_text(model_resp)
        # every attempt starts from a randomly chosen entry of the best-code pool
        self.code_lines = copy(random.choice(self.best_codes)[0])
        cmd_str, code_lines, prev_code_ret, should_execute_code, score = self.process_command(model_resp)
        # short-term history keeps at most st_hist_len entries, oldest dropped first
        self.st_history.append([model_resp, prev_code_ret, code_lines, cmd_str])
        if len(self.st_history) > self.st_hist_len: self.st_history.pop(0)
        if score is not None:
            # remember the attempt when it is the first scored one or beats the best so far
            if top_score is None:
                best_pkg = copy(code_lines), copy(prev_code_ret), copy(should_execute_code), copy(model_resp), copy(cmd_str)
                top_score = score
            elif score > top_score:
                best_pkg = copy(code_lines), copy(prev_code_ret), copy(should_execute_code), copy(model_resp), copy(cmd_str)
                top_score = score
        if not self.supress_print: print(f"@@@ Command Exec // Attempt {num_attempts}: ", str(cmd_str).replace("\n", " | "))
        if not self.supress_print: print(f"$$$ Score: {score}")
        # stop once the minimum number of trials is reached AND something has been scored
        if num_attempts >= self.min_gen_trials and top_score is not None: break
        num_attempts += 1
    self.code_lines, self.prev_code_ret, self.should_execute_code, model_resp, cmd_str = best_pkg
    # NOTE(review): this prints the output of the LAST attempt, not of the best one unpacked above — confirm intent
    if not self.supress_print: print(prev_code_ret)
    # add top scoring code that was successful to the best codes
    if top_score > self.best_codes[-1][1]:
        # replace the lowest scoring one
        if len(self.best_codes) >= self.max_codes:
            self.best_codes.pop(-1)
            self.code_reflect = self.reflect_code()
        self.best_codes.append((copy(self.code_lines), copy(top_score), self.prev_code_ret))
        # sort by score, to make sure lowest are removed in future
        self.best_codes.sort(key=lambda x: x[1], reverse=True)
    return model_resp, cmd_str
def reflect_code(self):
    """
    Ask the model to reflect on the programs currently held in best_codes
    @return: (str) language model-produced reflection
    """
    separator = "$"*40 + "\n\n"
    rendered = []
    for _code in self.best_codes:
        rendered.append(self.generate_code_lines(_code[0]) + f"\nCode Return {_code[1]}")
    code_strs = separator.join(rendered)
    code_strs = f"Please reflect on the following sets of code: {code_strs} and come up with generalizable insights that will help you improve your performance on this benchmark."
    syst = self.system_prompt(commands=False) + code_strs
    reflect_request = "Please reflect on ideas for how to improve your current code. Examine the provided code and think very specifically (with precise ideas) on how to improve performance, which methods to use, how to improve generalization on the test set with line-by-line examples below:\n"
    return query_model(
        prompt=reflect_request,
        system_prompt=syst,
        model_str=f"{self.llm_str}",
        openai_api_key=self.openai_api_key)
def process_command(self, model_resp):
    """
    Take command from language model and execute if valid.
    The first registered command whose matches_command accepts the response is handled;
    a failing command is handed to code_repair and retried up to GLOBAL_REPAIR_ATTEMPTS times.
    @param model_resp: (str) language model output
    @return: (tuple) tuple containing the following items
        - cmd_str: (str) code execution return and success flag
        - code_lines: (list) list of code lines as strings
        - prev_code_ret: (str) output from running code
        - should_execute_code: (bool) did the code change, if so we need to re-execute it
        - score: (float) score of model
      When no command matches, a "not supported" message followed by four None values is returned.
    """
    # start from the solver's current state; these are returned unchanged when the command fails
    prev_code_ret = self.prev_code_ret
    should_execute_code = self.should_execute_code
    code_lines = copy(self.code_lines)
    remove_figures()
    for cmd in self.commands:
        if cmd.matches_command(model_resp):
            # attempt to execute the code edit command
            if cmd.cmd_type == "CODE-edit":
                score = None
                failed = True
                code_err = str()
                for _tries in range(GLOBAL_REPAIR_ATTEMPTS):
                    # parse against a fresh copy so a failed edit never touches self.code_lines
                    success, args = cmd.parse_command(model_resp, copy(self.code_lines), self.dataset_code)
                    if success:
                        cmd_return = cmd.execute_command(args)
                        code_err = f"Return from executing code: {cmd_return[2]}"
                        if cmd_return[0]: # if success
                            code_lines = copy(cmd_return[1])
                            score, cmd_str, is_valid = get_score(self.plan, "\n".join(code_lines), cmd_return[2], openai_api_key=self.openai_api_key, REWARD_MODEL_LLM=self.llm_str)
                            if is_valid:
                                failed = False
                                break
                            # scoring failed: append its message to the error shown to the repair model
                            code_err += f"\nReturn from executing code on real test set {cmd_str}"
                    # the repair model's reply replaces the command for the next try
                    repaired_code = code_repair(model_resp, code_err, REPAIR_LLM=self.llm_str, ctype="edit", openai_api_key=self.openai_api_key)
                    model_resp = repaired_code
                    if not self.supress_print: print(f"     * Attempting repair // try {_tries}*")
                if failed:
                    cmd_str = f"Code editing FAILED due to the following error: {code_err}. Code was reverted back to original state before edits."
                    if not self.supress_print: print("$$$$ CODE EDIT (failed)")
                else:
                    cmd_str = "Code was successfully edited."
                    prev_code_ret = copy(cmd_return[2])
                    if not self.supress_print: print("$$$$ CODE EDIT (success)")
                    should_execute_code = True
                return cmd_str, code_lines, prev_code_ret, should_execute_code, score
            # attempt to execute the code replace command
            elif cmd.cmd_type == "CODE-replace": # DONE
                score = None
                failed = True
                code_err = str()
                for _tries in range(GLOBAL_REPAIR_ATTEMPTS):
                    success, args = cmd.parse_command(model_resp, self.dataset_code)
                    # args[1] is read before success is checked, so it is used on the failure path too
                    code_err = f"Return from executing code: {args[1]}"
                    if success:
                        code_lines = copy(args[0])
                        score, cmd_str, is_valid = get_score(self.plan, "\n".join(code_lines), args[1], openai_api_key=self.openai_api_key, REWARD_MODEL_LLM=self.llm_str)
                        if is_valid:
                            failed = False
                            break
                        code_err += f"\nReturn from executing code on real test set {cmd_str}"
                    # repair works on the bare code; the result is re-wrapped as a REPLACE command
                    repaired_code = code_repair(extract_prompt(model_resp, "REPLACE", ), code_err, ctype="replace", openai_api_key=self.openai_api_key, REPAIR_LLM=self.llm_str)
                    repaired_code = f"```REPLACE\n{repaired_code}\n```"
                    model_resp = repaired_code
                    if not self.supress_print: print(f"     * Attempting repair // try {_tries}*")
                if failed:
                    cmd_str = f"Code replacement FAILED due to the following error: {code_err}. Code was reverted back to original state before edits."
                    if not self.supress_print: print("$$$$ CODE REPLACE (failed)")
                else:
                    cmd_str = "Code was successfully replaced."
                    code_lines = copy(args[0])
                    prev_code_ret = copy(args[1])
                    if not self.supress_print: print("$$$$ CODE REPLACE (success)")
                    should_execute_code = True
                return cmd_str, code_lines, prev_code_ret, should_execute_code, score
    # no registered command matched the response
    if not self.supress_print: print("$$$$ INVALID COMMAND (failed)")
    return "Command not supported, choose from existing commands", None, None, None, None
def history_str(self):
    """
    Render the short-term history as one prompt-ready string, oldest entry first
    @return: (str) history string
    """
    total = len(self.st_history)
    pieces = []
    for position, entry in enumerate(self.st_history):
        steps_ago = total - position
        pieces.append(f"-------- History ({steps_ago} steps ago) -----\n")
        # the response line is left out for an empty response
        if len(entry[0]) > 0:
            pieces.append(f"Because of the following response: {entry[0]}\n")
        pieces.append(f"and the following COMMAND response output: {entry[3]}\n")
        pieces.append(f"With the following code used: {'#'*20}\n{entry[2]}\n{'#'*20}\n\n")
        pieces.append(f"The environment feedback and reflection was as follows: {entry[1]}\n")
        pieces.append(f"-------- End of history ({steps_ago} steps ago) -------\n")
    return "".join(pieces)
def system_prompt(self, commands=True):
    """
    Produce a system prompt for the mle-solver to solve ml problems
    @param commands: (bool) whether to append the command descriptions; when False every
                     other section is still returned
    @return: (str) system prompt
    """
    # Built separately from the command section: adjacent string literals are merged into
    # ONE literal before a trailing `... if commands else ""` is applied, which would turn
    # the whole prompt into "" for commands=False.
    base_prompt = (
        # ROLE DESCRIPTION
        f"{self.role_description()}.\n"
        # TASK INSTRUCTIONS
        f"The following are your task instructions: {self.phase_prompt()}\n"
        # LIT REVIEW INSIGHTS
        f"Provided below are some insights from a literature review summary:\n{self.insights}\n"
        # CODE INSIGHTS
        f"{self.code_reflect}"
        # NOTES
        f"The following are notes, instructions, and general tips for you: {self.notes}"
        # PLAN DESCRIPTION
        f"You are given a machine learning research task described, where the plan is described as follows: {self.plan}\n"
        # DATASET DESCRIPTION
        f"{self.generate_dataset_descr_prompt()}"
        # Create Figures
        f"You should also try generating at least two figures to showcase the results, titled Figure_1.png and Figure_2.png\n"
        f"Your method MUST not get 0% accuracy. If it does, you have done something wrong and must correct this. Make sure to check your accuracy calculation is correct.\n"
        # transition
        f"Your goal is to solve the research plan as well as possible. You will receive a score after you write the code and should aim to maximize the score by following the plan instructions and writing high quality code.\n"
        f"Before each experiment please include a print statement explaining exactly what the results are meant to show in great detail before printing the results out.\n"
    )
    if not commands:
        return base_prompt
    # COMMAND SET
    return base_prompt + (
        f"The following are commands you have access to: {self.command_descriptions()}\n. You should try to have a diversity of command responses if appropriate. Do not repeat the same commend too many times. Please consider looking through your history and not repeating commands too many times."
    )
def generate_code_lines(self, code):
    """
    Prefix every code line with its zero-based index
    @param code: (list) list of code line strings
    @return: (str) one "<index> |<line>" row per line, each terminated by a newline
    """
    return "".join(f"{line_no} |{line}\n" for line_no, line in enumerate(code))
def feedback(self, code_return):
    """
    Provide execution feedback after command is run
    @param code_return: (str) return from code execution, or None when nothing was executed
    @return: (str) feedback string combining the code return and the model's reflection
    """
    # Rendered up front so every branch, including the "nothing executed" one,
    # has a code listing to hand to the reflection step.
    code_str = self.generate_code_lines(self.code_lines)
    if code_return is None:
        # No execution output is available; ask for a forward-looking reflection instead.
        code_return = "No changes were made to the code."
        reflect_prompt = "Reflect on your future plans and next steps to improve the code."
    elif "[CODE EXECUTION ERROR]" in code_return:
        if not self.supress_print: print(f"@@@@ ERROR")
        reflect_prompt = f"This is your code: {code_str}\n\nYour code returned the following error {code_return}. Please provide a detailed reflection on why this error was returned, which lines in the code caused this error, and exactly (line by line) how you hope to fix this in the next update. This step is mostly meant to reflect in order to help your future self fix the error better. Do not provide entirely new code but provide suggestions on how to fix the bug using LINE EDITS."
    elif os.path.exists("submission.csv"):
        self.prev_working_code = copy(self.code_lines)
        # REWARD_MODEL_LLM is a required argument of get_score (it is not a print() option)
        grade_return = get_score(self.plan, "\n".join(self.prev_working_code), code_return, REWARD_MODEL_LLM=self.llm_str, openai_api_key=self.openai_api_key)[0]
        if not self.supress_print: print(f"@@@@ SUBMISSION: model score {grade_return}")
        reflect_prompt = f"This is your code: {code_str}\n\nYour code successfully returned a submission csv. Consider further improving your technique through advanced learning techniques, data augmentation, or hyperparamter tuning to increase the score. Please provide a detailed reflection on how to improve your performance, which lines in the code could be improved upon, and exactly (line by line) how you hope to improve this in the next update. This step is mostly meant to reflect in order to help your future self."
        # Clear every csv in the working directory; os.remove avoids handing file names to a shell.
        for file in os.listdir("."):
            if file.endswith(".csv"):
                try:
                    os.remove(file)
                except OSError as err:
                    if not self.supress_print: print(f"Could not remove {file}: {err}")
    else:
        if not self.supress_print: print("@@@@ No return")
        reflect_prompt = f"This is your code: {code_str}\n\nYour code did not return an error, but also did not successfully submit a submission csv file. Please reflect on how you can improve your submission for the next cycle to submit a file and obtain a high score."
    reflection = self.reflection(reflect_prompt, code_str, code_return)
    return f"Code return: {code_return}\n\nReflection: {reflection}"
def reflection(self, reflect_prompt, code_str, code_return):
    """
    Query the model with a reflection prompt and wrap its answer with the code and its output
    @param reflect_prompt: (str) reflection prompt
    @param code_str: (str) code string
    @param code_return: output of the code, quoted in the returned text
    @return: (str) reflection string
    """
    model_reflection = query_model(
        prompt=reflect_prompt,
        system_prompt=self.system_prompt(commands=False),
        model_str=f"{self.llm_str}",
        openai_api_key=self.openai_api_key)
    summary = (
        f"During the previous execution, the following code was run: \n\n{code_str}\n\n"
        f"This code returned the following: \n{code_return}\n"
        f"The following is your reflection from this feedback {model_reflection}\n"
    )
    return summary
def generate_dataset_descr_prompt(self):
    """
    Build the prompt section that shows the dataset code to the model
    @return: (str) description prompt containing self.dataset_code
    """
    lead_in = "\n- The following dataset code will be added to the beginning of your code always, so this does not need to be rewritten: "
    return lead_in + f"{self.dataset_code}"
def phase_prompt(self,):
    """
    Describe system role and general tips for mle-solver
    @return: (str) task instructions as one string
    """
    # The parts are joined explicitly: a comma between adjacent literals inside the
    # parentheses would silently turn the result into a tuple.
    phase_str = "".join([
        "You are an ML engineer and you will be writing the code for a research project.\n",
        "Your goal is to produce code that obtains final results for a set of research experiments. You should aim for simple code to collect all results, not complex code. You should integrate the provided literature review and the plan to make sure you are implementing everything outlined in the plan. The dataset code will be added to the beginning of your code always, so this does not need to be rewritten. Make sure you do not write functions, only loose code.\n",
        "I would recommend writing smaller code so you do not run out of time but make sure to work on all points in the plan in the same code. You code should run every experiment outlined in the plan for a single code.\n",
        "You cannot pip install new libraries, but many machine learning libraries already work. If you wish to use a language model in your code, please use the following:\nAnything you decide to print inside your code will be provided to you as input, and you will be able to see that part of the code. Using print statements is useful for figuring out what is wrong and understanding your code better.",
    ])
    return phase_str
def role_description(self):
    """
    One-sentence persona used at the top of the system prompt
    @return: (str) role description
    """
    role = "You are an expert machine learning engineer working at a top university to write code to solve machine learning research challenges using your machine learning expertise."
    return role
@staticmethod
def _common_code_errors():
"""
Some general tips to avoid common code errors, also TF has many errors so we avoid this and ask to use pytorch
@return: (str) common code errors
"""
return (
"Make sure to import everything that you are using.\n"
"Reflect on the code before writing it to make sure there are no bugs or compilation issues.\n"
"YOU MUST USE COMMANDS PROPERLY. Do not use the word COMMAND for the command that is incorrect. You must use an actual command (e.g. EDIT, REPLACE...) NOT THE WORD COMMAND. Do not make this mistake.\n"
"Under no circumstances should you use tensorflow or keras. Only use pytorch for scikitlearn for deep learning.\n"
)
def command_descriptions(self):
    """
    Assemble the tool section of the prompt: general command syntax, common-error tips,
    then the docstring of every registered command
    @return: (str) command descriptions
    """
    tool_docs = []
    for _cmd in self.commands:
        tool_docs.append(_cmd.docstring())
    usage = f"\nYou also have access to tools which can be interacted with using the following structure: ```COMMAND\n\n```, where COMMAND is whichever command you want to run (e.g. EDIT, REPLACE...), is information used for the command, such as code to run or a search query, and ``` are meant to encapsulate the command. ``` must be included as part of the command both at the beginning and at the end of the code. DO NOT FORGOT TO HAVE ``` AT THE TOP AND BOTTOM OF CODE. and this structure must be followed to execute a command correctly. YOU CAN ONLY EXECUTE A SINGLE COMMAND AT A TIME! Do not try to perform multiple commands EVER only one. {self._common_code_errors()}"
    return usage + "\n".join(tool_docs)
def run_code(self):
    """
    Return the stored execution output when there is one, otherwise execute the current
    code if execution is flagged, otherwise report that nothing changed
    @return: (str) code return
    """
    if self.prev_code_ret is not None:
        return self.prev_code_ret
    if not self.should_execute_code:
        return "Changes have not yet been made to the code."
    program = "\n".join(self.code_lines)
    return execute_code(program)
================================================
FILE: papersolver.py
================================================
import random
import string
from utils import *
from tools import *
from copy import copy
from inference import *
from pathlib import Path
from copy import deepcopy
from common_imports import *
from agents import get_score
from abc import abstractmethod
from contextlib import contextmanager
import sys, os
class Command:
    """
    Base interface for tool commands: each command can describe itself, recognise its
    marker in model output, parse its arguments and execute.

    NOTE(review): the methods are marked with @abstractmethod but the class has no ABC
    base visible here, so instantiation is presumably not blocked — confirm whether
    enforcement is intended.
    """

    def __init__(self):
        self.cmd_type = "OTHER"

    @abstractmethod
    def docstring(self) -> str:
        """Return the usage text of the command (no-op in the base class)."""

    @abstractmethod
    def execute_command(self, *args) -> str:
        """Run the command with parsed arguments (no-op in the base class)."""

    @abstractmethod
    def matches_command(self, cmd_str) -> bool:
        """Tell whether cmd_str invokes this command (no-op in the base class)."""

    @abstractmethod
    def parse_command(self, cmd_str) -> tuple:
        """Parse cmd_str into the arguments for execute_command (no-op in the base class)."""
def execute_latex():
    """
    Placeholder hook that takes no input and always reports success
    @return: (bool) True
    """
    return True
"""
@@@@@@@@@@@@@@@@@@
@@ SEARCH TOOLS @@
@@@@@@@@@@@@@@@@@@
"""
class Arxiv(Command):
    """
    Search command for arXiv: ```SUMMARY looks papers up by query, ```FULL_TEXT fetches
    the text of one paper. Both are delegated to an ArxivSearch engine.
    """

    def __init__(self):
        super().__init__()
        self.arxiv_eng = ArxivSearch()
        self.num_papers_per_search = 10
        self.cmd_type = "SEARCH-arxiv"

    def docstring(self) -> str:
        """
        Usage instructions for the arXiv tool, meant to be embedded in the model prompt
        @return: (str) tool description
        """
        return (
            "============= ARXIV SEARCH TOOL ============="
            "You also have access to machine learning paper from Arxiv. "
            "To search for summaries of papers on arxiv you can use the following command: ```SUMMARY\n\n```\n where is a string that will be used as the search query to find papers with semantically similar content and SUMMARY is just the word SUMMARY.\n"
            "To get the full paper text for an arXiv paper, use the following command: ```FULL_TEXT\n\n```\n where is the ID of the arXiv paper (which can be found by using the SUMMARY command), and FULL_TEXT is just the word FULL_TEXT. Make sure to read the full text using the FULL_TEXT command before adding it to your list of relevant papers.\n"
            "When you read arxiv paper, make sure to take note of the techniques they are using to solve their problem as well as the hyperparameters and implementation details. These are very important for successfully solving machine learning problems."
        )

    def execute_command(self, *args) -> str:
        """
        Dispatch a parsed command to the search engine
        @param args: args[0] -> command ("SUMMARY" or "FULL_TEXT"), args[1] -> query / paper id
        @return: whatever the ArxivSearch call returns
        @raise Exception: when args[0] is neither "SUMMARY" nor "FULL_TEXT"
        """
        if args[0] == "SUMMARY":
            return self.arxiv_eng.find_papers_by_str(args[1], self.num_papers_per_search)
        elif args[0] == "FULL_TEXT":
            return self.arxiv_eng.retrieve_full_paper_text(args[1])
        raise Exception("Invalid Arxiv Search")

    def matches_command(self, cmd_str) -> bool:
        """
        @param cmd_str: (str) raw model output
        @return: (bool) True when the text contains "```SUMMARY" or "```FULL_TEXT"
        """
        return "```SUMMARY" in cmd_str or "```FULL_TEXT" in cmd_str

    def parse_command(self, *args) -> tuple:
        """
        Extract the command kind and its payload from the model output
        @param args: args[0] is the raw command text
        @return: (tuple) (True, (kind, payload lines)) for the first of SUMMARY / FULL_TEXT
                 that is present with a non-blank payload, otherwise (False, None)
        """
        cmd_str = args[0]
        # str.split always yields at least one element, so a length test can never
        # detect an absent command; look for the marker itself (as matches_command
        # does) and require a non-blank payload.
        for keyword in ("SUMMARY", "FULL_TEXT"):
            if f"```{keyword}" not in cmd_str:
                continue
            payload = extract_prompt(cmd_str, keyword).split("\n")
            if any(line.strip() for line in payload):
                return True, (keyword, payload,)
        return False, None
"""
@@@@@@@@@@@@@@@@@@@
@@ WRITING TOOLS @@
@@@@@@@@@@@@@@@@@@@
"""
class PaperReplace(Command):
    """
    Command that swaps the entire LaTeX document for new LaTeX supplied in a ```REPLACE block.
    """

    def __init__(self, save_loc):
        super().__init__()
        self.cmd_type = "PAPER-replace"
        self.save_loc = save_loc

    def docstring(self) -> str:
        """
        Usage instructions for the paper replacing tool, meant to be embedded in the model prompt
        @return: (str) tool description
        """
        description = (
            "============= PAPER REPLACING TOOL =============\n"
            "You also have access to a paper replacing tool. \n"
            "This tool allows you to entirely re-write/replace all of the current latex and erase all existing latex.\n"
            "You can use this tool via the following command: ```REPLACE\n\n```, where REPLACE is the word REPLACE and will be the new latex that is replacing the entire set of old latex. This tool is useful if you want to make very significant changes, such as entirely changing the model, or the learning process. Before changing the existing latex to be your new latex, your new latex will be tested and if it returns an error it will not replace the existing latex. Try limiting the use of rewriting and aim for editing the latex more."
        )
        return description

    def execute_command(self, *args) -> str:
        """
        @param args: a single positional argument, the payload sequence whose first element is the new latex
        @return: the first element of that payload
        """
        payload = args[0]
        return payload[0]

    def matches_command(self, cmd_str) -> bool:
        """
        @param cmd_str: (str) raw model output
        @return: (bool) True when "```REPLACE" occurs anywhere in cmd_str
        """
        return "```REPLACE" in cmd_str

    def parse_command(self, *args) -> tuple:
        """
        Extract the REPLACE payload and pass it to compile_latex
        @param args: args[0] is the raw command text, args[1] is forwarded as the compile option
        @return: (tuple) (False, (None, compile output)) when the output contains "[CODE EXECUTION ERROR]",
                 otherwise (True, (new latex split into lines, compile output))
        """
        replacement = extract_prompt(args[0], "REPLACE")
        compile_output = compile_latex(replacement, self.save_loc, compile=args[1])
        if "[CODE EXECUTION ERROR]" not in compile_output:
            return True, (replacement.split("\n"), compile_output)
        return False, (None, compile_output,)
class PaperEdit(Command):
    """Command that replaces an inclusive range of lines in the current LaTeX."""

    def __init__(self, save_loc):
        super().__init__()
        # location handed to compile_latex when an edit is test-compiled
        self.save_loc = save_loc
        self.cmd_type = "PAPER-edit"

    def docstring(self) -> str:
        """Return the tool description that is embedded in the model prompt."""
        return (
            "============= PAPER EDITING TOOL =============\n"
            "You also have access to a paper editing tool. \n"
            "This tool allows you to replace lines indexed n through m (n:m) of the current latex with as many lines of new latex as you want to add. This removal is inclusive meaning that line n and m and everything between n and m is removed. This will be the primary way that you interact with latex. \n"
            "You can edit latex using the following command: ```EDIT N M\n\n``` EDIT is the word EDIT, N is the first line index you want to replace and M the last line index you want to replace (everything inbetween will also be removed), and will be the new latex that is replacing the old latex. Before changing the existing latex to be your new latex, your new latex will be tested and if it returns an error it will not replace the existing latex. Your changes should significantly change the latex. You should write new paragraphs and update old ones. Try using the edit command often. Make sure to generate lots of text. You should also avoid editing lines 0 0, and should edit the main text of the paragraphs, such as editing lines in the middle of the text body."
        )

    def execute_command(self, *args) -> tuple:
        """
        Apply an edit to a copy of the document and test-compile the result.

        args[0] is one packed tuple:
          [0] N (int) first line index to replace
          [1] M (int) last line index to replace (inclusive)
          [2] old latex as a list of lines
          [3] new lines to insert
          [4] compile flag forwarded to compile_latex

        Returns (True, edited_lines, compile_output) on success and
        (False, None, message) when the range is invalid, the compile output
        contains "error" (case-insensitive), or any exception occurs.
        """
        try:
            first, last, old_lines, new_lines, do_compile = args[0][:5]
            # Work on a copy: a rejected edit must not alter the caller's list.
            current_latex = list(old_lines)
            total = len(current_latex)
            # When last < first nothing is removed and the new lines are
            # inserted before index `first`, so only `first` needs checking.
            if first < 0 or first > total or (last >= first and last >= total):
                return (False, None, f"Invalid line range {first} {last}: the latex has {total} lines (valid indices are 0 to {total - 1}).")
            # One slice assignment removes first..last inclusive and inserts
            # the new lines in their given order at position `first`.
            current_latex[first:max(first, last + 1)] = new_lines
            new_latex = "\n".join(current_latex)
            latex_ret = compile_latex(new_latex, self.save_loc, compile=do_compile)
            if "error" in latex_ret.lower():
                return (False, None, latex_ret)
            return (True, current_latex, latex_ret)
        except Exception as e:
            # best effort: every failure is reported back as text, never raised
            return (False, None, str(e))

    def matches_command(self, cmd_str) -> bool:
        """Report whether cmd_str contains an opening ```EDIT fence."""
        return "```EDIT" in cmd_str

    def parse_command(self, *args) -> tuple:
        """
        Parse an EDIT command out of the model output.

        args[0] is the model output and args[1] the current latex lines.
        Returns (True, (N, M, latex_lines, new_lines)) on success and
        (False, (None, None, None, None)) when the header is not exactly two
        integers or no replacement lines follow it.
        """
        cmd_str, latexlines = args[0], args[1]
        failure = (False, (None, None, None, None))
        try:
            text = extract_prompt(cmd_str, "EDIT").split("\n")
            # split() without an argument tolerates repeated or trailing
            # whitespace around the two indices
            bounds = text[0].split()
            if len(bounds) != 2:
                return failure
            first, last = (int(token) for token in bounds)
            new_lines = text[1:]
            if not new_lines:
                return failure
            return True, (first, last, latexlines, new_lines)
        except Exception:
            # e.g. non-numeric indices: report the command as unparsable
            return failure
# Modified version of section tips from the AI scientist paper!
# Good work guys :) https://github.com/SakanaAI/AI-Scientist/blob/main/ai_scientist/perform_writeup.py
# Maps a lower-case section name to the writing guidance for that section.
# PaperSolver.system_prompt indexes this dict with the section being written
# and embeds the text verbatim in the model prompt, so the strings below are
# runtime data: edit them only to change what the model is told.
per_section_tips = {
"abstract": """
- TL;DR of the paper
- What are we trying to do and why is it relevant?
- Why is this hard?
- How do we solve it (i.e. our contribution!)
- How do we verify that we solved it (e.g. Experiments and results)
- This must only be a single paragraph, not more.
Please make sure the abstract reads smoothly and is well-motivated. This should be one continuous paragraph with no breaks between the lines.
""",
"introduction": """
- Longer version of the Abstract, i.e. of the entire paper
- What are we trying to do and why is it relevant?
- Why is this hard?
- How do we solve it (i.e. our contribution!)
- How do we verify that we solved it (e.g. Experiments and results)
- New trend: specifically list your contributions as bullet points
- Extra space? Future work!
""",
"related work": """
- Academic siblings of our work, i.e. alternative attempts in literature at trying to solve the same problem.
- Goal is to “Compare and contrast” - how does their approach differ in either assumptions or method? If their method is applicable to our Problem Setting I expect a comparison in the experimental section. If not, there needs to be a clear statement why a given method is not applicable.
- Note: Just describing what another paper is doing is not enough. We need to compare and contrast.
""",
"background": """
- Academic Ancestors of our work, i.e. all concepts and prior work that are required for understanding our method.
- Usually includes a subsection, Problem Setting, which formally introduces the problem setting and notation (Formalism) for our method. Highlights any specific assumptions that are made that are unusual.
- Make sure to use mathematical notation when necessary.
- Note: If our paper introduces a novel problem setting as part of its contributions, it's best to have a separate Section.
""",
"methods": """
- What we do. Why we do it. All described using the general Formalism introduced in the Problem Setting and building on top of the concepts / foundations introduced in Background.
- Make sure you clearly report precise mathematical equations in the methods section and the precise methodology.
""",
"experimental setup": """
- How do we test that our stuff works? Introduces a specific instantiation of the Problem Setting and specific implementation details of our Method for this Problem Setting.
- Do not imagine unknown hardware details.
- Includes a description of the dataset, evaluation metrics, important hyperparameters, and implementation details.
""",
"results": """
- Shows the results of running Method on our problem described in Experimental Setup.
- Includes statements on hyperparameters and other potential issues of fairness.
- Only includes results that have actually been run and saved in the logs. Do not hallucinate results that don't exist.
- Make sure you clearly and numerically report experimental results in the results section.
- If results exist: compares to baselines and includes statistics and confidence intervals.
- If results exist: includes ablation studies to show that specific parts of the method are relevant.
- Discusses limitations of the method.
- Make sure to include all the results from the experiments, and include all relevant figures.
""",
"discussion": """
- Brief recap of the entire paper.
- To keep going with the analogy, you can think of future work as (potential) academic offspring.
""",
}
class PaperSolver:
def __init__(self, llm_str, notes=None, max_steps=10, insights=None, plan=None, exp_code=None, exp_results=None, lit_review=None, ref_papers=None, topic=None, openai_api_key=None, compile_pdf=True, save_loc=None):
self.supress_print = True
if notes is None: self.notes = []
else: self.notes = notes
if plan is None: self.plan = ""
else: self.plan = plan
if exp_code is None: self.exp_code = ""
else: self.exp_code = exp_code
if exp_results is None: self.exp_results = ""
else: self.exp_results = exp_results
if lit_review is None: self.lit_review = ""
else: self.lit_review = lit_review
if insights is None: self.insights = ""
else: self.insights = insights
if ref_papers is None: self.ref_papers = ""
else: self.ref_papers = ref_papers
if topic is None: self.topic = ""
else: self.topic = topic
self.save_loc = save_loc
self.compile_pdf = compile_pdf
self.llm_str = llm_str
self.notes = notes
self.max_papers = 1
self.st_hist_len = 10
self.min_gen_trials = 2
self.max_steps = max_steps
self.paper_lines = str()
self.prev_paper_ret = str()
self.section_related_work = {}
self.openai_api_key = openai_api_key
def solve(self):
    """
    Run one improvement round: query the model for commands until a scored
    result exists and at least self.min_gen_trials attempts were made, then
    keep the best-scoring attempt
    @return: (tuple) (model response, command return string) of the best attempt
    """
    attempt_idx = 0
    winner = None
    winning_score = None
    self.prev_paper_ret = None
    while True:
        # every attempt restarts from one of the stored best reports
        seed_report = random.choice(self.best_report)
        self.paper_lines = copy(seed_report[0])
        reply = query_model(
            model_str=self.model,
            system_prompt=self.system_prompt(),
            prompt="\nNow please enter a command: ",
            temp=1.0,
            openai_api_key=self.openai_api_key)
        reply = self.clean_text(reply)
        outcome, edited_lines, latex_ret, score = self.process_command(reply)
        # remember this attempt when it is the first scored one or beats the best so far
        if score is not None and (winning_score is None or score > winning_score):
            winner = copy(edited_lines), copy(latex_ret), copy(reply), copy(outcome)
            winning_score = score
        if winning_score is not None and attempt_idx >= self.min_gen_trials:
            break
        if not self.supress_print:
            print(f"@@@ Command Exec // Attempt {attempt_idx}: ", str(outcome).replace("\n", " | "))
            print(f"$$$ Score: {score}")
        attempt_idx += 1
    self.paper_lines, self.prev_paper_ret, best_reply, best_outcome = winner
    # store the winner only when it beats the lowest stored report
    if winning_score > self.best_report[-1][1]:
        # make room by dropping the lowest scoring one
        if len(self.best_report) >= self.max_papers:
            self.best_report.pop(-1)
        self.best_report.append((copy(self.paper_lines), copy(winning_score), self.prev_paper_ret))
        # highest score first, so the last entry is always the lowest
        self.best_report.sort(key=lambda entry: entry[1], reverse=True)
    return best_reply, best_outcome
def initial_solve(self):
    """
    Bootstrap the solver: generate the first report with the REPLACE command,
    store it as the only best report, then switch to the EDIT command
    @return: None
    """
    # --- phase 1: the initial report is produced with REPLACE only ---
    self.best_score = None
    self.model = f"{self.llm_str}"
    self.commands = [PaperReplace(self.save_loc)]
    first_report, first_return, self.best_score = self.gen_initial_report()
    self.best_report = [(copy(first_report), self.best_score, first_return)]
    self.paper_lines = first_report
    # --- phase 2: later rounds work through EDIT ---
    self.model = f"{self.llm_str}"
    self.commands = [PaperEdit(self.save_loc)]
    self.prev_working_report = copy(self.paper_lines)
@staticmethod
def clean_text(text):
text = text.replace("```\n", "```")
return text
def gen_initial_report(self):
    """
    Build the first draft: ask the model for a latex scaffold, then fill the
    scaffold one section at a time, retrying a section until its command
    executes successfully
    @return: (tuple) (latex lines, latex return, score) of the last successful command
    """
    num_attempts = 0
    arx = ArxivSearch()
    section_scaffold = str()
    # minimal scaffold (some other sections can be combined)
    required_placeholders = ["[ABSTRACT HERE]", "[INTRODUCTION HERE]", "[METHODS HERE]", "[RESULTS HERE]", "[DISCUSSION HERE]"]
    # 1. Abstract 2. Introduction, 3. Background, 4. Methods, 5. Experimental Setup 6. Results, and 7. Discussion
    for _section in ["scaffold", "abstract", "introduction", "related work", "background", "methods", "experimental setup", "results", "discussion"]:
        section_complete = False
        if _section in ["introduction", "related work", "background", "methods", "discussion"]:
            # look up citable papers for this section, retrying with simpler queries
            attempts = 0
            papers = str()
            first_attempt = True
            while len(papers) == 0:
                att_str = str()
                if attempts > 5:
                    break
                if not first_attempt:
                    att_str = "This is not your first attempt please try to come up with a simpler search query."
                search_query = query_model(model_str=f"{self.llm_str}", prompt=f"Given the following research topic {self.topic} and research plan: \n\n{self.plan}\n\nPlease come up with a search query to find relevant papers on arXiv. Respond only with the search query and nothing else. This should be a a string that will be used to find papers with semantically similar content. {att_str}", system_prompt=f"You are a research paper finder. You must find papers for the section {_section}. Query must be text nothing else.", openai_api_key=self.openai_api_key)
                # str.replace returns a new string, so the result has to be kept
                search_query = search_query.replace('"', '')
                # NOTE(review): guards against a None result from the search — confirm what find_papers_by_str returns on failure
                papers = arx.find_papers_by_str(query=search_query, N=10) or str()
                first_attempt = False
                attempts += 1
            if len(papers) != 0:
                self.section_related_work[_section] = papers
        while not section_complete:
            section_scaffold_temp = copy(section_scaffold)
            if num_attempts == 0: err = str()
            else: err = f"The following was the previous command generated: {model_resp}. This was the error return {cmd_str}. You should make sure not to repeat this error and to solve the presented problem."
            if _section == "scaffold":
                prompt = f"{err}\nNow please enter the ```REPLACE command to create the scaffold:\n "
            else:
                rp = str()
                if _section in self.section_related_work:
                    rp = f"Here are related papers you can cite: {self.section_related_work[_section]}. You can cite them just by putting the arxiv ID in parentheses, e.g. (arXiv 2308.11483v1)\n"
                prompt = f"{err}\n{rp}\nNow please enter the ```REPLACE command to create the designated section, make sure to only write the text for that section and nothing else. Do not include packages or section titles, just the section content:\n "
            model_resp = query_model(
                model_str=self.model,
                system_prompt=self.system_prompt(section=_section),
                prompt=f"{prompt}",
                temp=0.8,
                openai_api_key=self.openai_api_key)
            model_resp = self.clean_text(model_resp)
            if _section == "scaffold":
                # The check is done outside any inner loop so that `continue`
                # restarts the retry loop instead of merely skipping a placeholder.
                if any(_sect not in model_resp for _sect in required_placeholders):
                    cmd_str = "Error: scaffold section placeholders were not present (e.g. [ABSTRACT HERE])."
                    if not self.supress_print: print("@@@ INIT ATTEMPT:", cmd_str)
                    # count the attempt so the error text is fed back on the retry
                    num_attempts += 1
                    continue
            else:
                new_text = extract_prompt(model_resp, "REPLACE")
                section_scaffold_temp = section_scaffold_temp.replace(f"[{_section.upper()} HERE]", new_text)
                model_resp = '```REPLACE\n' + copy(section_scaffold_temp) + '\n```'
                if "documentclass{article}" in new_text or "usepackage{" in new_text:
                    cmd_str = "Error: You must not include packages or documentclass in the text! Your latex must only include the section text, equations, and tables."
                    if not self.supress_print: print("@@@ INIT ATTEMPT:", cmd_str)
                    num_attempts += 1
                    continue
            cmd_str, latex_lines, prev_latex_ret, score = self.process_command(model_resp, scoring=False)
            if not self.supress_print: print(f"@@@ INIT ATTEMPT: Command Exec // Attempt {num_attempts}: ", str(cmd_str).replace("\n", " | "))
            if score is not None:
                section_complete = True
                section_scaffold = "\n".join(latex_lines)
            num_attempts += 1
        self.paper_lines = section_scaffold.split("\n")
        if not self.supress_print: print("$"*10, f"SCAFFOLD [{_section}] CREATED", "$"*10)
    if not self.supress_print: print("$"*10, "SCAFFOLD CREATED", "$"*10)
    return latex_lines, prev_latex_ret, score
def process_command(self, model_resp, scoring=True):
    """
    Take command from language model and execute if valid
    @param model_resp: (str) language model output
    @param scoring: (bool) score the paper with get_score; when False a fixed score of 0.0 is used
    @return: (tuple) tuple containing the following items
        - cmd_str: (str) paper execution return and success flag
        - paper_lines: (list) list of paper lines as strings (unchanged when the command failed)
        - prev_paper_ret: (str) output from running paper
        - score: (float) score of model
    """
    cmd_str = None
    score = None
    prev_paper_ret = self.prev_paper_ret
    paper_lines = copy(self.paper_lines)
    # bare figure file names are rewritten to absolute paths under the current working directory
    cwd = os.getcwd()
    for _fig in ("Figure_1.png", "Figure_2.png"):
        model_resp = model_resp.replace("\\includegraphics[width=\\textwidth]{" + _fig + "}", "\\includegraphics[width=\\textwidth]{" + cwd + "/" + _fig + "}")
    for cmd in self.commands:
        if not cmd.matches_command(model_resp):
            continue
        # attempt to execute the paper edit command
        if cmd.cmd_type == "PAPER-edit":
            score = None
            failed = True
            edited_lines, latex_ret = None, None
            success, args = cmd.parse_command(model_resp, paper_lines)
            if not success:
                paper_err = "The EDIT command could not be parsed; expected ```EDIT N M followed by the new lines."
            else:
                # a copy is handed over so a rejected edit cannot alter paper_lines
                success, edited_lines, latex_ret = cmd.execute_command((args[0], args[1], copy(paper_lines), args[3], self.compile_pdf))
                # report the actual latex output (not the parsed line index)
                paper_err = f"Return from executing latex: {latex_ret}"
                if success:
                    if scoring:
                        score, cmd_str, is_valid = get_score(self.plan, "\n".join(edited_lines), reward_model_llm=self.llm_str)
                    else:
                        score, cmd_str, is_valid = 0.0, "Paper scored successfully", True
                    if is_valid: failed = False
                    paper_err += f"\nReturn from executing latex: {cmd_str}"
            if failed:
                # paper_lines still holds the pre-edit text here
                cmd_str = f"Paper edit FAILED due to the following error: {paper_err}. Paper was reverted back to original state before edits."
                if not self.supress_print: print("$$$$ PAPER EDIT (failed)")
            else:
                cmd_str = "Paper was successfully edited."
                paper_lines = copy(edited_lines)
                prev_paper_ret = copy(latex_ret)
                if not self.supress_print: print("$$$$ PAPER EDIT (success)")
        elif cmd.cmd_type == "PAPER-replace":
            score = None
            failed = True
            success, args = cmd.parse_command(model_resp, self.compile_pdf)
            paper_err = f"Return from executing latex: {args[1]}"
            if success:
                if scoring:
                    score, cmd_str, is_valid = get_score(self.plan, "\n".join(args[0]), reward_model_llm=self.llm_str)
                else:
                    score, cmd_str, is_valid = 0.0, "Paper scored successfully", True
                if is_valid: failed = False
                paper_err += f"\nReturn from executing code on real test set {cmd_str}"
            if failed:
                # paper_lines is only replaced on success, so it is still the original
                cmd_str = f"Paper replacement FAILED due to the following error: {paper_err}. Paper was reverted back to original state before edits."
                if not self.supress_print: print("$$$$ PAPER REPLACE (failed)")
            else:
                cmd_str = "Paper was successfully replaced."
                paper_lines = copy(args[0])
                prev_paper_ret = copy(args[1])
                if not self.supress_print: print("$$$$ PAPER REPLACE (success)")
    return cmd_str, paper_lines, prev_paper_ret, score
def generate_paper_lines(self, code):
    """
    Render the paper with its zero-based index in front of every line
    @param code: (list) list of code line strings
    @return: (str) one "<index> |<line>" entry per input line, each newline-terminated
    """
    return "".join(f"{_position} |{_line}\n" for _position, _line in enumerate(code))
def system_prompt(self, commands=True, section=None):
    """
    Produce a system prompt for the paper-solver
    @param commands: (bool) whether to use command prompt
    @param section: (str) section being written ("scaffold" or a per_section_tips key), or None for whole-paper editing
    @return: (str) system prompt
    """
    if section == "abstract": length = "This section should be ONLY 1 paragraph."
    else: length = "This section should be approximately 2-4 paragraphs and so your output should be several paragraphs of latex."
    methods_str = str()
    if section == "methods":
        # the \includegraphics lines must stay exactly as written: process_command searches for them verbatim
        fig1_text = "\n\\begin{figure}[h]\n\\caption{}\n\\centering\n\\includegraphics[width=\\textwidth]{Figure_1.png}\n\\label{fig:fig1}\n\\end{figure}\n"
        # each figure gets its own label (both used fig:fig1 before)
        fig2_text = "\n\\begin{figure}[h]\n\\caption{}\n\\centering\n\\includegraphics[width=\\textwidth]{Figure_2.png}\n\\label{fig:fig2}\n\\end{figure}\n"
        has_fig1 = os.path.exists("Figure_1.png")
        has_fig2 = os.path.exists("Figure_2.png")
        if has_fig1 and has_fig2:
            methods_str += f"You ABSOLUTELY must without fail also include Figure_1.png and Figure_2.png in your paper using {fig1_text} and {fig2_text} on a new line. Make sure to place these figures in separate locations."
        elif has_fig1:
            methods_str += f"You ABSOLUTELY must without fail also include Figure_1.png in your paper using {fig1_text} on a new line.\n"
        elif has_fig2:
            methods_str += f"You ABSOLUTELY must without fail also include Figure_2.png in your paper using {fig2_text} on a new line.\n"
    if section == "scaffold":
        section_cmd = "Your objective right now is to only build the scaffolding for the paper. You should not include any text in the body of the paper, but should have an empty scaffold for each of the sections. Where the sections go, write [ABSTRACT HERE] for abstract, and write [INTRODUCTION HERE] for the introduction... etc. Your paper should have the following sections: 1. Abstract 2. Introduction, 3. Background, 4. Related Work 5. Methods, 6. Experimental Setup 7. Results, and 8. Discussion. Just create the scaffolding as compilable latex. Your title should start with Research Report: [title here] where title here is a title you choose. For author write Agent Laboratory."
    elif section is not None:
        # an unknown section simply gets no tips instead of raising KeyError
        tips = per_section_tips.get(section, "")
        # "\\%" is the escaped spelling of backslash + percent (a bare "\%" is an invalid escape sequence)
        section_cmd = f"Your only goal is to generate latex for the following {section}. DO NOT INCLUDE ANY PACKAGES OR ANY SECTION COMMANDS. DO NOT INCLUDE A TITLE OR DATE ONLY TEXT. You only have to generate text for this specific section and do not have to output anything else. {length} I repeat DO NOT INCLUDE ANY PACKAGES OR ANY SECTION COMMANDS. DO NOT INCLUDE A TITLE OR DATE ONLY TEXT. Use as many equations as you find necessary. You should include mathematical equations, numbers, and tables where necessary. Remember that to include a percentage sign % you must add a backslash \\% or else it will become a comment. Here are some tips {tips} {methods_str}.\n\n"
    else: section_cmd = ""
    # Lines are joined with a newline so the last word of one line and the
    # first word of the next are counted separately.  paper_lines starts out
    # as an (empty) string, which is used as-is.
    if isinstance(self.paper_lines, str): paper_text = self.paper_lines
    else: paper_text = "\n".join(self.paper_lines)
    paper_len = sum(word.strip(string.punctuation).isalpha() for word in paper_text.split())
    if paper_len < 4000: paper_progress = f"The current length of the paper is {paper_len} words, you must increase this by {4000-paper_len} words."
    else: paper_progress = ""
    if not self.supress_print: print(paper_progress)
    cmd_set = f"The following are commands you have access to: {self.command_descriptions()}\n." if commands else ""
    if len(self.ref_papers) == 0: ref_papers = ""
    else:
        refpapers = '\n'.join(self.ref_papers)
        ref_papers = f"Here is a reference paper that is high quality:\n{refpapers}\n\n\n"
    # the literature review is capped at 20000 characters
    lit_review_str = str(self.lit_review)[:20000]
    return (
        f"{ref_papers}"
        # ROLE DESCRIPTION
        f"{self.role_description()}.\n"
        # TASK INSTRUCTIONS
        f"The following are your task instructions: {self.phase_prompt()}\n"
        # NOTES (newline added so the next sentence does not run into the notes)
        f"The following are notes, instructions, and general tips for you: {self.notes}\n"
        # LIT REVIEW
        f"The following literature review was provided for the paper:\n{lit_review_str}\n"
        # PLAN DESCRIPTION
        f"You are given a paper report writing task. The original research plan was described as follows: {self.plan}\n"
        # EXPERIMENT CODE
        f"A team of research wrote the following code, following this plan: {self.exp_code}\n"
        # EXPERIMENT RESULTS (newline added to separate it from the insights)
        f"After running this code, the following results were observed: {self.exp_results}\n Your results must ACCURATELY reflect the numbers presented here.\n"
        # EXPERIMENT RESULT INSIGHTS
        f"Provided was an interpretation of the experimental results:\n{self.insights}\n"
        f"Your writing style should be boring and objective.\n"
        # transition
        f"Your goal is to write a research paper as well as possible. You will receive a score after you write the paper and should aim to maximize the score by writing a high quality research paper. The paper length should be 8 pages or 4000 words in total. It should be quite long and comprehensive. Remember, the paper MUST BE LONG. {paper_progress}\n"
        # COMMAND SET
        f"{cmd_set}\n"
        # PAPER
        f"Provided here is your current paper {self.generate_paper_lines(self.paper_lines)}"
        # optional section command
        f"{section_cmd}"
    )
def command_descriptions(self):
    """
    Describe the command syntax followed by the docstring of every active command
    @return: (str) command descriptions
    """
    tool_docs = [_tool.docstring() for _tool in self.commands]
    preamble = "\nYou also have access to tools which can be interacted with using the following structure: ```COMMAND\n\n```, where COMMAND is whichever command you want to run (e.g. EDIT,...), is information used for the command and ``` are meant to encapsulate the command. ``` must be included as part of the command both at the beginning and at the end of the command. DO NOT FORGOT TO HAVE ``` AT THE TOP AND BOTTOM OF COMMAND. and this structure must be followed to execute a command correctly. YOU CAN ONLY EXECUTE A SINGLE COMMAND AT A TIME! Do not try to perform multiple commands EVER only one."
    return preamble + "\n".join(tool_docs)
def role_description(self):
    """
    Return the fixed persona text placed near the top of the system prompt
    @return: (str) role description
    """
    role = "You are a computer science PhD student at a top university who has submitted their paper to an ML conference called ICLR. Your goal was to write a research paper and get high scores from the reviewers so that it get accepted to the conference. Your paper should be approximately 8 pages and around 4000 words. Your article should ONLY CONTAIN EIGHT sections as follows: 1. Abstract 2. Introduction, 3. Background, 4. Related Work 5. Methods, 6. Experimental Setup 7. Results, and 8. Discussion.\n"
    return role
def phase_prompt(self,):
    """
    Return the fixed task instructions used in the paper-solver system prompt
    @return: (str) task instructions
    """
    return "You are a PhD student who has submitted a paper to an ML conference called ICLR. Your goal was to write a research paper and get high scores from the reviewers so that it get accepted to the conference.\n"
================================================
FILE: requirements.txt
================================================
absl-py==2.1.0
accelerate==1.1.1
aiohappyeyeballs==2.4.3
aiohttp==3.11.7
aiosignal==1.3.1
annotated-types==0.7.0
anthropic==0.39.0
anyio==4.6.2.post1
arxiv==2.1.3
astunparse==1.6.3
async-timeout==5.0.1
attrs==24.2.0
blis==1.0.1
catalogue==2.0.10
certifi==2024.8.30
charset-normalizer==3.4.0
click==8.1.7
cloudpathlib==0.20.0
confection==0.1.5
contourpy==1.3.0
cycler==0.12.1
cymem==2.0.10
datasets==3.1.0
diffusers==0.31.0
dill==0.3.8
distro==1.9.0
exceptiongroup==1.2.2
feedparser==6.0.11
filelock==3.16.1
flatbuffers==24.3.25
fonttools==4.55.0
frozenlist==1.5.0
fsspec==2024.9.0
gast==0.6.0
google-pasta==0.2.0
grpcio==1.68.0
h11==0.14.0
h5py==3.12.1
httpcore==1.0.7
httpx==0.27.2
huggingface-hub==0.26.2
idna==3.10
imageio==2.36.0
importlib_metadata==8.5.0
importlib_resources==6.4.5
Jinja2==3.1.4
jiter==0.7.1
joblib==1.4.2
keras==3.7.0
kiwisolver==1.4.7
langcodes==3.5.0
language_data==1.3.0
lazy_loader==0.4
libclang==18.1.1
marisa-trie==1.2.1
Markdown==3.7
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib==3.9.2
mdurl==0.1.2
ml-dtypes==0.4.1
mpmath==1.3.0
multidict==6.1.0
multiprocess==0.70.16
murmurhash==1.0.11
namex==0.0.8
nest-asyncio==1.6.0
networkx==3.2.1
nltk==3.9.1
numpy==2.0.2
openai==1.55.1
opt_einsum==3.4.0
optree==0.13.1
packaging==24.2
pandas==2.2.3
patsy==1.0.1
pillow==11.0.0
plotly==5.24.1
preshed==3.0.9
propcache==0.2.0
protobuf==5.28.3
psutil==6.1.0
pyarrow==18.1.0
pydantic==2.10.2
pydantic_core==2.27.1
Pygments==2.18.0
pyparsing==3.2.0
pypdf==5.1.0
python-dateutil==2.9.0.post0
pytz==2024.2
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
rich==13.9.4
sacremoses==0.1.1
safetensors==0.4.5
scikit-image==0.24.0
scikit-learn==1.5.2
scipy==1.13.1
seaborn==0.13.2
semanticscholar==0.8.4
sgmllib3k==1.0.0
shellingham==1.5.4
six==1.16.0
smart-open==7.0.5
sniffio==1.3.1
spacy==3.8.2
spacy-legacy==3.0.12
spacy-loggers==1.0.5
srsly==2.4.8
statsmodels==0.14.4
sympy==1.13.1
tenacity==9.0.0
termcolor==2.5.0
thinc==8.3.2
threadpoolctl==3.5.0
tifffile==2024.8.30
tiktoken==0.8.0
tokenizers==0.20.4
torch==2.5.1
tqdm==4.67.1
transformers==4.46.3
typer==0.13.1
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
wasabi==1.1.3
weasel==0.4.1
Werkzeug==3.1.3
wrapt==1.17.0
xxhash==3.5.0
yarl==1.18.0
zipp==3.21.0
google-generativeai
PyPDF2
================================================
FILE: tools.py
================================================
from utils import *
import os
import time
import arxiv
import io, sys
import traceback
import matplotlib
import numpy as np
import multiprocessing
from pypdf import PdfReader
from datasets import load_dataset
from psutil._common import bytes2human
from datasets import load_dataset_builder
from semanticscholar import SemanticScholar
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer
class HFDataSearch:
def __init__(self, like_thr=3, dwn_thr=50) -> None:
    """
    Load the dataset catalogue and index the entries that pass the filters
    :param like_thr: minimum number of likes an entry needs to be kept
    :param dwn_thr: minimum number of downloads an entry needs to be kept
    """
    self.dwn_thr = dwn_thr
    self.like_thr = like_thr
    self.ds = load_dataset("nkasmanoff/huggingface-datasets")["train"]
    # one (index, description, likes, downloads) record per kept entry
    kept = []
    for row_idx, row in enumerate(self.ds):
        # missing counts are treated as zero
        n_likes = 0 if row['likes'] is None else int(row['likes'])
        n_downloads = 0 if row['downloads'] is None else int(row['downloads'])
        if n_likes < self.like_thr or n_downloads < self.dwn_thr:
            continue
        text = row['description']
        # only entries with a non-blank string description can be vectorized
        if not isinstance(text, str) or not text.strip():
            continue
        kept.append((row_idx, text, n_likes, n_downloads))
    if not kept:
        # nothing passed the filters: leave the instance in an empty, searchable state
        print("No datasets meet the specified criteria.")
        self.ds = []
        self.descriptions = []
        self.likes_norm = []
        self.downloads_norm = []
        self.description_vectors = None
        return
    kept_indices, kept_descriptions, kept_likes, kept_downloads = (list(column) for column in zip(*kept))
    self.ds = self.ds.select(kept_indices)
    self.descriptions = kept_descriptions
    self.likes = np.array(kept_likes)
    self.downloads = np.array(kept_downloads)
    # min-max scaled popularity signals
    self.likes_norm = self._normalize(self.likes)
    self.downloads_norm = self._normalize(self.downloads)
    # TF-IDF vectors of the descriptions
    self.vectorizer = TfidfVectorizer()
    self.description_vectors = self.vectorizer.fit_transform(self.descriptions)
def _normalize(self, arr):
min_val = arr.min()
max_val = arr.max()
if max_val - min_val == 0:
return np.zeros_like(arr, dtype=float)
return (arr - min_val) / (max_val - min_val)
def retrieve_ds(self, query, N=10, sim_w=1.0, like_w=0.0, dwn_w=0.0):
    """
    Retrieves the top N datasets matching the query, weighted by likes and downloads.
    :param query: The search query string.
    :param N: The number of results to return.
    :param sim_w: Weight for cosine similarity.
    :param like_w: Weight for likes.
    :param dwn_w: Weight for downloads.
    :return: List of top N dataset items, each annotated with split availability and sizes.
    """
    if not self.ds or self.description_vectors is None:
        print("No datasets available to search.")
        return []
    query_vector = self.vectorizer.transform([query])
    similarity = self._normalize(linear_kernel(query_vector, self.description_vectors).flatten())
    # weighted blend of the three normalized signals
    final_scores = sim_w * similarity + like_w * self.likes_norm + dwn_w * self.downloads_norm
    # best score first, as plain Python ints
    ranked = [int(_idx) for _idx in final_scores.argsort()[-N:][::-1]]
    top_datasets = [self.ds[_idx] for _idx in ranked]
    for entry in top_datasets:
        has_test = has_train = False
        test_dwn_size = test_elem_size = None
        train_dwn_size = train_elem_size = None
        # NOTE(review): trust_remote_code=True is passed to the builder for every hit — confirm this is acceptable for arbitrary search results
        try:
            info = load_dataset_builder(entry["id"], trust_remote_code=True).info
        except Exception:
            # a dataset whose builder cannot be loaded is reported without split details
            info = None
        if info is not None and info.splits is not None:
            splits = info.splits
            has_test, has_train = "test" in splits, "train" in splits
            if has_test:
                test_dwn_size = bytes2human(splits["test"].num_bytes)
                test_elem_size = splits["test"].num_examples
            if has_train:
                train_dwn_size = bytes2human(splits["train"].num_bytes)
                train_elem_size = splits["train"].num_examples
        entry["has_test_set"] = has_test
        entry["has_train_set"] = has_train
        entry["test_download_size"] = test_dwn_size
        entry["test_element_size"] = test_elem_size
        entry["train_download_size"] = train_dwn_size
        entry["train_element_size"] = train_elem_size
    return top_datasets
def results_str(self, results):
    """
    Render search results as human-readable text blocks.
    :param results: (list(dict)) list of results from search
    :return: (list(str)) one multi-line summary string per result
    """
    # (label, dict key) pairs, in the order they are printed for each result
    fields = (
        ("Dataset ID", "id"),
        ("Description", "description"),
        ("Likes", "likes"),
        ("Downloads", "downloads"),
        ("Has Testing Set", "has_test_set"),
        ("Has Training Set", "has_train_set"),
        ("Test Download Size", "test_download_size"),
        ("Test Dataset Size", "test_element_size"),
        ("Train Download Size", "train_download_size"),
        ("Train Dataset Size", "train_element_size"),
    )
    return [
        "".join(f"{label}: {entry[key]}\n" for label, key in fields)
        for entry in results
    ]
class SemanticScholarSearch:
    """Keyword paper search backed by the Semantic Scholar client."""

    def __init__(self):
        self.sch_engine = SemanticScholar(retry=False)

    def find_papers_by_str(self, query, N=10):
        """
        Search Semantic Scholar and summarise each hit as text.
        :param query: (str) free-text search query
        :param N: (int) value passed to the client as ``limit``
        :return: (list(str)) one multi-line summary per paper
        """
        results = self.sch_engine.search_paper(query, limit=N, min_citation_count=3, open_access_pdf=True)
        paper_sums = list()
        # NOTE(review): index-based access is kept on purpose; iterating the result
        # object directly may fetch further result pages -- confirm against the client docs.
        for _i in range(len(results)):
            paper = results[_i]
            # Records may lack a publication date; report it instead of crashing.
            pub_date = paper.publicationDate
            if pub_date is None:
                release = "unknown"
            else:
                release = f"year {pub_date.year}, month {pub_date.month}, day {pub_date.day}"
            # externalIds may be missing entirely or carry no DOI entry.
            doi = (paper.externalIds or {}).get("DOI", "unknown")
            paper_sum = f'Title: {paper.title}\n'
            paper_sum += f'Abstract: {paper.abstract}\n'
            paper_sum += f'Citations: {paper.citationCount}\n'
            paper_sum += f'Release Date: {release}\n'
            paper_sum += f'Venue: {paper.venue}\n'
            paper_sum += f'Paper ID: {doi}\n'
            paper_sums.append(paper_sum)
        return paper_sums

    def retrieve_full_paper_text(self, query):
        """Not implemented; always returns None."""
        pass
class ArxivSearch:
    """arXiv search and full-text retrieval built on the ``arxiv`` client."""

    def __init__(self):
        # Construct the default API client.
        self.sch_engine = arxiv.Client()

    def _process_query(self, query: str) -> str:
        """
        Shorten a query to at most MAX_QUERY_LENGTH characters on word boundaries.
        :param query: (str) raw search query
        :return: (str) the query unchanged if short enough, otherwise its longest
            whitespace-separated prefix that fits; a single overlong first word
            is hard-truncated so the result is not empty
        """
        MAX_QUERY_LENGTH = 300
        if len(query) <= MAX_QUERY_LENGTH:
            return query
        words = query.split()
        kept = []
        used = 0
        for word in words:
            # A separating space is only needed once a previous word was kept.
            needed = len(word) + (1 if kept else 0)
            if used + needed > MAX_QUERY_LENGTH:
                break
            kept.append(word)
            used += needed
        if not kept and words:
            # The very first word alone exceeds the limit: keep its prefix.
            return words[0][:MAX_QUERY_LENGTH]
        return ' '.join(kept)

    def find_papers_by_str(self, query, N=20):
        """
        Search arXiv abstracts and return a text digest of the hits.
        :param query: (str) search query; shortened via ``_process_query``
        :param N: (int) maximum number of results requested
        :return: (str | None) newline-joined paper summaries, or None when all
            three attempts raised
        """
        processed_query = self._process_query(query)
        max_retries = 3
        for attempt in range(1, max_retries + 1):
            try:
                search = arxiv.Search(
                    query="abs:" + processed_query,
                    max_results=N,
                    sort_by=arxiv.SortCriterion.Relevance)
                paper_sums = list()
                # `results` is a generator; entries are fetched while iterating.
                for r in self.sch_engine.results(search):
                    paperid = r.pdf_url.split("/")[-1]
                    pubdate = str(r.published).split(" ")[0]
                    paper_sum = f"Title: {r.title}\n"
                    paper_sum += f"Summary: {r.summary}\n"
                    paper_sum += f"Publication Date: {pubdate}\n"
                    paper_sum += f"arXiv paper ID: {paperid}\n"
                    paper_sums.append(paper_sum)
                time.sleep(2.0)
                return "\n".join(paper_sums)
            except Exception:
                # Back off a little longer after each failed attempt.
                if attempt < max_retries:
                    time.sleep(2 * attempt)
        return None

    def retrieve_full_paper_text(self, query, MAX_LEN=50000):
        """
        Download a paper by arXiv id and return its extracted text.
        :param query: (str) arXiv identifier passed as ``id_list=[query]``
        :param MAX_LEN: (int) maximum number of characters returned
        :return: (str) page-delimited text truncated to MAX_LEN, or the literal
            "EXTRACTION FAILED" when text extraction of a page raises
        :raises StopIteration: when the id lookup yields no result
        """
        # NOTE(review): fixed filename in the working directory -- concurrent
        # callers sharing a working directory would collide; confirm usage.
        pdf_name = "downloaded-paper.pdf"
        paper = next(arxiv.Client().results(arxiv.Search(id_list=[query])))
        page_texts = []
        extraction_failed = False
        try:
            paper.download_pdf(filename=pdf_name)
            reader = PdfReader(pdf_name)
            for page_number, page in enumerate(reader.pages, start=1):
                try:
                    text = page.extract_text()
                except Exception:
                    extraction_failed = True
                    break
                page_texts.append(f"--- Page {page_number} ---{text or ''}\n")
        finally:
            # Always clean up the temporary PDF, even if download/parsing raised.
            if os.path.exists(pdf_name):
                os.remove(pdf_name)
        time.sleep(2.0)
        if extraction_failed:
            return "EXTRACTION FAILED"
        return "".join(page_texts)[:MAX_LEN]
# Set the non-interactive 'Agg' backend early in the module, before pyplot is
# imported on the next line.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
def worker_run_code(code_str, output_queue):
    """
    Execute a code string and report everything it printed.
    :param code_str: (str) Python source to run with ``__name__ == "__main__"``
    :param output_queue: queue-like object; the captured stdout text (plus an
        error header and traceback if the code raised) is ``put`` on it
    """
    output_capture = io.StringIO()
    # Remember the active stream so an existing redirection is restored afterwards.
    previous_stdout = sys.stdout
    sys.stdout = output_capture
    try:
        # Create a globals dictionary with __name__ set to "__main__"
        globals_dict = {"__name__": "__main__"}
        # NOTE(review): exec runs arbitrary code with no sandboxing in this function.
        exec(code_str, globals_dict)
    except Exception as e:
        output_capture.write(f"[CODE EXECUTION ERROR]: {str(e)}\n")
        traceback.print_exc(file=output_capture)
    finally:
        sys.stdout = previous_stdout
        # Reported from `finally` so output is delivered even on SystemExit etc.
        output_queue.put(output_capture.getvalue())
def execute_code(code_str, timeout=600, MAX_LEN=1000):
    """
    Run a code string in a separate process and return what it printed.
    :param code_str: (str) Python source; ``from utils import *`` is prepended
    :param timeout: (float | None) seconds to wait before the process is terminated
    :param MAX_LEN: accepted for interface compatibility; not used by this function
    :return: (str) captured output, or a "[CODE EXECUTION ERROR]" message when
        the code is rejected up front or exceeds the timeout
    """
    from queue import Empty  # local import: only needed for the polling loop below

    code_str = "from utils import *\n" + code_str
    if "load_dataset('pubmed" in code_str:
        return "[CODE EXECUTION ERROR] pubmed Download took way too long. Program terminated"
    if "exit(" in code_str:
        return "[CODE EXECUTION ERROR] The exit() command is not allowed you must remove this."
    output_queue = multiprocessing.Queue()
    proc = multiprocessing.Process(target=worker_run_code, args=(code_str, output_queue))
    proc.start()
    deadline = None if timeout is None else time.monotonic() + timeout

    def _time_left():
        return None if deadline is None else deadline - time.monotonic()

    # Drain the queue BEFORE joining: a child that has put a large item on a
    # multiprocessing queue cannot exit until the item is consumed, so joining
    # first would misreport large outputs as timeouts.
    output = None
    while output is None:
        left = _time_left()
        if left is not None and left <= 0:
            break
        wait = 0.25 if left is None else min(0.25, left)
        try:
            output = output_queue.get(timeout=wait)
        except Empty:
            if not proc.is_alive():
                # Worker is gone; give an in-flight item one last chance to arrive.
                try:
                    output = output_queue.get(timeout=0.25)
                except Empty:
                    output = ""
    if output is not None:
        left = _time_left()
        proc.join(None if left is None else max(0.0, left))
    if proc.is_alive():
        proc.terminate()  # Forcefully kill the process
        proc.join()
        return (f"[CODE EXECUTION ERROR]: Code execution exceeded the timeout limit of {timeout} seconds. "
                "You must reduce the time complexity of your code.")
    if output is None:
        # Deadline hit just as the worker finished: take whatever it left behind.
        try:
            output = output_queue.get_nowait()
        except Empty:
            output = ""
    return output
================================================
FILE: utils.py
================================================
import os, re
import shutil
import time
import tiktoken, openai
import subprocess, string
from openai import OpenAI
import google.generativeai as genai
from huggingface_hub import InferenceClient
def query_deepseekv3(prompt, system, api_key, attempt=0, temperature=0.0):
    """
    Query the "deepseek-chat" model through the OpenAI-compatible DeepSeek endpoint.
    :param prompt: (str) user message
    :param system: (str) system message
    :param api_key: (str) DeepSeek API key
    :param attempt: (int) starting value of the retry counter; retries stop once it reaches 10
    :param temperature: (float) sampling temperature
    :return: (str) the model reply, or a failure message after the last retry
    """
    # Retry in a loop so api_key and temperature are reused unchanged on every
    # attempt and the counter actually advances.
    while True:
        try:
            client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": prompt},
                ],
                stream=False, temperature=temperature,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Query deepseekv3 error: {e}")
            if attempt >= 10:
                return f"Your attempt to query deepseekv3 failed: {e}"
            attempt += 1
def query_qwen(prompt, system, api_key, attempt=0, temperature=0.0):
    """
    Query the "Qwen/QwQ-32B" model through the Hugging Face inference client.
    :param prompt: (str) user message
    :param system: (str | None) system message; omitted from the request when None
    :param api_key: (str) API key handed to ``InferenceClient``
    :param attempt: (int) starting value of the retry counter; retries stop once it reaches 10
    :param temperature: (float) sampling temperature
    :return: (str) the stripped model reply, or a failure message after the last retry
    """
    # Retry in a loop so api_key and temperature are reused unchanged on every
    # attempt and the counter actually advances.
    while True:
        try:
            client = InferenceClient(api_key=api_key)
            messages = [{"role": "user", "content": prompt}]
            if system is not None:
                messages.insert(0, {"role": "system", "content": system})
            completion = client.chat.completions.create(
                model="Qwen/QwQ-32B",
                messages=messages,
                max_tokens=500,
                temperature=temperature
            )
            return completion.choices[0].message.content.strip()
        except Exception as e:
            print(f"Query qwen error: {e}")
            if attempt >= 10:
                return f"Your attempt to inference qwen failed: {e}"
            attempt += 1
def query_gpt4omini(prompt, system, api_key=None, attempt=0, temperature=0.0):
    """
    Query the "gpt-4o-mini" model through the OpenAI client.
    :param prompt: (str) user message
    :param system: (str | None) system message; omitted from the request when None
    :param api_key: (str | None) OpenAI API key. When given it is stored in
        ``openai.api_key`` and the OPENAI_API_KEY environment variable; when
        None those are left untouched and the client is built from the
        existing configuration
    :param attempt: (int) starting value of the retry counter; retries stop once it reaches 10
    :param temperature: (float) sampling temperature
    :return: (str) the stripped model reply, or a failure message after the last retry
    """
    # Retry in a loop so api_key and temperature are reused unchanged on every
    # attempt and the counter actually advances.
    while True:
        try:
            if api_key is not None:
                openai.api_key = api_key
                os.environ["OPENAI_API_KEY"] = api_key
            messages = [{"role": "user", "content": prompt}]
            if system is not None:
                messages.insert(0, {"role": "system", "content": system})
            client = OpenAI()
            response = client.chat.completions.create(
                model="gpt-4o-mini", messages=messages, temperature=temperature).choices[0].message.content.strip()
            return response
        except Exception as e:
            print(f"Query 4o-mini error: {e}")
            if attempt >= 10:
                return f"Your attempt to inference gpt-4o-mini failed: {e}"
            attempt += 1
def query_gpt4o(prompt, system, api_key, attempt=0, temperature=0.0):
    """
    Query the "gpt-4o" model through the OpenAI client.
    :param prompt: (str) user message
    :param system: (str | None) system text; when not None it is prepended to
        the prompt inside a single user message
    :param api_key: (str) OpenAI API key; stored in ``openai.api_key`` and the
        OPENAI_API_KEY environment variable
    :param attempt: (int) starting value of the retry counter; retries stop once it reaches 10
    :param temperature: (float) sampling temperature
    :return: (str) the stripped model reply, or a failure message after the last retry
    """
    # Retry in a loop so api_key and temperature are reused unchanged on every
    # attempt and the counter actually advances.
    while True:
        try:
            openai.api_key = api_key
            os.environ["OPENAI_API_KEY"] = api_key
            content = prompt if system is None else system + prompt
            messages = [{"role": "user", "content": content}]
            client = OpenAI()
            response = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=temperature).choices[0].message.content.strip()
            return response
        except Exception as e:
            print(f"Query gpt-4o error: {e}")
            if attempt >= 10:
                return f"Your attempt to inference gpt-4o failed: {e}"
            attempt += 1
def query_gemini(prompt, system, api_key, attempt=0, temperature=0.0):
    """
    Query the "gemini-1.5-pro" model through ``google.generativeai``.
    :param prompt: (str) prompt passed to ``generate_content``
    :param system: (str) system instruction for the model
    :param api_key: (str) API key passed to ``genai.configure``
    :param attempt: (int) starting value of the retry counter; retries stop once it reaches 10
    :param temperature: (float) sampling temperature
    :return: (str) the stripped model reply, or a failure message after the last retry
    """
    # Retry in a loop so api_key and temperature are reused unchanged on every
    # attempt and the counter actually advances.
    while True:
        try:
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel(model_name="gemini-1.5-pro", system_instruction=system)
            response = model.generate_content(prompt, generation_config=genai.types.GenerationConfig(temperature=temperature)).text.strip()
            time.sleep(1)
            return response
        except Exception as e:
            print(f"Gemini error: {e}")
            if attempt >= 10:
                return f"Your attempt to inference gemini failed: {e}"
            time.sleep(1)
            attempt += 1
def query_gemini2p0(prompt, system, api_key, attempt=0, temperature=0.0,):
    """
    Query the "gemini-2.0-flash" model through ``google.generativeai``.
    :param prompt: (str) prompt passed to ``generate_content``
    :param system: (str) system instruction for the model
    :param api_key: (str) API key passed to ``genai.configure``
    :param attempt: (int) starting value of the retry counter; retries stop once it reaches 10
    :param temperature: (float) sampling temperature
    :return: (str) the stripped model reply, or a failure message after the last retry
    """
    # Retry in a loop so api_key and temperature are reused unchanged on every
    # attempt and the counter actually advances.
    while True:
        try:
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel(model_name="gemini-2.0-flash", system_instruction=system)
            response = model.generate_content(prompt, generation_config=genai.types.GenerationConfig(temperature=temperature)).text.strip()
            time.sleep(1)
            return response
        except Exception as e:
            print(f"Gemini error: {e}")
            if attempt >= 10:
                return f"Your attempt to inference gemini failed: {e}"
            time.sleep(1)
            attempt += 1
def compile_latex(latex_code, output_path, compile=True, timeout=30):
    """
    Write LaTeX source to ``<output_path>/tex/temp.tex`` and optionally compile it.
    :param latex_code: (str) LaTeX source; a fixed list of ``\\usepackage`` lines
        is injected after ``\\documentclass{article}``
    :param output_path: (str) base directory; the ``tex`` subdirectory is created if missing
    :param compile: (bool) when False the file is only written, not compiled
    :param timeout: (int) seconds allowed for the pdflatex run
    :return: (str) "Compilation successful..." or a "[CODE EXECUTION ERROR]" message
    """
    latex_code = latex_code.replace(
        r"\documentclass{article}",
        "\\documentclass{article}\n\\usepackage{amsmath}\n\\usepackage{amssymb}\n\\usepackage{array}\n\\usepackage{algorithm}\n\\usepackage{algorithmicx}\n\\usepackage{algpseudocode}\n\\usepackage{booktabs}\n\\usepackage{colortbl}\n\\usepackage{color}\n\\usepackage{enumitem}\n\\usepackage{fontawesome5}\n\\usepackage{float}\n\\usepackage{graphicx}\n\\usepackage{hyperref}\n\\usepackage{listings}\n\\usepackage{makecell}\n\\usepackage{multicol}\n\\usepackage{multirow}\n\\usepackage{pgffor}\n\\usepackage{pifont}\n\\usepackage{soul}\n\\usepackage{sidecap}\n\\usepackage{subcaption}\n\\usepackage{titletoc}\n\\usepackage[symbol]{footmisc}\n\\usepackage{url}\n\\usepackage{wrapfig}\n\\usepackage{xcolor}\n\\usepackage{xspace}")
    dir_path = f"{output_path}/tex"
    # Make sure the target directory exists so the write below cannot fail on it.
    os.makedirs(dir_path, exist_ok=True)
    tex_file_path = os.path.join(dir_path, "temp.tex")
    # Write the LaTeX code to the .tex file; explicit UTF-8 so non-ASCII text
    # does not depend on the platform's default encoding.
    with open(tex_file_path, "w", encoding="utf-8") as f:
        f.write(latex_code)
    if not compile:
        return "Compilation successful"
    # Compiling the LaTeX code using pdflatex with non-interactive mode and timeout
    try:
        result = subprocess.run(
            ["pdflatex", "-interaction=nonstopmode", "temp.tex"],
            check=True,  # Raises a CalledProcessError on non-zero exit codes
            stdout=subprocess.PIPE,  # Capture standard output
            stderr=subprocess.PIPE,  # Capture standard error
            timeout=timeout,  # Timeout for the process
            cwd=dir_path
        )
        # Tool output is not guaranteed to be valid UTF-8; never fail on decoding.
        return f"Compilation successful: {result.stdout.decode('utf-8', errors='replace')}"
    except subprocess.TimeoutExpired:
        # If the compilation takes too long, return a timeout message
        return "[CODE EXECUTION ERROR]: Compilation timed out after {} seconds".format(timeout)
    except subprocess.CalledProcessError:
        # If there is an error during LaTeX compilation, return the error message
        return "[CODE EXECUTION ERROR]: Compilation failed. There was an error in your latex."
def count_tokens(messages, model="gpt-4"):
    """
    Count the tokens in the ``content`` fields of a list of chat messages.
    :param messages: (list(dict)) messages, each with a "content" string
    :param model: (str) model name used to select the tiktoken encoding
    :return: (int) total number of tokens across all message contents
    """
    encoder = tiktoken.encoding_for_model(model)
    total = 0
    for msg in messages:
        total += len(encoder.encode(msg["content"]))
    return total
def remove_figures():
    """Delete entries in the current directory whose name contains both "Figure_" and ".png"."""
    figure_names = [
        name for name in os.listdir(".")
        if "Figure_" in name and ".png" in name
    ]
    for name in figure_names:
        os.remove(name)
def remove_directory(dir_path):
    """
    Delete a directory tree, printing the outcome instead of raising.
    :param dir_path: (str) path of the directory to remove
    """
    is_directory = os.path.exists(dir_path) and os.path.isdir(dir_path)
    if not is_directory:
        print(f"Directory {dir_path} does not exist or is not a directory.")
        return
    try:
        shutil.rmtree(dir_path)
        print(f"Directory {dir_path} removed successfully.")
    except Exception as e:
        print(f"Error removing directory {dir_path}: {e}")
def save_to_file(location, filename, data):
    """
    Write a string to ``location/filename``, printing the outcome instead of raising.
    :param location: (str) directory to write into
    :param filename: (str) name of the file to create or overwrite
    :param data: (str) text written verbatim
    """
    target = os.path.join(location, filename)
    try:
        with open(target, 'w') as handle:
            # The raw string is written as-is (no JSON encoding).
            handle.write(data)
        print(f"Data successfully saved to {target}")
    except Exception as err:
        print(f"Error saving file {filename}: {err}")
def clip_tokens(messages, model="gpt-4", max_tokens=100000):
    """
    Trim the oldest tokens of a conversation so it fits within a token budget.
    :param messages: (list(dict)) chat messages with "role" and "content"
    :param model: (str) model name used to select the tiktoken encoding
    :param max_tokens: (int) maximum total number of content tokens to keep
    :return: (list(dict)) ``messages`` itself when already within budget;
        otherwise new {"role", "content"} dicts in which tokens were removed
        from the front: leading messages that are consumed entirely are
        dropped, the first surviving message is cut at its start, and every
        later message keeps its own role and content
    """
    enc = tiktoken.encoding_for_model(model)
    tokenized = [(message["role"], enc.encode(message["content"])) for message in messages]
    total_tokens = sum(len(tokens) for _, tokens in tokenized)
    if total_tokens <= max_tokens:
        return messages  # No need to clip if under the limit
    # Number of tokens that must go, taken from the beginning of the conversation.
    excess = total_tokens - max_tokens
    clipped_messages = []
    for role, tokens in tokenized:
        if excess > 0 and excess >= len(tokens):
            # The whole message falls inside the removed prefix.
            excess -= len(tokens)
            continue
        if excess > 0:
            # Partially removed message: keep only its tail, under its own role.
            tokens = tokens[excess:]
            excess = 0
        clipped_messages.append({"role": role, "content": enc.decode(tokens)})
    return clipped_messages
def extract_prompt(text, word):
    """
    Collect the bodies of all fenced blocks tagged with ``word``.
    :param text: (str) text to scan
    :param word: (str) tag following the opening fence; inserted into the
        regular expression as-is
    :return: (str) block bodies joined by newlines, with outer whitespace stripped
    """
    fence = re.compile(rf"```{word}(.*?)```", re.DOTALL)
    bodies = fence.findall(text)
    return "\n".join(bodies).strip()
from typing import Dict, List
import datasets
def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """
    Map every document to a dict with its problem, solution and boxed answer.
    :param dataset: dataset whose rows provide "problem" and "solution"
    :return: result of ``dataset.map`` with "problem", "solution" and "answer" fields
    """
    def _process_doc(doc: dict) -> dict:
        problem = doc["problem"]
        solution = doc["solution"]
        # The answer is the content of the last \boxed expression in the solution.
        answer = remove_boxed(last_boxed_only_string(doc["solution"]))
        return {"problem": problem, "solution": solution, "answer": answer}

    return dataset.map(_process_doc)
def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
    """
    Score the first result against the boxed answer of the reference solution.
    :param doc: document providing the reference "solution"
    :param results: model outputs; only ``results[0]`` is used
    :return: {"exact_match": 1} when the answers are equivalent, otherwise {"exact_match": 0}
    """
    prediction = results[0]
    dollar_positions = [pos for pos, char in enumerate(prediction) if char == "$"]
    if len(dollar_positions) > 1:
        # Take everything between the first and the last "$".
        answer = prediction[dollar_positions[0] + 1 : dollar_positions[-1]]
    else:
        answer = prediction
    reference = remove_boxed(last_boxed_only_string(doc["solution"]))
    return {"exact_match": 1 if is_equiv(answer, reference) else 0}
# string normalization from https://github.com/EleutherAI/lm-evaluation-harness/blob/master/lm_eval/tasks/hendrycks_math.py
def is_equiv(str1, str2, verbose=False):
    """
    Compare two answer strings after normalisation.
    :param str1: (str | None) first answer
    :param str2: (str | None) second answer
    :param verbose: (bool) print both normalised strings before comparing
    :return: (bool) True when both are None (with a warning) or when the
        normalised strings match; falls back to a raw comparison if
        normalisation raises
    """
    if str1 is None or str2 is None:
        if str1 is None and str2 is None:
            print("WARNING: Both None")
            return True
        return False
    try:
        normalized = (strip_string(str1), strip_string(str2))
        if verbose:
            print(normalized[0], normalized[1])
        return normalized[0] == normalized[1]
    except Exception:
        return str1 == str2
def clean_answer(s):
    """
    Apply two literal clean-ups to an answer string.
    :param s: (str) answer text
    :return: (str) text with "\\dfrac" rewritten to "\\frac" and "x \\in" removed
    """
    rewrites = (
        ("\\dfrac", "\\frac"),  # makes no difference but can lead to errors
        ("x \\in", ""),
    )
    for old, new in rewrites:
        s = s.replace(old, new)
    return s
def remove_boxed(s):
    """
    Strip the surrounding \\boxed wrapper from a boxed answer.
    :param s: (str) string starting with "\\boxed " or of the form "\\boxed{...}"
    :return: (str) the wrapped content; the braced form is also passed through ``clean_answer``
    :raises AssertionError: when the string does not have the expected form
    """
    spaced_prefix = "\\boxed "
    if spaced_prefix in s:
        assert s.startswith(spaced_prefix)
        return s[len(spaced_prefix):]
    braced_prefix = "\\boxed{"
    assert s.startswith(braced_prefix)
    assert s[-1] == "}"
    inner = s[len(braced_prefix):-1]
    return clean_answer(inner)
def last_boxed_only_string(string):
    """
    Extract the last \\boxed (or \\fbox) expression from a string.
    :param string: (str) text to scan
    :return: (str | None) for the "\\boxed " form, the text after the last
        occurrence up to the next "$"; otherwise the last "\\boxed"/"\\fbox"
        through the brace that returns the nesting depth to zero; None when
        no such expression is found
    """
    if "\\boxed " in string:
        return "\\boxed " + string.split("\\boxed ")[-1].split("$")[0]
    start = string.rfind("\\boxed")
    if start < 0:
        start = string.rfind("\\fbox")
        if start < 0:
            return None
    depth = 0
    for offset, char in enumerate(string[start:]):
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                # Closing brace of the boxed expression found.
                return string[start : start + offset + 1]
    return None
def fix_fracs(string):
    """
    Add the missing braces to shorthand fractions such as "\\frac12".
    :param string: (str) LaTeX text
    :return: (str) text in which "\\frac" followed by bare characters is
        rewritten ("\\frac12" -> "\\frac{1}{2}", "\\frac1{72}" -> "\\frac{1}{72}");
        the input is returned unchanged when a "\\frac" is followed by fewer
        than two characters
    """
    head, *tails = string.split("\\frac")
    if not tails:
        return string
    pieces = [head]
    for tail in tails:
        pieces.append("\\frac")
        if tail.startswith("{"):
            # Numerator is already braced; leave the rest untouched.
            pieces.append(tail)
            continue
        if len(tail) < 2:
            # Not enough characters for numerator and denominator (this also
            # covers a trailing "\frac"): give up and return the input as-is.
            return string
        a, b, rest = tail[0], tail[1], tail[2:]
        if b != "{":
            pieces.append("{" + a + "}{" + b + "}" + rest)
        else:
            # Denominator is already braced; only wrap the numerator.
            pieces.append("{" + a + "}" + b + rest)
    return "".join(pieces)
def fix_a_slash_b(string):
    """
    Rewrite a plain integer ratio "a/b" as "\\frac{a}{b}".
    :param string: (str) candidate answer
    :return: (str) the \\frac form when the string is exactly two integers
        separated by one slash in canonical form; otherwise the input unchanged
    """
    parts = string.split("/")
    if len(parts) != 2:
        return string
    try:
        a = int(parts[0])
        b = int(parts[1])
    except ValueError:
        # Non-integer operands such as "x/y" are left alone.
        return string
    # Only rewrite when the text is the canonical rendering (no padding,
    # leading zeros, etc.).
    if string != "{}/{}".format(a, b):
        return string
    return "\\frac{" + str(a) + "}{" + str(b) + "}"
def remove_right_units(string):
    """
    Drop a trailing unit written as "\\text{ ...".
    :param string: (str) answer text
    :return: (str) the part before "\\text{ ", or the input when the marker is absent
    :raises AssertionError: when the marker occurs more than once
    """
    # "\\text{ " only ever occurs (at least in the val set) when describing units
    marker = "\\text{ "
    if marker not in string:
        return string
    pieces = string.split(marker)
    assert len(pieces) == 2
    return pieces[0]
def fix_sqrt(string):
    """
    Add braces around a bare single-character \\sqrt argument.
    :param string: (str) LaTeX text
    :return: (str) text in which "\\sqrt3" becomes "\\sqrt{3}"; arguments that
        are already braced, and a "\\sqrt" with nothing after it, are left as they are
    """
    if "\\sqrt" not in string:
        return string
    head, *tails = string.split("\\sqrt")
    pieces = [head]
    for tail in tails:
        # An empty tail means "\sqrt" ended the string or was directly
        # followed by another "\sqrt"; there is no character to wrap.
        if tail and tail[0] != "{":
            pieces.append("\\sqrt{" + tail[0] + "}" + tail[1:])
        else:
            pieces.append("\\sqrt" + tail)
    return "".join(pieces)
def strip_string(string):
    """
    Normalise a LaTeX answer string so that equivalent answers compare equal.
    :param string: (str) raw answer
    :return: (str) normalised answer
    """
    # Literal rewrites applied first, in this exact order.
    leading_rewrites = (
        ("\n", ""),           # linebreaks
        ("\\!", ""),          # inverse spaces
        ("\\\\", "\\"),       # replace \\ with \
        ("tfrac", "frac"),    # tfrac and dfrac become frac
        ("dfrac", "frac"),
        ("\\left", ""),       # remove \left and \right
        ("\\right", ""),
        ("^{\\circ}", ""),    # remove circ (degrees)
        ("^\\circ", ""),
        ("\\$", ""),          # remove dollar signs
    )
    for old, new in leading_rewrites:
        string = string.replace(old, new)
    # remove units (on the right)
    string = remove_right_units(string)
    trailing_rewrites = (
        ("\\%", ""),          # remove percentage (applied twice, as before)
        ("\\%", ""),
        (" .", " 0."),        # " ." is equivalent to " 0."
        ("{.", "{0."),        # "{." is equivalent to "{0."
    )
    for old, new in trailing_rewrites:
        string = string.replace(old, new)
    # if empty, return empty string
    if len(string) == 0:
        return string
    # add "0" if "." is the start of the string
    if string[0] == ".":
        string = "0" + string
    # get rid of a short left-hand side such as "k = " or "q = "
    sides = string.split("=")
    if len(sides) == 2 and len(sides[0]) <= 2:
        string = sides[1]
    # fix sqrt3 --> sqrt{3}
    string = fix_sqrt(string)
    # remove spaces
    string = string.replace(" ", "")
    # \frac1b or \frac12 --> \frac{1}{b} and \frac{1}{2}, etc.
    string = fix_fracs(string)
    # manually change 0.5 --> \frac{1}{2} and 5.5 --> \frac{11}{2}
    if string == "0.5":
        string = "\\frac{1}{2}"
    if string == "5.5":
        string = "\\frac{11}{2}"
    # NOTE(review): spaces were removed above, so this pattern (which contains
    # spaces) does not look reachable -- kept to preserve the original steps.
    if "(x - 3)(x + 3)" in string:
        string = string.replace("(x - 3)(x + 3)", "(x+3)(x-3)")
    # X/Y --> \frac{X}{Y} for simple cases in case the model output is X/Y
    string = fix_a_slash_b(string)
    return string
================================================
FILE: experiment_configs/MATH_agentlab.yaml
================================================
# If you want to have user input or be a human-in-the-loop
copilot-mode: True
# Here is the research prompt. If num-papers-to-write > 1, you can treat this as a "research direction" otherwise it can be *very* specific and can be treated as a full research idea
research-topic: "Your goal is to design reasoning and prompt engineering techniques to maximize accuracy on the entire 500 test questions of MATH500 benchmark. Your idea should be very novel."
# Here you can put your OpenAI API key--if you don't have one or OpenAI doesn't work for you, you can also instead use `deepseek-api-key`
api-key: "OPENAI-API-KEY-HERE"
# or deepseek-api-key: "DEEPSEEK-API-KEY-HERE"
# Agent Laboratory backend
llm-backend: "o3-mini"
# Literature review backend
lit-review-backend: "o3-mini"
# Base language
language: "English"
# Number of arxiv papers to lit review
num-papers-lit-review: 5
# Total number of papers to write in sequence
num-papers-to-write: 1
# Do you want to run multiple agent labs in parallel?
parallel-labs: False
# Total mle-solver steps per lab
mlesolver-max-steps: 3
# Total paper-solver steps per lab
papersolver-max-steps: 1
# The lab index for this lab (used for parallel runs)
lab-index: 1
# If you want to load an existing save
load-existing: False
# If fail, run exception?
except-if-fail: False
# Compile latex into PDFs during paper-solver
compile-latex: False
# Task notes
task-notes:
plan-formulation:
- 'You should come up with a plan for only ONE experiment aimed at maximizing performance on the test set of MATH using prompting techniques.'
- 'The baseline performance of gpt-4o-mini on MATH-500 is 70.2%'
- 'Please use gpt-4o-mini for your experiments'
- 'You must evaluate on the entire 500 test questions of MATH'
- 'Your plan should be a novel prompting technique'
- 'Your evaluation should aim to get state-of-the-art performance on the MATH dataset using a novel prompting idea'
- "DO NOT PLAN FOR TOO LONG. Submit your plan soon."
data-preparation:
- 'Please use gpt-4o-mini for your experiments'
- 'You must evaluate on the entire 500 test questions of MATH'
- 'Here is a sample code you can use to load MATH\nfrom datasets import load_dataset\nMATH_test_set = load_dataset("HuggingFaceH4/MATH-500")["test"]'
running-experiments:
- "For all strings you instantiate you must use triple quotes (''')"
- 'Please use gpt-4o-mini for your experiments'
- 'Do not try to obtain baseline accuracy or any comparison points. The baseline performance of gpt-4o-mini on MATH-500 is 70.2%'
- 'You can just use the query_gpt4omini(prompt=prompt, system=system_prompt) to prompt gpt-4o-mini. You can also access temperature by setting the temperature value query_gpt4omini(prompt=prompt, system=system_prompt, temperature=0.5) for example.'
- 'You must evaluate on the entire 500 test questions of MATH-500'
- "You should come up with a plan for ONE experiment aimed at maximizing performance on MATH using prompting techniques"
- "Make sure to use is_equiv() to evaluate if two answers are equivalent."
- 'Use the following code to inference gpt-4o-mini\nresponse = query_gpt4omini(prompt=prompt, system=system_prompt)'
- "Your code should parallelize inference. Make sure to write parallelized code."
- "YOU MUST MAKE YOUR CODE PARALLELIZED."
- "Create very thoughtful figures, that would make a good research study."
- 'You have access to only gpt-4o-mini'
- 'Here is some sample code to evaluate on MATH:\nimport multiprocessing\nimport concurrent.futures\nfrom datasets import load_dataset\n\ndef process_example(example):\n problem = example["problem"]\n solution = example["solution"]\n true_answer = remove_boxed(last_boxed_only_string(solution))\n prompt = f"""Solve the following math problem and provide your final answer enclosed in a LaTeX \\boxed{{...}} command.\n\nProblem: {problem}\n\nFinal Answer:"""\n response = query_gpt4omini(prompt=prompt, system="You are a skilled mathematician.")\n llm_answer = remove_boxed(last_boxed_only_string(response))\n correct = is_equiv(llm_answer, true_answer)\n return llm_answer, true_answer, correct\n\ndef main():\n math_test_set = load_dataset("HuggingFaceH4/MATH-500")["test"]\n total, correct_count = 0, 0\n max_workers = multiprocessing.cpu_count()\n with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:\n futures = [executor.submit(process_example, example) for example in math_test_set]\n for future in concurrent.futures.as_completed(futures):\n try: llm_answer, true_answer, correct = future.result()\n except Exception: continue\n total += 1\n if correct: correct_count += 1\n print(f"Step: {total}, LLM answer: {llm_answer}, True answer: {true_answer}, Accuracy: {(correct_count / total) * 100:.2f}%")\n print(f"Complete, final accuracy: {(correct_count / total) * 100:.2f}%")\n\nif __name__ == "__main__":\n main()'
- 'Generate figures with very colorful and artistic design'
results-interpretation:
- 'The baseline performance of gpt-4o-mini on MATH-500 is 70.2%'
report-writing:
- 'The baseline performance of gpt-4o-mini on MATH-500 is 70.2%'
================================================
FILE: experiment_configs/MATH_agentrxiv.yaml
================================================
# If you want to have user input or be a human-in-the-loop
copilot-mode: False
# Here is the research prompt. If num-papers-to-write > 1, you can treat this as a "research direction" otherwise it can be *very* specific and can be treated as a full research idea
research-topic: "Your goal is to design reasoning and prompt engineering techniques to maximize accuracy on the entire 500 test questions of MATH500 benchmark. Your idea should be very novel."
# Here you can put your OpenAI API key--if you don't have one or OpenAI doesn't work for you, you can also instead use `deepseek-api-key`
api-key: "OPENAI-API-KEY-HERE"
# or deepseek-api-key: "DEEPSEEK-API-KEY-HERE"
# Agent Laboratory backend
llm-backend: "o3-mini"
# Literature review backend
lit-review-backend: "o3-mini"
# Base language
language: "English"
# Number of arxiv papers to lit review
num-papers-lit-review: 5
# Number of agentRxiv papers to lit review
agentrxiv-papers: 5
# Total number of papers to write in sequence
num-papers-to-write: 40
# Do you want to run multiple agent labs in parallel?
parallel-labs: False
# Total mle-solver steps per lab
mlesolver-max-steps: 3
# Total paper-solver steps per lab
papersolver-max-steps: 1
# The lab index for this lab (used for parallel runs)
lab-index: 1
# If you want to load an existing save
load-existing: False
# If fail, run exception?
except-if-fail: False
# Compile latex into PDFs during paper-solver
compile-latex: False
# Task notes
task-notes:
plan-formulation:
- 'You should come up with a plan for only ONE experiment aimed at maximizing performance on the test set of MATH using prompting techniques.'
- 'The baseline performance of gpt-4o-mini on MATH-500 is 70.2%'
- 'Please use gpt-4o-mini for your experiments'
- 'You must evaluate on the entire 500 test questions of MATH'
- 'Your plan should be a novel prompting technique'
- 'Your evaluation should aim to get state-of-the-art performance on the MATH dataset using a novel prompting idea'
- "DO NOT PLAN FOR TOO LONG. Submit your plan soon."
data-preparation:
- 'Please use gpt-4o-mini for your experiments'
- 'You must evaluate on the entire 500 test questions of MATH'
- 'Here is a sample code you can use to load MATH\nfrom datasets import load_dataset\nMATH_test_set = load_dataset("HuggingFaceH4/MATH-500")["test"]'
running-experiments:
- "For all strings you instantiate you must use triple quotes (''')"
- 'Please use gpt-4o-mini for your experiments'
- 'Do not try to obtain baseline accuracy or any comparison points. The baseline performance of gpt-4o-mini on MATH-500 is 70.2%'
- 'You can just use the query_gpt4omini(prompt=prompt, system=system_prompt) to prompt gpt-4o-mini. You can also access temperature by setting the temperature value query_gpt4omini(prompt=prompt, system=system_prompt, temperature=0.5) for example.'
- 'You must evaluate on the entire 500 test questions of MATH-500'
- "You should come up with a plan for ONE experiment aimed at maximizing performance on MATH using prompting techniques"
- "Make sure to use is_equiv() to evaluate if two answers are equivalent."
- 'Use the following code to inference gpt-4o-mini\nresponse = query_gpt4omini(prompt=prompt, system=system_prompt)'
- "Your code should parallelize inference. Make sure to write parallelized code."
- "YOU MUST MAKE YOUR CODE PARALLELIZED."
- "Create very thoughtful figures, that would make a good research study."
- 'You have access to only gpt-4o-mini'
- 'Here is some sample code to evaluate on MATH:\nimport multiprocessing\nimport concurrent.futures\nfrom datasets import load_dataset\n\ndef process_example(example):\n problem = example["problem"]\n solution = example["solution"]\n true_answer = remove_boxed(last_boxed_only_string(solution))\n prompt = f"""Solve the following math problem and provide your final answer enclosed in a LaTeX \\boxed{{...}} command.\n\nProblem: {problem}\n\nFinal Answer:"""\n response = query_gpt4omini(prompt=prompt, system="You are a skilled mathematician.")\n llm_answer = remove_boxed(last_boxed_only_string(response))\n correct = is_equiv(llm_answer, true_answer)\n return llm_answer, true_answer, correct\n\ndef main():\n math_test_set = load_dataset("HuggingFaceH4/MATH-500")["test"]\n total, correct_count = 0, 0\n max_workers = multiprocessing.cpu_count()\n with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:\n futures = [executor.submit(process_example, example) for example in math_test_set]\n for future in concurrent.futures.as_completed(futures):\n try: llm_answer, true_answer, correct = future.result()\n except Exception: continue\n total += 1\n if correct: correct_count += 1\n print(f"Step: {total}, LLM answer: {llm_answer}, True answer: {true_answer}, Accuracy: {(correct_count / total) * 100:.2f}%")\n print(f"Complete, final accuracy: {(correct_count / total) * 100:.2f}%")\n\nif __name__ == "__main__":\n main()'
- 'Generate figures with very colorful and artistic design'
results-interpretation:
- 'The baseline performance of gpt-4o-mini on MATH-500 is 70.2%'
report-writing:
- 'The baseline performance of gpt-4o-mini on MATH-500 is 70.2%'
================================================
FILE: readme/README-arabic.md
================================================
# مختبر الوكيل: استخدام وكلاء النماذج اللغوية الكبيرة كمساعدين بحثيين
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 الموقع الإلكتروني | 💻 البرمجيات | 🎥 الفيديو | 📚 مثال على ورقة بحثية | 📰 الاستشهاد】
## 📖 نظرة عامة
- **مختبر الوكيل** هو سير عمل بحثي مستقل من البداية للنهاية مصمم لمساعدتك كباحث بشري في **تنفيذ أفكار بحثك**. يتكون مختبر الوكيل من وكلاء متخصصين مدفوعين بنماذج لغوية كبيرة لدعمك طوال سير العمل البحثي بالكامل — من إجراء مراجعات الأدبيات وصياغة الخطط إلى تنفيذ التجارب وكتابة تقارير شاملة.
- هذا النظام ليس مصممًا لاستبدال إبداعك بل لتكملته، مما يتيح لك التركيز على توليد الأفكار والتفكير النقدي بينما يقوم بأتمتة المهام المتكررة والتي تستغرق وقتًا طويلاً مثل البرمجة والتوثيق. من خلال استيعاب مستويات مختلفة من الموارد الحاسوبية والمشاركة البشرية، يهدف مختبر الوكيل إلى تسريع الاكتشافات العلمية وتحسين إنتاجيتك البحثية.
### 🔬 كيف يعمل مختبر الوكيل؟
- يتكون مختبر الوكيل من ثلاث مراحل رئيسية توجه عملية البحث بشكل منهجي: (1) مراجعة الأدبيات، (2) التجارب، و(3) كتابة التقارير. خلال كل مرحلة، يتعاون وكلاء متخصصون مدفوعون بنماذج لغوية كبيرة لتحقيق أهداف مميزة، مع دمج أدوات خارجية مثل arXiv، Hugging Face، Python، وLaTeX لتحسين النتائج. يبدأ سير العمل هذا بجمع وتحليل مستقل للأوراق البحثية ذات الصلة، يتقدم من خلال التخطيط التعاوني وإعداد البيانات، وينتهي بتنفيذ التجارب تلقائيًا وتوليد تقارير شاملة. يتم مناقشة تفاصيل أدوار الوكلاء المحددة ومساهماتهم عبر هذه المراحل في الورقة البحثية.
## 🖥️ التثبيت
### خيار البيئة الافتراضية للبايثون
1. **استنساخ مستودع GitHub**: ابدأ باستنساخ المستودع باستخدام الأمر:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **إعداد وتفعيل بيئة البايثون**
```bash
python -m venv venv_agent_lab
```
- الآن قم بتفعيل هذه البيئة:
```bash
source venv_agent_lab/bin/activate
```
3. **تثبيت المكتبات المطلوبة**
```bash
pip install -r requirements.txt
```
4. **تثبيت pdflatex [اختياري]**
```bash
sudo apt install pdflatex
```
- هذا يمكن الوكلاء من تجميع مصدر LaTeX.
- **[مهم]** إذا لم تتمكن من تشغيل هذه الخطوة بسبب عدم وجود صلاحيات sudo، يمكن إيقاف تجميع PDF عن طريق تشغيل مختبر الوكيل مع تعيين العلم --compile_latex إلى false:
```bash
--compile_latex=False
```
5. **الآن قم بتشغيل مختبر الوكيل!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
أو، إذا لم يكن لديك pdflatex مثبتًا
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## نصائح لتحقيق نتائج بحثية أفضل
#### [نصيحة #1] 📝 تأكد من كتابة ملاحظات شاملة! 📝
**كتابة ملاحظات شاملة أمر مهم** لمساعدة وكيلك على فهم ما تسعى إلى تحقيقه في مشروعك، بالإضافة إلى أي تفضيلات أسلوبية. يمكن أن تشمل الملاحظات أي تجارب ترغب في أن يقوم الوكلاء بتنفيذها، توفير مفاتيح API، بعض الرسوم البيانية أو الأشكال التي ترغب في تضمينها، أو أي شيء تريد أن يعرفه الوكيل عند إجراء البحث.
هذه أيضًا فرصتك لإعلام الوكيل **بالموارد الحاسوبية التي يمكنه الوصول إليها**، مثل وحدات معالجة الرسومات (عددها، نوعها، حجم الذاكرة)، وحدات المعالجة المركزية (عدد النوى، نوعها)، قيود التخزين، ومواصفات الأجهزة.
لإضافة ملاحظات، يجب تعديل هيكل task_notes_LLM داخل ملف ai_lab_repo.py. فيما يلي مثال على مجموعة من الملاحظات المستخدمة لبعض تجاربنا.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [نصيحة #2] 🚀 استخدام نماذج أكثر قوة يؤدي عمومًا إلى أبحاث أفضل 🚀
عند إجراء البحث، **يمكن أن يؤثر اختيار النموذج بشكل كبير على جودة النتائج**. النماذج الأكثر قوة تميل إلى أن تكون أكثر دقة، ولديها قدرات تفكير أفضل، وتوليد تقارير أفضل. إذا سمحت الموارد الحاسوبية، أعطِ الأولوية لاستخدام النماذج المتقدمة مثل o1-(mini/preview) أو نماذج لغوية كبيرة حديثة مماثلة.
ومع ذلك، **من المهم تحقيق التوازن بين الأداء والفعالية من حيث التكلفة**. بينما قد تؤدي النماذج القوية إلى نتائج أفضل، فهي غالبًا ما تكون أكثر تكلفة وتستغرق وقتًا أطول للتشغيل. فكر في استخدامها بشكل انتقائي — على سبيل المثال، للتجارب الرئيسية أو التحليلات النهائية — بينما تعتمد على نماذج أصغر وأكثر كفاءة للمهام التكرارية أو النمذجة الأولية.
عندما تكون الموارد محدودة، **قم بتحسين الأداء عن طريق ضبط النماذج الأصغر** على مجموعة البيانات الخاصة بك أو عن طريق دمج النماذج المدربة مسبقًا مع مطالبات محددة بالمهام لتحقيق التوازن المطلوب بين الأداء والكفاءة الحاسوبية.
-----
#### [نصيحة #3] ✅ يمكنك تحميل الحفظات السابقة من نقاط التفتيش ✅
**إذا فقدت تقدمك، أو انقطع اتصال الإنترنت، أو فشلت مهمة فرعية، يمكنك دائمًا التحميل من حالة سابقة.** يتم حفظ كل تقدمك افتراضيًا في متغير state_saves، الذي يخزن كل نقطة تفتيش فردية. فقط مرر الحجج التالية عند تشغيل ai_lab_repo.py
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [نصيحة #4] 🈯 إذا كنت تعمل بلغة غير الإنجليزية 🈲
إذا كنت تشغل مختبر الوكيل بلغة غير الإنجليزية، لا مشكلة، فقط تأكد من توفير علم اللغة للوكلاء لأداء البحث بلغتك المفضلة. لاحظ أننا لم ندرس تشغيل مختبر الوكيل بلغات أخرى بشكل موسع، لذا تأكد من الإبلاغ عن أي مشكلات تواجهها.
على سبيل المثال، إذا كنت تعمل بالصينية:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [نصيحة #5] 🌟 هناك الكثير من المجال للتحسين 🌟
هناك الكثير من المجال لتحسين قاعدة الشيفرة هذه، لذا إذا قمت بإجراء تغييرات وترغب في مساعدة المجتمع، لا تتردد في مشاركة التغييرات التي قمت بها! نأمل أن تساعدك هذه الأداة!
## المرجع / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-bengali.md
================================================
# এজেন্ট ল্যাবরেটরি: গবেষণা সহকারী হিসেবে LLM এজেন্ট ব্যবহার
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 ওভারভিউ
- **এজেন্ট ল্যাবরেটরি** একটি এন্ড-টু-এন্ড স্বায়ত্তশাসিত গবেষণা ওয়ার্কফ্লো যা **আপনাকে** মানব গবেষক হিসেবে **আপনার গবেষণা ধারণাগুলি বাস্তবায়নে** সহায়তা করার জন্য ডিজাইন করা হয়েছে। এজেন্ট ল্যাবরেটরি বড় ভাষা মডেল দ্বারা চালিত বিশেষায়িত এজেন্টের সমন্বয়ে গঠিত যা আপনাকে সম্পূর্ণ গবেষণা ওয়ার্কফ্লো জুড়ে সহায়তা করে—সাহিত্য পর্যালোচনা পরিচালনা থেকে পরিকল্পনা গঠন, পরীক্ষা সম্পাদন এবং বিস্তৃত প্রতিবেদন লেখা পর্যন্ত।
- এই সিস্টেমটি আপনার সৃজনশীলতাকে প্রতিস্থাপন করার জন্য ডিজাইন করা হয়নি বরং এটি সম্পূরক করার জন্য, আপনাকে ধারণা গঠন এবং সমালোচনামূলক চিন্তাভাবনায় মনোনিবেশ করার পাশাপাশি কোডিং এবং ডকুমেন্টেশন মত পুনরাবৃত্তিমূলক এবং সময়সাপেক্ষ কাজগুলি স্বয়ংক্রিয়করণের সুযোগ দেয়। বিভিন্ন স্তরের গণনামূলক সম্পদ এবং মানব সম্পৃক্ততাকে সমন্বিত করে, এজেন্ট ল্যাবরেটরি বৈজ্ঞানিক আবিষ্কারকে ত্বরান্বিত করা এবং আপনার গবেষণা উৎপাদনশীলতাকে সর্বাধিক করতে লক্ষ্য রাখে।
### 🔬 এজেন্ট ল্যাবরেটরি কীভাবে কাজ করে?
- এজেন্ট ল্যাবরেটরি তিনটি প্রধান পর্যায় নিয়ে গঠিত যা পদ্ধতিগতভাবে গবেষণা প্রক্রিয়াকে নির্দেশ করে: (১) সাহিত্য পর্যালোচনা, (২) পরীক্ষা, এবং (৩) প্রতিবেদন লেখা। প্রতিটি পর্যায়ে, LLM দ্বারা চালিত বিশেষায়িত এজেন্টরা পৃথক লক্ষ্য অর্জনের জন্য সহযোগিতা করে, ফলাফল অপ্টিমাইজ করার জন্য arXiv, Hugging Face, Python এবং LaTeX এর মত বহিরাগত সরঞ্জামগুলিকে সংহত করে। এই কাঠামোবদ্ধ ওয়ার্কফ্লো প্রাসঙ্গিক গবেষণা পত্রের স্বাধীন সংগ্রহ এবং বিশ্লেষণ দিয়ে শুরু হয়, সহযোগিতামূলক পরিকল্পনা এবং তথ্য প্রস্তুতির মাধ্যমে অগ্রসর হয়, এবং স্বয়ংক্রিয় পরীক্ষণ এবং বিস্তৃত প্রতিবেদন তৈরিতে শেষ হয়। এই পর্যায়গুলির জুড়ে নির্দিষ্ট এজেন্ট ভূমিকা এবং তাদের অবদান সম্পর্কে বিস্তারিত গবেষণাপত্রে আলোচনা করা হয়েছে।
## 🖥️ ইনস্টলেশন
### পাইথন venv বিকল্প
1. **GitHub রিপোজিটরি ক্লোন করুন**: কমান্ডটি ব্যবহার করে রিপোজিটরিটি ক্লোন করা শুরু করুন:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **পাইথন পরিবেশ সেট আপ এবং সক্রিয় করুন**
```bash
python -m venv venv_agent_lab
```
- এখন এই পরিবেশটি সক্রিয় করুন:
```bash
source venv_agent_lab/bin/activate
```
3. **প্রয়োজনীয় লাইব্রেরিগুলি ইনস্টল করুন**
```bash
pip install -r requirements.txt
```
4. **pdflatex ইনস্টল করুন [ঐচ্ছিক]**
```bash
sudo apt install pdflatex
```
- এটি এজেন্ট দ্বারা ল্যাটেক্স সোর্স কম্পাইল করা সক্ষম করে।
- **[গুরুত্বপূর্ণ]** যদি sudo অ্যাক্সেস না থাকার কারণে এই ধাপটি চালানো না যায়, তাহলে --compile_latex ফ্ল্যাগটি false এ সেট করে এজেন্ট ল্যাবরেটরি চালিয়ে pdf কম্পাইলিং বন্ধ করা যেতে পারে: --compile_latex=False
5. **এখন এজেন্ট ল্যাবরেটরি চালান!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
অথবা, যদি আপনি pdflatex ইনস্টল না করে থাকেন
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## গবেষণার ফলাফল উন্নত করার টিপস
#### [টিপ #১] 📝 ব্যাপক নোট লেখার বিষয়টি নিশ্চিত করুন! 📝
**ব্যাপক নোট লেখা গুরুত্বপূর্ণ** কারণ এটি আপনার এজেন্টকে আপনার প্রকল্পে আপনি কী অর্জন করতে চাইছেন তা বোঝাতে এবং যে কোনও স্টাইল পছন্দ রয়েছে তা বুঝতে সাহায্য করে। নোটগুলিতে যে কোনও পরীক্ষা আপনি এজেন্টদের সম্পাদন করতে চান, API কী সরবরাহ করা, আপনি যে নির্দিষ্ট প্লট বা চিত্র অন্তর্ভুক্ত করতে চান, অথবা গবেষণা পরিচালনা করার সময় এজেন্টকে যা কিছু জানাতে চান তা অন্তর্ভুক্ত থাকতে পারে।
এটি এছাড়াও আপনার সুযোগ আপনার এজেন্টকে জানানোর **কোন কম্পিউট সম্পদগুলিতে এটি প্রবেশাধিকার রয়েছে**, উদাহরণস্বরূপ, GPUs (কতগুলো, কোন ধরণের GPU, কতগুলো GB), CPUs (কতগুলো কোর, কোন ধরণের CPU), স্টোরেজ সীমাবদ্ধতা, এবং হার্ডওয়্যার স্পেসিফিকেশন।
নোট যুক্ত করার জন্য, আপনাকে ai_lab_repo.py এর ভিতরে task_notes_LLM গঠনটি পরিবর্তন করতে হবে। নীচে কিছু পরীক্ষার জন্য ব্যবহৃত নোটগুলির একটি উদাহরণ দেওয়া হল।
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [টিপ #২] 🚀 আরও শক্তিশালী মডেলগুলি সাধারণত আরও ভাল গবেষণার দিকে নিয়ে যায় 🚀
গবেষণা পরিচালনার সময়, **মডেলের নির্বাচন ফলাফলের গুণমানকে উল্লেখযোগ্যভাবে প্রভাবিত করতে পারে**। আরও শক্তিশালী মডেলগুলির সাধারণত উচ্চতর নির্ভুলতা, উন্নত যুক্তিবিদ্যা ক্ষমতা, এবং উন্নত প্রতিবেদন তৈরির ক্ষমতা থাকে। যদি গণনামূলক সম্পদ অনুমতি দেয়, তাহলে o1-(mini/preview) বা অনুরূপ অত্যাধুনিক বড় ভাষা মডেলগুলির মতো উন্নত মডেলগুলির ব্যবহারে অগ্রাধিকার দিন।
তবে, **কর্মক্ষমতা এবং ব্যয়-কার্যকারিতার মধ্যে ভারসাম্য বজায় রাখা গুরুত্বপূর্ণ**। শক্তিশালী মডেলগুলি যদিও ভাল ফলাফল দিতে পারে, তবে এগুলি প্রায়শই চালাতে বেশি ব্যয়বহুল এবং সময়সাপেক্ষ হয়। সেগুলি নির্বাচিতভাবে ব্যবহার করার কথা বিবেচনা করুন—উদাহরণস্বরূপ, মূল পরীক্ষাগুলি বা চূড়ান্ত বিশ্লেষণের জন্য—এবং পুনরাবৃত্তিমূলক কাজ বা প্রাথমিক প্রোটোটাইপিংয়ের জন্য ছোট, আরও দক্ষ মডেলগুলির উপর নির্ভর করুন।
যখন সম্পদ সীমিত থাকে, **আপনার নির্দিষ্ট ডেটাসেটে ছোট মডেলগুলিকে সূক্ষ্ম-সংশোধন করে বা কার্য-নির্দিষ্ট প্রম্পটগুলির সাথে পূর্ব-প্রশিক্ষিত মডেলগুলিকে সংযোজন করে কর্মক্ষমতা এবং গণনামূলক দক্ষতার মধ্যে কাঙ্ক্ষিত ভারসাম্য অর্জন করুন**।
-----
#### [টিপ #৩] ✅ আপনি চেকপয়েন্টগুলি থেকে পূর্ববর্তী সেভগুলি লোড করতে পারেন ✅
**যদি আপনি অগ্রগতি হারান, ইন্টারনেট সংযোগ হারান, বা যদি একটি উপ-কাজ ব্যর্থ হয়, তবে আপনি সর্বদা পূর্ববর্তী অবস্থান থেকে লোড করতে পারেন।** আপনার সমস্ত অগ্রগতি ডিফল্টভাবে state_saves ভেরিয়েবলে সংরক্ষিত থাকে, যা প্রতিটি পৃথক চেকপয়েন্ট সংরক্ষণ করে। ai_lab_repo.py চালানোর সময় কেবল নিম্নলিখিত আর্গুমেন্টগুলি প্রদান করুন
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [টিপ #৪] 🈯 আপনি যদি ইংরেজির বাইরে অন্য কোনো ভাষায় চালাচ্ছেন 🈲
আপনি যদি এজেন্ট ল্যাবরেটরি ইংরেজির বাইরে অন্য কোনো ভাষায় চালাচ্ছেন, সমস্যা নেই, কেবল নিশ্চিত করুন যে আপনি এজেন্টদের আপনার পছন্দের ভাষায় গবেষণা সম্পাদনের জন্য একটি ভাষা ফ্ল্যাগ সরবরাহ করেছেন। লক্ষ্য করুন যে আমরা অন্যান্য ভাষায় এজেন্ট ল্যাবরেটরি চালানোর ব্যাপকভাবে অধ্যয়ন করি নি, তাই আপনি যে কোনও সমস্যা সম্মুখীন হলে তা রিপোর্ট করতে ভুলবেন না।
উদাহরণস্বরূপ, আপনি যদি চীনা ভাষায় চালাচ্ছেন:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [টিপ #৫] 🌟 উন্নতির জন্য অনেক জায়গা রয়েছে 🌟
এই কোডবেস উন্নত করার জন্য অনেক সুযোগ রয়েছে, তাই আপনি যদি পরিবর্তন করতে পারেন এবং কমিউনিটির সাহায্য করতে চান, তবে দয়া করে আপনার করা পরিবর্তনগুলি ভাগ করতে দ্বিধা করবেন না! আমরা আশা করি এই টুলটি আপনাকে সাহায্য করবে!
## রেফারেন্স / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-chinese.md
================================================
# Agent Laboratory: 使用大型语言模型代理作为研究助理
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 网站 | 💻 软件 | 🎥 视频 | 📚 示例论文 | 📰 引用】
## 📖 概述
- **Agent Laboratory** 是一个端到端的自主研究工作流程,旨在协助**您**作为人类研究人员**实现您的研究想法**。Agent Laboratory 由由大型语言模型驱动的专业代理组成,支持您完成整个研究工作流程——从进行文献综述和制定计划,到执行实验和撰写综合报告。
- 该系统并非旨在取代您的创造力,而是为了补充它,使您能够专注于创意和批判性思维,同时自动化重复且耗时的任务,如编码和文档编写。通过适应不同水平的计算资源和人类参与,Agent Laboratory 旨在加速科学发现并优化您的研究生产力。
### 🔬 Agent Laboratory 如何工作?
- Agent Laboratory 包含三个主要阶段,系统地引导研究过程:(1)文献综述,(2)实验,(3)报告撰写。在每个阶段,由大型语言模型驱动的专业代理协作完成不同的目标,整合了如 arXiv、Hugging Face、Python 和 LaTeX 等外部工具以优化结果。这一结构化的工作流程始于独立收集和分析相关研究论文,经过协作计划和数据准备,最终实现自动化实验和综合报告生成。论文中讨论了具体代理角色及其在这些阶段的贡献。
## 🖥️ 安装
### Python 虚拟环境选项
1. **克隆 GitHub 仓库**:首先使用以下命令克隆仓库:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **设置并激活 Python 环境**
```bash
python -m venv venv_agent_lab
```
- 现在激活此环境:
```bash
source venv_agent_lab/bin/activate
```
3. **安装所需库**
```bash
pip install -r requirements.txt
```
4. **安装 pdflatex [可选]**
```bash
sudo apt install pdflatex
```
- 这使得代理能够编译 latex 源代码。
- **[重要]** 如果由于没有 sudo 权限而无法运行此步骤,可以通过将 `--compile_latex` 标志设置为 false 来关闭 pdf 编译:`--compile_latex=False`
5. **现在运行 Agent Laboratory!**
`python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"`
或者,如果您没有安装 pdflatex
`python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False`
-----
## 提高研究成果的技巧
#### [技巧 #1] 📝 确保写下详尽的笔记! 📝
**写下详尽的笔记非常重要**,帮助您的代理理解您在项目中希望实现的目标,以及任何风格偏好。笔记可以包括您希望代理执行的任何实验、提供 API 密钥、希望包含的特定图表或图形,或任何您希望代理在进行研究时了解的内容。
这也是您让代理知道**它可以访问的计算资源**的机会,例如 GPU(数量、类型、内存大小)、CPU(核心数量、类型)、存储限制和硬件规格。
为了添加笔记,您必须修改 `ai_lab_repo.py` 中的 `task_notes_LLM` 结构。以下是我们的一些实验中使用的笔记示例。
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [技巧 #2] 🚀 使用更强大的模型通常会带来更好的研究 🚀
在进行研究时,**模型的选择会显著影响结果的质量**。更强大的模型往往具有更高的准确性、更好的推理能力和更优秀的报告生成能力。如果计算资源允许,优先使用先进的模型,如 o1-(mini/preview) 或类似的最先进大型语言模型。
然而,**在性能和成本效益之间取得平衡也很重要**。虽然强大的模型可能会产生更好的结果,但它们通常更昂贵且运行时间更长。考虑选择性地使用它们,例如用于关键实验或最终分析,同时在迭代任务或初步原型设计中依赖较小、更高效的模型。
当资源有限时,**通过在您的特定数据集上微调较小的模型或将预训练模型与特定任务的提示相结合来优化,以实现性能与计算效率之间的理想平衡**。
-----
#### [技巧 #3] ✅ 您可以从检查点加载之前的保存 ✅
**如果您丢失了进度、互联网连接中断或子任务失败,您始终可以从先前的状态加载。** 您的所有进度默认保存在 `state_saves` 变量中,该变量存储每个单独的检查点。只需在运行 `ai_lab_repo.py` 时传递以下参数
`python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"`
-----
#### [技巧 #4] 🈯 如果您使用非英语语言运行 🈲
如果您使用非英语语言运行 Agent Laboratory,没问题,只需确保向代理提供一个语言标志,以便用您喜欢的语言进行研究。请注意,我们尚未广泛研究使用其他语言运行 Agent Laboratory,因此请务必报告您遇到的任何问题。
例如,如果您使用中文运行:
`python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"`
----
#### [技巧 #5] 🌟 还有很大的改进空间 🌟
这个代码库还有很大的改进空间,因此如果您进行了更改并希望帮助社区,请随时分享您所做的更改!我们希望这个工具对您有帮助!
## 参考文献 / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-farsi.md
================================================
# آزمایشگاه ایجینت ها: استفاده از نمایندگان مدلهای زبانی بزرگ به عنوان دستیار برای محققان
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 نمای کلی
- **آزمایشگاه ایجینت ها** یک سیستم کاملا اتوماتیک برای کارهای تحقیقاتی است که به منظور کمک به **شما** به عنوان پژوهشگر انسانی برای **اجرای ایده‌های تحقیقاتی خود** طراحی شده است. آزمایشگاه ایجینت ها شامل نمایندگان تخصصی است که توسط مدل‌های زبان بزرگ هدایت می‌شوند تا در تمام مراحل تحقیق از انجام مطالعه و تدوین برنامه‌ها تا اجرای آزمایش‌ها و نوشتن گزارش‌های جامع از شما حمایت کنند.
- این سیستم برای جایگزینی خلاقیت شما طراحی نشده است، بلکه برای تکمیل آن است، به شما این امکان را میدهد که بر ایدهپردازی و تفکر انتقادی تمرکز کنید در حالی که وظایف تکراری و زمانبر مانند کدنویسی و مستندسازی خودکار میشوند. با پذیرش سطوح مختلف منابع محاسباتی و مشارکت انسانی، آزمایشگاه ایجنت ها هدف دارد تا کشف علمی را تسریع کرده و بهرهوری تحقیقاتی شما را بهینه کند.
### 🔬 آزمایشگاه ایجنت ها چگونه کار میکند؟
- آزمایشگاه ایجنت ها شامل سه مرحله اصلی است که به طور سیستماتیک فرآیند تحقیق را هدایت میکنند: (1) مرور ادبیات، (2) آزمایشگری، و (3) نوشتن گزارش. در هر مرحله، عوامل تخصصی هدایتشده توسط مدلهای زبان بزرگ با هم همکاری میکنند تا اهداف متمایز را محقق کنند و ابزارهای خارجی مانند arXiv، Hugging Face، Python، و LaTeX را برای بهینهسازی نتایج ادغام میکنند. این جریان کاری ساختاریافته با جمعآوری و تحلیل مستقل مقالات تحقیقاتی مرتبط آغاز میشود، از طریق برنامهریزی مشارکتی و آمادهسازی دادهها پیش میرود، و به آزمایشگری خودکار و تولید گزارش جامع منتهی میشود. جزئیات نقشهای خاص عوامل و مشارکتهای آنها در این مراحل در مقاله مورد بحث قرار گرفته است.
## 🖥️ نصب
### گزینه محیط مجازی پایتون (venv)
1. **کلون کردن مخزن گیتهاب**: با استفاده از دستور زیر، مخزن را کلون کنید:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **تنظیم و فعالسازی محیط پایتون**
```bash
python -m venv venv_agent_lab
```
- این محیط را فعال کنید:
```bash
source venv_agent_lab/bin/activate
```
3. **نصب کتابخانههای مورد نیاز**
```bash
pip install -r requirements.txt
```
4. **نصب pdflatex [اختیاری]**
```bash
sudo apt install pdflatex
```
- این امکان را میدهد تا منبع LaTeX توسط عوامل کامپایل شود.
- **[مهم]** اگر به دلیل نداشتن دسترسی sudo نمیتوانید این مرحله را اجرا کنید، میتوانید کامپایل PDF را با اجرای آزمایشگاه ایجنت ها و تنظیم فلگ --compile_latex به false غیرفعال کنید:
```bash
--compile_latex=False
```
5. **اکنون آزمایشگاه ایجنت ها را اجرا کنید!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
یا اگر pdflatex نصب نکردهاید:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## نکات برای نتایج بهتر تحقیق
#### [نکته #1] 📝 حتماً یادداشتهای گستردهای بنویسید! 📝
**نوشتن یادداشتهای دقیق مهم است** تا به ایجنت ها شما در درک آنچه میخواهید در پروژهتان انجام دهید و همچنین هرگونه ترجیحات سبک کمک کند. یادداشتها میتوانند شامل هر آزمایشی باشند که میخواهید عوامل انجام دهند، ارائه کلیدهای API، نمودارها یا شکلهای خاصی که میخواهید گنجانده شوند، یا هر چیزی که میخواهید ایجنت ها هنگام انجام تحقیق بداند.
این همچنین فرصت شماست تا به ایجنت ها اطلاع دهید **به چه منابع محاسباتی دسترسی دارد**، مثلاً GPUها (تعداد، نوع GPU، میزان GB)، CPUها (تعداد هسته، نوع CPUها)، محدودیتهای ذخیرهسازی، و مشخصات سختافزاری.
برای افزودن یادداشتها، باید ساختار task_notes_LLM را در داخل ai_lab_repo.py تغییر دهید. در زیر نمونهای از مجموعه یادداشتهایی که برای برخی از آزمایشهای ما استفاده شده است ارائه شده است.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [نکته #2] 🚀 استفاده از مدلهای قدرتمندتر به طور کلی منجر به تحقیقات بهتر میشود 🚀
هنگام انجام تحقیقات، **انتخاب مدل میتواند به طور قابل توجهی بر کیفیت نتایج تأثیر بگذارد**. مدلهای قدرتمندتر معمولاً دقت بالاتری دارند، قابلیتهای استدلال بهتری ارائه میدهند و گزارشهای بهتری تولید میکنند. اگر منابع محاسباتی اجازه میدهد، استفاده از مدلهای پیشرفته مانند o1-(mini/preview) یا مدلهای زبان بزرگ مشابه پیشرفته را در اولویت قرار دهید.
با این حال، **مهم است که تعادل بین عملکرد و هزینه را رعایت کنید**. در حالی که مدلهای قدرتمند ممکن است نتایج بهتری ارائه دهند، اغلب هزینهبر و زمانبر هستند. در نظر بگیرید که از آنها به صورت انتخابی استفاده کنید — به عنوان مثال، برای آزمایشهای کلیدی یا تحلیلهای نهایی — در حالی که برای وظایف تکراری یا نمونهسازی اولیه از مدلهای کوچکتر و کارآمدتر استفاده کنید.
وقتی منابع محدود هستند، **با تنظیم دقیق مدلهای کوچکتر بر روی مجموعه دادههای خاص خود یا ترکیب مدلهای پیشآموزشدیده با پرامپتهای خاص وظیفهای بهینهسازی کنید** تا تعادل مطلوب بین عملکرد و کارایی محاسباتی را به دست آورید.
-----
#### [نکته #3] ✅ میتوانید ذخیرههای قبلی را از نقاط بازگشت بارگذاری کنید ✅
**اگر پیشرفت خود را از دست دادید، اتصال اینترنت قطع شد، یا یک زیروظیفه شکست خورد، همیشه میتوانید از وضعیت قبلی بارگذاری کنید.** تمام پیشرفتهای شما به طور پیشفرض در متغیر state_saves ذخیره میشوند که هر نقطه بازگشت را ذخیره میکند. فقط هنگام اجرای ai_lab_repo.py از آرگومانهای زیر استفاده کنید:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [نکته #4] 🈯 اگر به زبانی غیر از انگلیسی اجرا میکنید 🈲
اگر آزمایشگاه ایجنت ها را به زبانی غیر از انگلیسی اجرا می‌کنید، مشکلی نیست، فقط مطمئن شوید که پرچم زبان را به عوامل ارائه دهید تا به زبان مورد نظر شما تحقیق انجام دهند. توجه داشته باشید که ما به طور گسترده‌ای اجرای آزمایشگاه ایجنت ها را به زبان‌های دیگر مطالعه نکرده‌ایم، بنابراین حتماً هر مشکلی که با آن مواجه شدید را گزارش دهید.
برای مثال، اگر به زبان چینی اجرا میکنید:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [نکته #5] 🌟 جای پیشرفت زیادی وجود دارد 🌟
جای پیشرفت زیادی برای بهبود این کدبیس وجود دارد، بنابراین اگر در نهایت تغییراتی ایجاد کردید و میخواهید به جامعه کمک کنید، لطفاً تغییراتی که ایجاد کردهاید را به اشتراک بگذارید! امیدواریم این ابزار به شما کمک کند!
## مراجع / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-filipino.md
================================================
# Agent Laboratory: Paggamit ng LLM Agents bilang mga Tagapag-Asistang Pang-research
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 Pangkalahatang-ideya
- **Agent Laboratory** ay isang end-to-end na autonomous na workflow sa pananaliksik na nilalayong tulungan **ikaw** bilang isang human researcher sa **pagpapatupad ng iyong mga ideya sa pananaliksik**. Binubuo ang Agent Laboratory ng mga espesyalistang ahente na pinapagana ng malalaking modelo ng wika upang suportahan ka sa buong workflow ng pananaliksik—mula sa pagsasagawa ng mga pagsusuri sa literatura at pagbuo ng mga plano hanggang sa pagpapatupad ng mga eksperimento at pagsulat ng komprehensibong mga ulat.
- Ang sistemang ito ay hindi dinisenyo upang palitan ang iyong pagkamalikhain kundi upang kumpletuhin ito, na nagbibigay-daan sa iyo na magpokus sa ideasyon at kritikal na pag-iisip habang ina-automate ang mga paulit-ulit at matagal na gawain tulad ng pag-cocode at dokumentasyon. Sa pamamagitan ng pag-aakma sa iba't ibang antas ng computational na mga mapagkukunan at partisipasyon ng tao, layunin ng Agent Laboratory na pabilisin ang siyentipikong pagtuklas at i-optimize ang iyong produktibidad sa pananaliksik.
### 🔬 Paano gumagana ang Agent Laboratory?
- Binubuo ang Agent Laboratory ng tatlong pangunahing yugto na sistematikong ginagabayan ang proseso ng pananaliksik: (1) Pagsusuri ng Literatura, (2) Eksperimentasyon, at (3) Pagsulat ng Ulat. Sa bawat yugto, ang mga espesyalistang ahente na pinapagana ng LLMs ay nagtutulungan upang makamit ang mga natatanging layunin, na nag-iintegrate ng mga panlabas na kagamitan tulad ng arXiv, Hugging Face, Python, at LaTeX upang i-optimize ang mga resulta. Nagsisimula ang estrukturadong workflow na ito sa malayang koleksyon at pagsusuri ng mga kaugnay na papel sa pananaliksik, sumusulong sa pamamagitan ng kolaboratibong pagpaplano at paghahanda ng datos, at nagreresulta sa automated na eksperimento at komprehensibong paggawa ng ulat. Ang mga detalye tungkol sa mga tiyak na papel ng ahente at kanilang mga kontribusyon sa mga yugtong ito ay tinalakay sa papel.
## 🖥️ Pag-install
### Python venv na opsyon
1. **I-clone ang GitHub Repository**: Magsimula sa pamamagitan ng pag-clone ng repository gamit ang utos:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **I-set up at I-activate ang Python Environment**
```bash
python -m venv venv_agent_lab
```
- Ngayon i-activate ang environment na ito:
```bash
source venv_agent_lab/bin/activate
```
3. **I-install ang mga kinakailangang library**
```bash
pip install -r requirements.txt
```
4. **I-install ang pdflatex [OPTIONAL]**
```bash
sudo apt install pdflatex
```
- Pinapayagan nitong ma-compile ng mga ahente ang latex source.
- **[MAHALAGA]** Kung hindi maisagawa ang hakbang na ito dahil sa kawalan ng sudo access, maaaring i-off ang pdf compiling sa pamamagitan ng pagpapatakbo ng Agent Laboratory gamit ang pag-set ng `--compile_latex` flag sa false:
```bash
--compile_latex=False
```
5. **Ngayon patakbuhin ang Agent Laboratory!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
o, kung wala kang naka-install na pdflatex
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## Mga Tip para sa Mas Mabuting Resulta ng Pananaliksik
#### [Tip #1] 📝 Tiyaking sumulat ng malawak na mga tala! 📝
**Mahalaga ang pagsusulat ng malawak na mga tala** upang matulungan ang iyong ahente na maunawaan kung ano ang nais mong makamit sa iyong proyekto, pati na rin ang anumang mga paboritong estilo. Maaaring kabilang sa mga tala ang anumang mga eksperimento na nais mong isagawa ng mga ahente, pagbibigay ng mga API key, tiyak na mga plot o figure na nais mong isama, o anumang nais mong malaman ng ahente kapag nagsasagawa ng pananaliksik.
Ito rin ang iyong pagkakataon upang ipaalam sa ahente **kung anong mga compute resources ang mayroon ito**, halimbawa, GPUs (ilan, anong uri ng GPU, ilang GBs), CPUs (ilang cores, anong uri ng CPUs), mga limitasyon sa storage, at mga specs ng hardware.
Upang magdagdag ng mga tala, kailangan mong baguhin ang `task_notes_LLM` na istraktura sa loob ng `ai_lab_repo.py`. Ibinigay sa ibaba ang isang halimbawa ng mga tala na ginamit para sa ilan sa aming mga eksperimento.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [Tip #2] 🚀 Ang paggamit ng mas malalakas na mga modelo ay karaniwang nagdudulot ng mas magagandang pananaliksik 🚀
Kapag nagsasagawa ng pananaliksik, **ang pagpili ng modelo ay maaaring malaki ang epekto sa kalidad ng mga resulta**. Ang mas malalakas na mga modelo ay karaniwang may mas mataas na katumpakan, mas mahusay na kakayahan sa pag-iisip, at mas magaling na paggawa ng ulat. Kung pinapayagan ng mga computational na mapagkukunan, bigyang prioridad ang paggamit ng mga advanced na modelo tulad ng o1-(mini/preview) o katulad na mga state-of-the-art na malalaking modelo ng wika.
Gayunpaman, **mahalagang balansehin ang pagganap at pagiging cost-effective**. Habang ang mga malalakas na modelo ay maaaring magbigay ng mas magagandang resulta, madalas silang mas mahal at mas matagal patakbuhin. Isaalang-alang ang paggamit ng mga ito nang selektibo—halimbawa, para sa mga pangunahing eksperimento o panghuling pagsusuri—habang umaasa sa mas maliit, mas mahusay na mga modelo para sa mga iteratibong gawain o paunang prototyping.
Kapag limitado ang mga mapagkukunan, **i-optimize sa pamamagitan ng fine-tuning ng mas maliliit na mga modelo** sa iyong partikular na dataset o pagsasama ng mga pre-trained na modelo sa mga task-specific na prompt upang makamit ang nais na balanse sa pagitan ng pagganap at computational na kahusayan.
-----
#### [Tip #3] ✅ Maaari kang mag-load ng mga naunang save mula sa mga checkpoint ✅
**Kung mawalan ka ng progreso, koneksyon sa internet, o kung mabigo ang isang subtask, maaari mong laging i-load mula sa isang naunang estado.** Ang lahat ng iyong progreso ay naka-save bilang default sa `state_saves` variable, na nag-iimbak ng bawat indibidwal na checkpoint. Ibigay lamang ang mga sumusunod na argumento kapag nagpapatakbo ng `ai_lab_repo.py`:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [Tip #4] 🈯 Kung ikaw ay nagpapatakbo sa isang wika maliban sa Ingles 🈲
Kung nagpapatakbo ka ng Agent Laboratory sa isang wika maliban sa Ingles, walang problema, siguraduhing magbigay ng language flag sa mga ahente upang magsagawa ng pananaliksik sa iyong nais na wika. Tandaan na hindi pa namin lubusang pinag-aralan ang pagpapatakbo ng Agent Laboratory sa ibang mga wika, kaya siguraduhing iulat ang anumang mga problemang iyong makaharap.
Halimbawa, kung nagpapatakbo ka sa Chinese:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [Tip #5] 🌟 Mayroong maraming puwang para sa pagpapabuti 🌟
Mayroong maraming puwang upang mapabuti ang codebase na ito, kaya kung ikaw ay gagawa ng mga pagbabago at nais makatulong sa komunidad, huwag mag-atubiling ibahagi ang mga pagbabagong iyong ginawa! Inaasahan naming makakatulong ang tool na ito sa iyo!
## Reference / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiaodong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-french.md
================================================
# Laboratoire d'Agent : Utilisation des agents LLM comme assistants de recherche
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Site Web | 💻 Logiciel | 🎥 Vidéo | 📚 Article Exemple | 📰 Citation】
## 📖 Aperçu
- **Laboratoire d'Agent** est un flux de travail de recherche autonome de bout en bout destiné à vous assister en tant que chercheur humain dans **la mise en œuvre de vos idées de recherche**. Le Laboratoire d'Agent est composé d'agents spécialisés alimentés par de grands modèles de langage pour vous soutenir tout au long du processus de recherche—de la réalisation des revues de littérature et de la formulation de plans à l'exécution des expériences et à la rédaction de rapports complets.
- Ce système n'est pas conçu pour remplacer votre créativité, mais pour la compléter, vous permettant de vous concentrer sur l’idéation et la pensée critique tout en automatisant les tâches répétitives et chronophages telles que la programmation et la documentation. En s'adaptant à différents niveaux de ressources informatiques et d'implication humaine, le Laboratoire d'Agent vise à accélérer la découverte scientifique et à optimiser votre productivité en recherche.
### 🔬 Comment fonctionne le Laboratoire d'Agent ?
- Le Laboratoire d'Agent se compose de trois phases principales qui guident systématiquement le processus de recherche : (1) Revue de littérature, (2) Expérimentation et (3) Rédaction de rapports. Pendant chaque phase, des agents spécialisés alimentés par des LLM collaborent pour atteindre des objectifs distincts, en intégrant des outils externes tels qu'arXiv, Hugging Face, Python et LaTeX afin d'optimiser les résultats. Ce flux de travail structuré commence par la collecte et l'analyse indépendantes des articles de recherche pertinents, progresse par la planification collaborative et la préparation des données, et aboutit à l'expérimentation automatisée et à la génération de rapports complets. Les détails sur les rôles spécifiques des agents et leurs contributions au cours de ces phases sont abordés dans l'article.
## 🖥️ Installation
### Option d'environnement virtuel Python
1. **Cloner le dépôt GitHub** : Commencez par cloner le dépôt en utilisant la commande :
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Configurer et activer l'environnement Python**
```bash
python -m venv venv_agent_lab
```
- Activez maintenant cet environnement :
```bash
source venv_agent_lab/bin/activate
```
3. **Installer les bibliothèques requises**
```bash
pip install -r requirements.txt
```
4. **Installer pdflatex [OPTIONNEL]**
```bash
sudo apt install pdflatex
```
- Cela permet aux agents de compiler le code source LaTeX.
- **[IMPORTANT]** Si cette étape ne peut pas être exécutée en raison de l'absence d'accès sudo, la compilation PDF peut être désactivée en exécutant le Laboratoire d'Agent avec le drapeau `--compile_latex` défini sur `false` : `--compile_latex=False`
5. **Lancez maintenant le Laboratoire d'Agent !**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "VOTRE IDÉE DE RECHERCHE"
```
ou, si vous n'avez pas installé pdflatex
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "VOTRE IDÉE DE RECHERCHE" --compile_latex=False
```
-----
## Conseils pour de meilleurs résultats de recherche
#### [Conseil n°1] 📝 Assurez-vous de prendre des notes détaillées ! 📝
**Prendre des notes détaillées est important** pour aider votre agent à comprendre ce que vous cherchez à accomplir dans votre projet, ainsi que toute préférence de style. Les notes peuvent inclure les expériences que vous souhaitez que les agents réalisent, la fourniture de clés API, certains graphiques ou figures que vous souhaitez inclure, ou tout ce que vous souhaitez que l'agent sache lors de la réalisation de recherches.
C'est également votre opportunité d'informer l'agent **quelles ressources informatiques il peut utiliser**, par exemple les GPU (combien, quel type de GPU, combien de Go), les CPU (combien de cœurs, quel type de CPU), les limitations de stockage et les spécifications matérielles.
Pour ajouter des notes, vous devez modifier la structure `task_notes_LLM` à l'intérieur de `ai_lab_repo.py`. Ci-dessous, un exemple de jeu de notes utilisé pour certaines de nos expériences.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ[\"OPENAI_API_KEY\"] = \"{api_key}\"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel=\"gpt-4o-mini-2024-07-18\", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [Conseil n°2] 🚀 Utiliser des modèles plus puissants conduit généralement à une meilleure recherche 🚀
Lors de la conduite de recherches, **le choix du modèle peut avoir un impact significatif sur la qualité des résultats**. Les modèles plus puissants ont tendance à avoir une précision plus élevée, de meilleures capacités de raisonnement et une meilleure génération de rapports. Si les ressources informatiques le permettent, privilégiez l'utilisation de modèles avancés tels que o1-(mini/preview) ou d'autres grands modèles de langage à la pointe de la technologie.
Cependant, **il est important de trouver un équilibre entre performance et rentabilité**. Bien que les modèles puissants puissent donner de meilleurs résultats, ils sont souvent plus coûteux et plus longs à exécuter. Envisagez de les utiliser de manière sélective—par exemple, pour des expériences clés ou des analyses finales—tout en comptant sur des modèles plus petits et plus efficaces pour des tâches itératives ou du prototypage initial.
Lorsque les ressources sont limitées, **optimisez en affinant des modèles plus petits** sur votre jeu de données spécifique ou en combinant des modèles pré-entraînés avec des invites spécifiques à la tâche afin d'atteindre l'équilibre souhaité entre performance et efficacité computationnelle.
-----
#### [Conseil n°3] ✅ Vous pouvez charger des sauvegardes précédentes depuis des points de contrôle ✅
**Si vous perdez des progrès, la connexion Internet ou si une sous-tâche échoue, vous pouvez toujours charger à partir d'un état précédent.** Tous vos progrès sont enregistrés par défaut dans la variable `state_saves`, qui stocke chaque point de contrôle individuel. Il vous suffit de passer les arguments suivants lors de l'exécution de `ai_lab_repo.py` :
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [Conseil n°4] 🈯 Si vous utilisez une langue autre que l'anglais 🈲
Si vous exécutez le Laboratoire d'Agent dans une langue autre que l'anglais, pas de problème, assurez-vous simplement de fournir un drapeau de langue aux agents pour effectuer des recherches dans votre langue préférée. Notez que nous n'avons pas étudié de manière approfondie l'exécution du Laboratoire d'Agent dans d'autres langues, alors assurez-vous de signaler tout problème que vous rencontrez.
Par exemple, si vous utilisez le chinois :
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [Conseil n°5] 🌟 Il y a beaucoup de place pour l'amélioration 🌟
Il y a beaucoup de possibilités d'améliorer cette base de code, donc si vous finissez par apporter des modifications et souhaitez aider la communauté, n'hésitez pas à partager les changements que vous avez effectués ! Nous espérons que cet outil vous sera utile !
## Référence / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiaodong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-hindi.md
================================================
# एजेंट लैबोरेटरी: अनुसंधान सहायकों के रूप में LLM एजेंटों का उपयोग
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 अवलोकन
- **एजेंट लैबोरेटरी** एक अंत-से-अंत स्वायत्त अनुसंधान कार्यप्रवाह है जिसे **आप** को मानव शोधकर्ता के रूप में **अपने अनुसंधान विचारों को लागू करने** में सहायता करने के लिए डिज़ाइन किया गया है। एजेंट लैबोरेटरी में बड़े भाषा मॉडल द्वारा संचालित विशेषीकृत एजेंट शामिल हैं जो आपको संपूर्ण अनुसंधान कार्यप्रवाह के माध्यम से समर्थन करते हैं—साहित्य समीक्षा करने और योजनाएँ बनाने से लेकर प्रयोगों को निष्पादित करने और व्यापक रिपोर्ट लिखने तक।
- यह प्रणाली आपकी रचनात्मकता को बदलने के लिए नहीं बल्कि इसे पूरा करने के लिए डिज़ाइन की गई है, जिससे आप विचार-विमर्श और महत्वपूर्ण सोच पर ध्यान केंद्रित कर सकते हैं, जबकि कोडिंग और दस्तावेजीकरण जैसे दोहराए जाने वाले और समय-गहन कार्यों को स्वचालित किया जाता है। विभिन्न स्तर के संगणनात्मक संसाधनों और मानव भागीदारी को समायोजित करके, एजेंट लैबोरेटरी वैज्ञानिक खोज को तेज करने और आपके अनुसंधान उत्पादकता को अनुकूलित करने का लक्ष्य रखता है।
### 🔬 एजेंट लैबोरेटरी कैसे काम करता है?
- एजेंट लैबोरेटरी तीन मुख्य चरणों से मिलकर बनता है जो अनुसंधान प्रक्रिया का व्यवस्थित रूप से मार्गदर्शन करते हैं: (1) साहित्य समीक्षा, (2) प्रयोग, और (3) रिपोर्ट लेखन। प्रत्येक चरण के दौरान, LLM द्वारा संचालित विशेषीकृत एजेंट विशिष्ट उद्देश्यों को प्राप्त करने के लिए सहयोग करते हैं, परिणामों को अनुकूलित करने के लिए arXiv, Hugging Face, Python, और LaTeX जैसे बाहरी उपकरणों को एकीकृत करते हैं। यह संरचित कार्यप्रवाह संबंधित अनुसंधान पत्रों के स्वतंत्र संग्रह और विश्लेषण से शुरू होता है, सहयोगात्मक योजना और डेटा तैयारी के माध्यम से प्रगति करता है, और स्वचालित प्रयोग और व्यापक रिपोर्ट जनरेशन में समाप्त होता है। इन चरणों में विशिष्ट एजेंट भूमिकाओं और उनके योगदान के विवरण पेपर में चर्चा किए गए हैं।
## 🖥️ स्थापना
### Python venv विकल्प
1. **GitHub रिपॉजिटरी क्लोन करें**: रिपॉजिटरी को क्लोन करना शुरू करें निम्न कमांड का उपयोग करके:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **पायथन पर्यावरण सेटअप और सक्रिय करें**
```bash
python -m venv venv_agent_lab
```
- अब इस पर्यावरण को सक्रिय करें:
```bash
source venv_agent_lab/bin/activate
```
3. **आवश्यक पुस्तकालय स्थापित करें**
```bash
pip install -r requirements.txt
```
4. **pdflatex स्थापित करें [वैकल्पिक]**
```bash
sudo apt install pdflatex
```
- यह एजेंटों द्वारा latex स्रोत को संकलित करने में सक्षम बनाता है।
- **[महत्वपूर्ण]** यदि इस चरण को sudo एक्सेस न होने के कारण नहीं चलाया जा सकता है, तो Agent Laboratory को --compile_latex फ्लैग को false सेट करके pdf संकलन बंद किया जा सकता है: `--compile_latex=False`
5. **अब Agent Laboratory चलाएं!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
या, यदि आपने pdflatex स्थापित नहीं किया है:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## बेहतर अनुसंधान परिणामों के लिए सुझाव
#### [सुझाव #1] 📝 विस्तृत नोट्स लिखना सुनिश्चित करें! 📝
**विस्तृत नोट्स लिखना महत्वपूर्ण है** ताकि आपका एजेंट समझ सके कि आप अपने प्रोजेक्ट में क्या हासिल करना चाहते हैं, साथ ही किसी भी शैली की प्राथमिकताएँ। नोट्स में उन किसी भी प्रयोगों को शामिल किया जा सकता है जिन्हें आप एजेंटों से करने के लिए चाहते हैं, API कुंजी प्रदान करना, कुछ प्लॉट या आकृतियाँ शामिल करना, या कुछ भी जिसे आप अनुसंधान करते समय एजेंट को जानना चाहते हैं।
यह आपका अवसर भी है कि एजेंट को बताएं **कौन से कंप्यूट संसाधनों तक उसकी पहुंच है**, जैसे GPUs (कितने, किस प्रकार के GPU, कितने GBs), CPUs (कितने कोर, किस प्रकार के CPUs), स्टोरेज सीमाएँ, और हार्डवेयर स्पेसिफिकेशन।
नोट्स जोड़ने के लिए, आपको ai_lab_repo.py के अंदर task_notes_LLM संरचना को संशोधित करना होगा। नीचे हमारे कुछ प्रयोगों के लिए उपयोग किए गए नोट्स का एक उदाहरण दिया गया है।
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ[\"OPENAI_API_KEY\"] = \"{api_key}\"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel=\"gpt-4o-mini-2024-07-18\", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [सुझाव #2] 🚀 अधिक शक्तिशाली मॉडल का उपयोग सामान्यतः बेहतर अनुसंधान की ओर ले जाता है 🚀
अनुसंधान करते समय, **मॉडल का चयन परिणामों की गुणवत्ता पर महत्वपूर्ण प्रभाव डाल सकता है**। अधिक शक्तिशाली मॉडल आमतौर पर उच्च सटीकता, बेहतर तर्क क्षमताओं, और बेहतर रिपोर्ट जनरेशन प्रदान करते हैं। यदि संगणनात्मक संसाधन अनुमति देते हैं, तो o1-(mini/preview) या इसी तरह के अत्याधुनिक बड़े भाषा मॉडल जैसे उन्नत मॉडलों के उपयोग को प्राथमिकता दें।
हालांकि, **प्रदर्शन और लागत-प्रभावशीलता के बीच संतुलन बनाना महत्वपूर्ण है**। जबकि शक्तिशाली मॉडल बेहतर परिणाम दे सकते हैं, उन्हें चलाने में अक्सर अधिक खर्च और समय लगता है। उन्हें चयनात्मक रूप से उपयोग करने पर विचार करें—उदाहरण के लिए, मुख्य प्रयोगों या अंतिम विश्लेषणों के लिए—जबकि पुनरावृत्त कार्यों या प्रारंभिक प्रोटोटाइपिंग के लिए छोटे, अधिक कुशल मॉडलों पर निर्भर रहें।
जब संसाधन सीमित हों, **अपने विशिष्ट डेटासेट पर छोटे मॉडलों को फाइन-ट्यून करके या कार्य-विशिष्ट प्रॉम्प्ट के साथ पूर्व-प्रशिक्षित मॉडलों को मिलाकर प्रदर्शन और संगणनात्मक दक्षता के बीच वांछित संतुलन प्राप्त करें**।
-----
#### [सुझाव #3] ✅ आप चेकपॉइंट से पिछले सहेजनों को लोड कर सकते हैं ✅
**यदि आप प्रगति खो देते हैं, इंटरनेट कनेक्शन खोते हैं, या कोई उपकार्य विफल हो जाता है, तो आप हमेशा पिछली स्थिति से लोड कर सकते हैं।** आपकी सारी प्रगति डिफ़ॉल्ट रूप से state_saves वेरिएबल में सहेजी जाती है, जो प्रत्येक व्यक्तिगत चेकपॉइंट को संग्रहीत करता है। बस ai_lab_repo.py चलाते समय निम्नलिखित तर्क पास करें:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [सुझाव #4] 🈯 यदि आप अंग्रेजी के अलावा किसी अन्य भाषा में चला रहे हैं 🈲
यदि आप एजेंट लैबोरेटरी को अंग्रेजी के अलावा किसी अन्य भाषा में चला रहे हैं, तो कोई समस्या नहीं है, बस सुनिश्चित करें कि एजेंटों को आपकी पसंदीदा भाषा में अनुसंधान करने के लिए एक भाषा फ्लैग प्रदान करें। ध्यान दें कि हमने अन्य भाषाओं में एजेंट लैबोरेटरी चलाने का व्यापक अध्ययन नहीं किया है, इसलिए किसी भी समस्या का सामना करने पर रिपोर्ट करना सुनिश्चित करें।
उदाहरण के लिए, यदि आप चीनी में चला रहे हैं:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [सुझाव #5] 🌟 सुधार के लिए बहुत गुंजाइश है 🌟
इस कोडबेस में सुधार की बहुत गुंजाइश है, इसलिए यदि आप अंततः परिवर्तन करते हैं और समुदाय की मदद करना चाहते हैं, तो कृपया आप जो परिवर्तन किए हैं उन्हें साझा करने में संकोच न करें! हमें उम्मीद है कि यह उपकरण आपकी मदद करेगा!
## संदर्भ / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiaodong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-italian.md
================================================
# Laboratorio Agenti: Utilizzo di Agenti LLM come Assistenti di Ricerca
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Sito web | 💻 Software | 🎥 Video | 📚 Documento di esempio | 📰 Citazione】
## 📖 Panoramica
- **Agent Laboratory** è un flusso di lavoro di ricerca autonomo end-to-end progettato per assistere **te** come ricercatore umano nell'**implementazione delle tue idee di ricerca**. Agent Laboratory è composto da agenti specializzati guidati da grandi modelli linguistici per supportarti durante l'intero flusso di lavoro di ricerca—dalla conduzione di revisioni della letteratura e formulazione di piani all'esecuzione di esperimenti e alla scrittura di rapporti completi.
- Questo sistema non è progettato per sostituire la tua creatività ma per complementarla, permettendoti di concentrarti sull'ideazione e il pensiero critico mentre automatizza compiti ripetitivi e che richiedono tempo come la codifica e la documentazione. Accomodando diversi livelli di risorse computazionali e coinvolgimento umano, Agent Laboratory mira ad accelerare la scoperta scientifica e ottimizzare la tua produttività di ricerca.
### 🔬 Come funziona Agent Laboratory?
- Agent Laboratory è composto da tre fasi principali che guidano sistematicamente il processo di ricerca: (1) Revisione della letteratura, (2) Sperimentazione e (3) Scrittura del rapporto. Durante ogni fase, agenti specializzati guidati da LLM collaborano per raggiungere obiettivi distinti, integrando strumenti esterni come arXiv, Hugging Face, Python e LaTeX per ottimizzare i risultati. Questo flusso di lavoro strutturato inizia con la raccolta e analisi indipendente di documenti di ricerca pertinenti, prosegue attraverso la pianificazione collaborativa e la preparazione dei dati, e si conclude con la sperimentazione automatizzata e la generazione di rapporti completi. I dettagli sui ruoli specifici degli agenti e i loro contributi in queste fasi sono discussi nel documento.
## 🖥️ Installazione
### Opzione Python venv
1. **Clona il Repository GitHub**: Inizia clonando il repository usando il comando:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Configura e Attiva l'Ambiente Python**
```bash
python -m venv venv_agent_lab
```
- Ora attiva questo ambiente:
```bash
source venv_agent_lab/bin/activate
```
3. **Installa le librerie richieste**
```bash
pip install -r requirements.txt
```
4. **Installa pdflatex [OPZIONALE]**
```bash
sudo apt install pdflatex
```
- Questo permette agli agenti di compilare il codice sorgente LaTeX.
- **[IMPORTANTE]** Se questo passaggio non può essere eseguito a causa della mancanza di accesso sudo, la compilazione del pdf può essere disattivata eseguendo Agent Laboratory impostando il flag --compile_latex su false: --compile_latex=False
5. **Ora esegui Agent Laboratory!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
oppure, se non hai installato pdflatex
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## Consigli per migliori risultati di ricerca
#### [Consiglio #1] 📝 Assicurati di scrivere appunti dettagliati! 📝
**Scrivere appunti dettagliati è importante** per aiutare il tuo agente a comprendere cosa intendi realizzare nel tuo progetto, nonché eventuali preferenze di stile. Gli appunti possono includere qualsiasi esperimento che desideri che gli agenti eseguano, fornire chiavi API, determinati grafici o figure che desideri includere, o qualsiasi cosa tu voglia che l'agente sappia durante la ricerca.
Questa è anche la tua opportunità di far sapere all'agente **a quali risorse computazionali ha accesso**, ad esempio GPU (quante, che tipo di GPU, quanti GB), CPU (quanti core, che tipo di CPU), limitazioni di archiviazione e specifiche hardware.
Per aggiungere appunti, devi modificare la struttura task_notes_LLM all'interno di ai_lab_repo.py. Di seguito è fornito un esempio di set di appunti utilizzati per alcuni dei nostri esperimenti.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ[\"OPENAI_API_KEY\"] = \"{api_key}\"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel=\"gpt-4o-mini-2024-07-18\", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [Consiglio #2] 🚀 Utilizzare modelli più potenti generalmente porta a migliori ricerche 🚀
Quando si conduce una ricerca, **la scelta del modello può influenzare significativamente la qualità dei risultati**. I modelli più potenti tendono ad avere una maggiore accuratezza, migliori capacità di ragionamento e una migliore generazione dei rapporti. Se le risorse computazionali lo consentono, dai priorità all'uso di modelli avanzati come o1-(mini/preview) o simili modelli linguistici di grandi dimensioni all'avanguardia.
Tuttavia, **è importante bilanciare le prestazioni e l'efficienza dei costi**. Sebbene i modelli potenti possano fornire risultati migliori, spesso sono più costosi e richiedono più tempo per essere eseguiti. Considera di usarli selettivamente—ad esempio, per esperimenti chiave o analisi finali—mentre ti affidi a modelli più piccoli ed efficienti per compiti iterativi o prototipazione iniziale.
Quando le risorse sono limitate, **ottimizza effettuando il fine-tuning di modelli più piccoli** sul tuo dataset specifico o combinando modelli pre-addestrati con prompt specifici per il compito per raggiungere l'equilibrio desiderato tra prestazioni ed efficienza computazionale.
-----
#### [Consiglio #3] ✅ Puoi caricare salvataggi precedenti dai checkpoint ✅
**Se perdi i progressi, la connessione a internet o se un sotto-compito fallisce, puoi sempre caricare da uno stato precedente.** Tutti i tuoi progressi vengono salvati di default nella variabile state_saves, che memorizza ogni singolo checkpoint. Basta passare i seguenti argomenti quando esegui ai_lab_repo.py:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [Consiglio #4] 🈯 Se stai utilizzando una lingua diversa dall'inglese 🈲
Se stai utilizzando Agent Laboratory in una lingua diversa dall'inglese, nessun problema, basta assicurarti di fornire un flag di lingua agli agenti per eseguire la ricerca nella tua lingua preferita. Nota che non abbiamo studiato approfonditamente l'utilizzo di Agent Laboratory in altre lingue, quindi assicurati di segnalare eventuali problemi che incontri.
Ad esempio, se lo stai utilizzando in cinese:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [Consiglio #5] 🌟 C'è molto spazio per miglioramenti 🌟
C'è molto spazio per migliorare questo codice, quindi se alla fine apporti modifiche e vuoi aiutare la comunità, sentiti libero di condividere le modifiche che hai effettuato! Speriamo che questo strumento ti sia d'aiuto!
## Riferimenti / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiaodong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-japanese.md
================================================
# Agent Laboratory: Using LLM Agents as Research Assistants
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 概要
- **Agent Laboratory**は、**あなた**が**研究アイデアを実現する**ために支援するエンドツーエンドの自律的な研究ワークフローです。Agent Laboratoryは、大規模言語モデルによって駆動される専門のエージェントで構成されており、文献レビューの実施や計画の策定から実験の実行、包括的な報告書の作成まで、研究の全過程をサポートします。
- このシステムはあなたの創造性を置き換えるものではなく、補完するために設計されています。アイデアの創出や批判的思考に集中できるようにし、コーディングやドキュメント作成のような反復的で時間のかかる作業を自動化します。計算資源や人間の関与のレベルに応じて対応することで、Agent Laboratoryは科学的発見を加速し、研究の生産性を最適化することを目指しています。
### 🔬 Agent Laboratoryはどのように機能しますか?
- Agent Laboratoryは、研究プロセスを体系的に導く3つの主要なフェーズから構成されています:(1)文献レビュー、(2)実験、(3)報告書作成。各フェーズでは、LLMによって駆動される専門のエージェントが協力してそれぞれの目標を達成し、arXiv、Hugging Face、Python、LaTeXなどの外部ツールを統合して成果を最適化します。この構造化されたワークフローは、関連する研究論文の独立した収集と分析から始まり、協力的な計画とデータ準備を経て、自動化された実験と包括的な報告書の生成に至ります。これらのフェーズ全体にわたる具体的なエージェントの役割と貢献の詳細は論文で説明されています。
## 🖥️ インストール
### Python venv オプション
1. **GitHubリポジトリをクローンする**: 以下のコマンドを使用してリポジトリをクローンします:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Python環境を設定してアクティベートする**
```bash
python -m venv venv_agent_lab
```
- 次に、この環境をアクティベートします:
```bash
source venv_agent_lab/bin/activate
```
3. **必要なライブラリをインストールする**
```bash
pip install -r requirements.txt
```
4. **pdflatexをインストールする [オプション]**
```bash
sudo apt install pdflatex
```
- これにより、エージェントがLaTeXソースをコンパイルできるようになります。
- **[重要]** sudo権限がないためにこのステップを実行できない場合、Agent Laboratoryを実行する際に --compile_latexフラグをfalseに設定してPDFのコンパイルをオフにすることができます: --compile_latex=False
5. **Agent Laboratoryを実行します!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
または、pdflatexがインストールされていない場合
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## より良い研究成果を得るためのヒント
#### [ヒント #1] 📝 詳細なノートを書くことを忘れずに! 📝
**詳細なノートを書くことは重要です**。これにより、エージェントがプロジェクトで達成しようとしていることや、スタイルの好みを理解するのに役立ちます。ノートには、エージェントに実行してほしい実験、APIキーの提供、含めたい特定のプロットや図、研究を行う際にエージェントに知っておいてほしいことなどを含めることができます。
また、**エージェントがアクセスできる計算資源**を知らせる機会でもあります。例えば、GPU(数、種類、GB数)、CPU(コア数、種類)、ストレージの制限、ハードウェア仕様などです。
ノートを追加するには、ai_lab_repo.py内のtask_notes_LLM構造を変更する必要があります。以下に、いくつかの実験で使用されたノートの例を示します。
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ[\"OPENAI_API_KEY\"] = \"{api_key}\"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel=\"gpt-4o-mini-2024-07-18\", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [ヒント #2] 🚀 より強力なモデルを使用することで、一般的により良い研究が可能になります 🚀
研究を行う際、**モデルの選択は結果の質に大きな影響を与える可能性があります**。より強力なモデルは、通常、精度が高く、推論能力が優れており、報告書の生成も優れています。計算資源が許す場合は、o1-(mini/preview)などの先進的な大規模言語モデルの使用を優先してください。
ただし、**性能と費用対効果のバランスを取ることが重要です**。強力なモデルはより良い結果をもたらす可能性がありますが、実行には時間と費用がかかることが多いです。重要な実験や最終分析には選択的に使用し、反復作業や初期のプロトタイピングには小さく効率的なモデルを使用することを検討してください。
資源が限られている場合は、**小さなモデルを特定のデータセットでファインチューニングするか、タスク固有のプロンプトと組み合わせて使用することで、性能と計算効率の間の望ましいバランスを達成します**。
-----
#### [ヒント #3] ✅ チェックポイントから以前の保存をロードできます ✅
**進捗が失われた場合、インターネット接続が切れた場合、またはサブタスクが失敗した場合でも、以前の状態から常にロードできます。** すべての進捗はデフォルトでstate_saves変数に保存され、各チェックポイントが保存されます。ai_lab_repo.pyを実行する際に、以下の引数を渡すだけです
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [ヒント #4] 🈯 英語以外の言語で実行している場合 🈲
Agent Laboratoryを英語以外の言語で実行している場合でも問題ありません。エージェントが希望する言語で研究を行えるように、言語フラグを提供することを確認してください。Agent Laboratoryを他の言語で実行することについては十分に研究していないため、問題が発生した場合は必ず報告してください。
例えば、中国語で実行する場合:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [ヒント #5] 🌟 改善の余地がたくさんあります 🌟
このコードベースには改善の余地がたくさんありますので、変更を加えてコミュニティに貢献したい場合は、ぜひ変更内容を共有してください!このツールが皆さんのお役に立つことを願っています!
## 参考文献 / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiaodong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-korean.md
================================================
# Agent Laboratory: Using LLM Agents as Research Assistants
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 개요
- **Agent Laboratory**는 **당신**이 인간 연구자로서 **연구 아이디어를 구현**할 수 있도록 지원하는 엔드 투 엔드 자율 연구 워크플로우입니다. Agent Laboratory는 대규모 언어 모델에 의해 구동되는 전문화된 에이전트들로 구성되어 문헌 검토 수행, 계획 수립, 실험 실행, 종합 보고서 작성에 이르기까지 전체 연구 워크플로우를 지원합니다.
- 이 시스템은 당신의 창의성을 대체하기 위해 설계된 것이 아니라 보완하기 위해 설계되었습니다. 아이디어 발상과 비판적 사고에 집중할 수 있도록 하면서 코딩 및 문서화와 같은 반복적이고 시간이 많이 소요되는 작업을 자동화합니다. 다양한 수준의 컴퓨팅 자원과 인간의 참여를 수용함으로써 Agent Laboratory는 과학적 발견을 가속화하고 연구 생산성을 최적화하는 것을 목표로 합니다.
### 🔬 Agent Laboratory는 어떻게 작동하나요?
- Agent Laboratory는 연구 과정을 체계적으로 안내하는 세 가지 주요 단계로 구성됩니다: (1) 문헌 검토, (2) 실험, (3) 보고서 작성. 각 단계 동안 LLM에 의해 구동되는 전문화된 에이전트들이 협력하여 개별 목표를 달성하며, arXiv, Hugging Face, Python, LaTeX와 같은 외부 도구를 통합하여 결과를 최적화합니다. 이 구조화된 워크플로우는 관련 연구 논문의 독립적인 수집 및 분석으로 시작하여, 협력적인 계획 수립 및 데이터 준비를 거쳐, 자동화된 실험 실행 및 종합적인 보고서 생성으로 이어집니다. 이러한 단계 전반에 걸친 특정 에이전트 역할과 기여에 대한 자세한 내용은 논문에서 논의됩니다.
## 🖥️ 설치
### Python venv 옵션
1. **GitHub 저장소 복제**: 다음 명령어를 사용하여 저장소를 복제합니다:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Python 환경 설정 및 활성화**
```bash
python -m venv venv_agent_lab
```
- 이제 이 환경을 활성화합니다:
```bash
source venv_agent_lab/bin/activate
```
3. **필수 라이브러리 설치**
```bash
pip install -r requirements.txt
```
4. **pdflatex 설치 [옵션]**
```bash
sudo apt install pdflatex
```
- 이는 에이전트들이 LaTeX 소스를 컴파일할 수 있도록 합니다.
- **[중요]** sudo 접근 권한이 없어 이 단계를 실행할 수 없는 경우, --compile_latex 플래그를 false로 설정하여 Agent Laboratory 실행 시 PDF 컴파일을 비활성화할 수 있습니다: `--compile_latex=False`
5. **이제 Agent Laboratory를 실행하세요!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
또는, pdflatex가 설치되어 있지 않은 경우
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## 더 나은 연구 결과를 위한 팁
#### [팁 #1] 📝 광범위한 노트를 작성하세요! 📝
**광범위한 노트 작성은** 에이전트가 프로젝트에서 달성하려는 목표와 스타일 선호도를 이해하는 데 중요합니다. 노트에는 에이전트에게 수행하도록 원하는 실험, API 키 제공, 포함하고 싶은 특정 플롯이나 그림, 또는 연구를 수행할 때 에이전트가 알아야 할 모든 내용을 포함할 수 있습니다.
또한, **에이전트가 접근할 수 있는 컴퓨팅 자원**을 알려줄 수 있는 기회이기도 합니다. 예를 들어 GPU (몇 개, 어떤 유형의 GPU, GB 수), CPU (코어 수, CPU 유형), 저장 한계 및 하드웨어 사양 등을 포함할 수 있습니다.
노트를 추가하려면, ai_lab_repo.py 내부의 `task_notes_LLM` 구조를 수정해야 합니다. 아래는 일부 실험에 사용된 노트의 예시입니다.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ[\"OPENAI_API_KEY\"] = \"{api_key}\"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel=\"gpt-4o-mini-2024-07-18\", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [팁 #2] 🚀 더 강력한 모델을 사용하는 것이 일반적으로 더 나은 연구로 이어집니다 🚀
연구를 수행할 때, **모델의 선택은 결과의 질에 상당한 영향을 미칠 수 있습니다**. 더 강력한 모델은 일반적으로 더 높은 정확도, 더 나은 추론 능력, 더 우수한 보고서 생성을 제공합니다. 컴퓨팅 자원이 허용한다면, o1-(mini/preview) 또는 이와 유사한 최첨단 대규모 언어 모델과 같은 고급 모델의 사용을 우선시하세요.
그러나, **성능과 비용 효율성의 균형을 맞추는 것이 중요합니다**. 강력한 모델은 더 나은 결과를 제공할 수 있지만, 실행하는 데 비용과 시간이 더 많이 소요되는 경우가 많습니다. 예를 들어, 핵심 실험이나 최종 분석에는 고급 모델을 선택적으로 사용하고, 반복 작업이나 초기 프로토타이핑에는 더 작고 효율적인 모델을 사용하는 것을 고려하세요.
자원이 제한된 경우, **작은 모델을 특정 데이터셋에 맞게 미세 조정하거나, 사전 훈련된 모델과 작업 특화 프롬프트를 결합하여 성능과 컴퓨팅 효율성 사이의 원하는 균형을 달성할 수 있습니다**.
-----
#### [팁 #3] ✅ 체크포인트에서 이전 저장 상태를 불러올 수 있습니다 ✅
**진행 상황을 잃었거나 인터넷 연결이 끊기거나 하위 작업이 실패한 경우, 이전 상태에서 항상 불러올 수 있습니다.** 모든 진행 상황은 기본적으로 `state_saves` 변수에 저장되며, 이는 각 개별 체크포인트를 저장합니다. ai_lab_repo.py를 실행할 때 다음 인수를 전달하면 됩니다.
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [팁 #4] 🈯 영어가 아닌 다른 언어로 실행하는 경우 🈲
Agent Laboratory를 영어가 아닌 다른 언어로 실행하는 경우, 문제 없습니다. 단, 에이전트가 선호하는 언어로 연구를 수행할 수 있도록 언어 플래그를 제공해야 합니다. 다른 언어로 Agent Laboratory를 실행하는 것에 대해 광범위하게 연구하지 않았으므로, 발생하는 문제를 반드시 보고해 주세요.
예를 들어, 중국어로 실행하는 경우:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [팁 #5] 🌟 개선의 여지가 많습니다 🌟
이 코드베이스를 개선할 여지가 많으므로, 변경을 가하고 커뮤니티에 기여하고 싶다면, 변경한 사항을 자유롭게 공유해 주세요! 이 도구가 여러분에게 도움이 되길 바랍니다!
## 참고 문헌 / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiaodong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-portugues.md
================================================
# Agent Laboratory: Usando Agentes LLM como Assistentes de Pesquisa
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 Visão Geral
- **Agent Laboratory** é um fluxo de trabalho de pesquisa autônomo de ponta a ponta, destinado a auxiliar **você** como pesquisador humano na **implementação das suas ideias de pesquisa**. O Agent Laboratory consiste em agentes especializados movidos por grandes modelos de linguagem para apoiá-lo durante todo o fluxo de trabalho de pesquisa — desde a condução de revisões de literatura e formulação de planos até a execução de experimentos e a redação de relatórios abrangentes.
- Este sistema não foi projetado para substituir a sua criatividade, mas para complementá-la, permitindo que você se concentre na ideação e no pensamento crítico enquanto automatiza tarefas repetitivas e que consomem muito tempo, como codificação e documentação. Ao acomodar diferentes níveis de recursos computacionais e envolvimento humano, o Agent Laboratory visa acelerar a descoberta científica e otimizar a sua produtividade em pesquisa.
### 🔬 Como funciona o Agent Laboratory?
- O Agent Laboratory consiste em três fases principais que orientam sistematicamente o processo de pesquisa: (1) Revisão de Literatura, (2) Experimentação e (3) Redação de Relatórios. Durante cada fase, agentes especializados movidos por LLMs colaboram para alcançar objetivos distintos, integrando ferramentas externas como arXiv, Hugging Face, Python e LaTeX para otimizar os resultados. Este fluxo de trabalho estruturado começa com a coleta e análise independentes de artigos de pesquisa relevantes, avança através do planejamento colaborativo e preparação de dados, e resulta em experimentação automatizada e geração de relatórios abrangentes. Detalhes sobre os papéis específicos dos agentes e suas contribuições ao longo dessas fases são discutidos no artigo.
## 🖥️ Instalação
### Opção de ambiente virtual Python (venv)
1. **Clone o Repositório do GitHub**: Comece clonando o repositório usando o comando:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Configure e Ative o Ambiente Python**
```bash
python -m venv venv_agent_lab
```
- Agora, ative este ambiente:
```bash
source venv_agent_lab/bin/activate
```
3. **Instale as bibliotecas necessárias**
```bash
pip install -r requirements.txt
```
4. **Instale o pdflatex [OPCIONAL]**
```bash
sudo apt install pdflatex
```
- Isso permite que o código LaTeX seja compilado pelos agentes.
- **[IMPORTANTE]** Se esta etapa não puder ser executada devido à falta de acesso sudo, a compilação de PDF pode ser desativada executando o Agent Laboratory com a flag --compile_latex definida como false: --compile_latex=False
5. **Agora execute o Agent Laboratory!**
```bash
python ai_lab_repo.py --api-key "API_KEY_AQUI" --llm-backend "o1-mini" --research-topic "SUA IDEIA DE PESQUISA"
```
ou, se você não tiver o pdflatex instalado
```bash
python ai_lab_repo.py --api-key "API_KEY_AQUI" --llm-backend "o1-mini" --research-topic "SUA IDEIA DE PESQUISA" --compile_latex=False
```
-----
## Dicas para melhores resultados de pesquisa
#### [Dica #1] 📝 Certifique-se de escrever notas extensas! 📝
**Escrever notas extensas é importante** para ajudar seu agente a entender o que você está tentando realizar em seu projeto, bem como quaisquer preferências de estilo. As notas podem incluir quaisquer experimentos que você deseja que os agentes realizem, fornecendo chaves de API, certos gráficos ou figuras que você deseja incluir, ou qualquer coisa que você queira que o agente saiba ao realizar a pesquisa.
Esta também é sua oportunidade de informar ao agente **a quais recursos de computação ele tem acesso**, por exemplo, GPUs (quantas, que tipo de GPU, quantos GBs), CPUs (quantos núcleos, que tipo de CPUs), limitações de armazenamento e especificações de hardware.
Para adicionar notas, você deve modificar a estrutura task_notes_LLM dentro de ai_lab_repo.py. Abaixo está um exemplo de conjunto de notas usadas em alguns de nossos experimentos.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ[\"OPENAI_API_KEY\"] = \"{api_key}\"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel=\"gpt-4o-mini-2024-07-18\", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [Dica #2] 🚀 Usar modelos mais poderosos geralmente leva a melhores pesquisas 🚀
Ao conduzir pesquisas, **a escolha do modelo pode impactar significativamente a qualidade dos resultados**. Modelos mais poderosos tendem a ter maior precisão, melhores capacidades de raciocínio e melhor geração de relatórios. Se os recursos computacionais permitirem, priorize o uso de modelos avançados como o1-(mini/preview) ou modelos de linguagem grandes de última geração similares.
No entanto, **é importante equilibrar desempenho e custo-benefício**. Embora modelos poderosos possam gerar melhores resultados, eles geralmente são mais caros e consomem mais tempo para serem executados. Considere usá-los seletivamente — por exemplo, para experimentos chave ou análises finais — enquanto confia em modelos menores e mais eficientes para tarefas iterativas ou prototipagem inicial.
Quando os recursos são limitados, **otimize ajustando modelos menores** no seu conjunto de dados específico ou combinando modelos pré-treinados com prompts específicos para a tarefa para alcançar o equilíbrio desejado entre desempenho e eficiência computacional.
-----
#### [Dica #3] ✅ Você pode carregar salvamentos anteriores a partir de checkpoints ✅
**Se você perder o progresso, a conexão com a internet ou se uma subtarefa falhar, você sempre pode carregar a partir de um estado anterior.** Todo o seu progresso é salvo por padrão na variável state_saves, que armazena cada checkpoint individual. Basta passar os seguintes argumentos ao executar ai_lab_repo.py:
```bash
python ai_lab_repo.py --api-key "API_KEY_AQUI" --research-topic "SUA IDEIA DE PESQUISA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [Dica #4] 🈯 Se você estiver executando em um idioma diferente do inglês 🈲
Se você estiver executando o Agent Laboratory em um idioma diferente do inglês, sem problema, apenas certifique-se de fornecer uma flag de idioma para que os agentes realizem a pesquisa no seu idioma preferido. Observe que não estudamos extensivamente a execução do Agent Laboratory em outros idiomas, portanto, certifique-se de relatar quaisquer problemas que encontrar.
Por exemplo, se você estiver executando em chinês:
```bash
python ai_lab_repo.py --api-key "API_KEY_AQUI" --research-topic "SUA IDEIA DE PESQUISA (no seu idioma)" --llm-backend "o1-mini" --language "中文"
```
----
#### [Dica #5] 🌟 Há muito espaço para melhorias 🌟
Há muito espaço para melhorar esta base de código, então se você acabar fazendo alterações e quiser ajudar a comunidade, sinta-se à vontade para compartilhar as mudanças que você fez! Esperamos que esta ferramenta lhe seja útil!
## Referência / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiaodong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-russian.md
================================================
# Лаборатория Агентов: Использование агентов на основе больших языковых моделей в качестве научных ассистентов
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Веб-сайт | 💻 Программное обеспечение | 🎥 Видео | 📚 Пример статьи | 📰 Цитирование】
## 📖 Обзор
- **Лаборатория Агентов** — это автономный исследовательский процесс от начала до конца, предназначенный для помощи **вам** как человеческому исследователю в **реализации ваших исследовательских идей**. Лаборатория Агентов состоит из специализированных агентов, управляемых большими языковыми моделями, которые поддерживают вас на протяжении всего исследовательского процесса — от проведения обзора литературы и формулирования планов до выполнения экспериментов и написания подробных отчетов.
- Эта система не предназначена для замены вашего творчества, а дополняет его, позволяя вам сосредоточиться на генерации идей и критическом мышлении, одновременно автоматизируя повторяющиеся и времязатратные задачи, такие как кодирование и документирование. Адаптируясь к различным уровням вычислительных ресурсов и вовлеченности человека, Лаборатория Агентов стремится ускорить научные открытия и оптимизировать вашу исследовательскую продуктивность.
### 🔬 Как работает Лаборатория Агентов?
- Лаборатория Агентов состоит из трех основных фаз, которые систематически направляют исследовательский процесс: (1) Обзор литературы, (2) Экспериментирование и (3) Написание отчета. В каждой фазе специализированные агенты, управляемые большими языковыми моделями, сотрудничают для достижения отдельных целей, интегрируя внешние инструменты, такие как arXiv, Hugging Face, Python и LaTeX, для оптимизации результатов. Эта структурированная рабочая схема начинается с независимого сбора и анализа соответствующих научных работ, проходит через совместное планирование и подготовку данных и заканчивается автоматизированным проведением экспериментов и созданием подробных отчетов. Детали конкретных ролей агентов и их вклад на каждом этапе обсуждаются в статье.
## 🖥️ Установка
### Вариант с использованием Python venv
1. **Клонируйте репозиторий GitHub**: Начните с клонирования репозитория с помощью команды:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Настройте и активируйте Python окружение**
```bash
python -m venv venv_agent_lab
```
- Теперь активируйте это окружение:
```bash
source venv_agent_lab/bin/activate
```
3. **Установите необходимые библиотеки**
```bash
pip install -r requirements.txt
```
4. **Установите pdflatex [ОПЦИОНАЛЬНО]**
```bash
sudo apt install pdflatex
```
- Это позволяет агентам компилировать исходный код LaTeX.
- **[ВАЖНО]** Если этот шаг невозможно выполнить из-за отсутствия прав sudo, можно отключить компиляцию pdf, запустив Лабораторию Агентов с флагом --compile_latex, установленным в false: `--compile_latex=False`
5. **Теперь запустите Лабораторию Агентов!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "ВАША ИССЛЕДОВАТЕЛЬСКАЯ ИДЕЯ"
```
или, если у вас не установлен pdflatex
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "ВАША ИССЛЕДОВАТЕЛЬСКАЯ ИДЕЯ" --compile_latex=False
```
-----
## Советы для лучших исследовательских результатов
#### [Совет №1] 📝 Обязательно записывайте подробные заметки! 📝
**Ведение подробных заметок важно** для того, чтобы ваш агент понимал, что вы хотите достичь в вашем проекте, а также любые предпочтения в стиле. Заметки могут включать любые эксперименты, которые вы хотите, чтобы агенты выполняли, предоставление API-ключей, определенные графики или фигуры, которые вы хотите включить, или любую информацию, которую вы хотите, чтобы агент знал при проведении исследований.
Это также ваша возможность сообщить агенту, **какие вычислительные ресурсы у него есть**, например, GPU (сколько, какой тип GPU, сколько GB), CPU (сколько ядер, какой тип CPU), ограничения по памяти и спецификации оборудования.
Чтобы добавить заметки, необходимо изменить структуру task_notes_LLM внутри файла ai_lab_repo.py. Ниже приведен пример набора заметок, использованных в некоторых наших экспериментах.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ[\"OPENAI_API_KEY\"] = \"{api_key}\"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel=\"gpt-4o-mini-2024-07-18\", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [Совет №2] 🚀 Использование более мощных моделей обычно приводит к лучшим исследованиям 🚀
При проведении исследований, **выбор модели может значительно повлиять на качество результатов**. Более мощные модели, как правило, имеют более высокую точность, лучшие способности к рассуждению и более качественное генерирование отчетов. Если вычислительные ресурсы позволяют, отдавайте предпочтение использованию продвинутых моделей, таких как o1-(mini/preview) или подобных современных больших языковых моделей.
Однако, **важно балансировать между производительностью и экономической эффективностью**. Хотя мощные модели могут давать лучшие результаты, они часто дороже и требуют больше времени для выполнения. Рассмотрите возможность использования их выборочно — например, для ключевых экспериментов или окончательных анализов — в то время как для итеративных задач или начального прототипирования полагайтесь на более маленькие и эффективные модели.
Когда ресурсы ограничены, **оптимизируйте, дорабатывая более маленькие модели** на вашем конкретном наборе данных или комбинируя предобученные модели со специфическими для задачи подсказками, чтобы достичь желаемого баланса между производительностью и вычислительной эффективностью.
-----
#### [Совет №3] ✅ Вы можете загрузить предыдущие сохранения из контрольных точек ✅
**Если вы потеряете прогресс, потеряете интернет-соединение или если подзадача завершится неудачей, вы всегда можете загрузить предыдущую версию.** Весь ваш прогресс сохраняется по умолчанию в переменной state_saves, которая хранит каждую отдельную контрольную точку. Просто передайте следующие аргументы при запуске ai_lab_repo.py
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "ВАША ИССЛЕДОВАТЕЛЬСКАЯ ИДЕЯ" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [Совет №4] 🈯 Если вы работаете на другом языке, кроме английского 🈲
Если вы запускаете Лабораторию Агентов на другом языке, кроме английского, это не проблема, просто убедитесь, что вы предоставили языковой флаг агентам для проведения исследований на предпочитаемом вами языке. Обратите внимание, что мы не проводили обширных исследований по запуску Лаборатории Агентов на других языках, поэтому обязательно сообщайте о любых возникающих проблемах.
Например, если вы работаете на китайском языке:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "ВАША ИССЛЕДОВАТЕЛЬСКАЯ ИДЕЯ (на вашем языке)" --llm-backend "o1-mini" --language "中文"
```
----
#### [Совет №5] 🌟 Есть много возможностей для улучшения 🌟
Есть много возможностей для улучшения этой кодовой базы, поэтому если вы внесете изменения и захотите помочь сообществу, пожалуйста, не стесняйтесь поделиться внесенными изменениями! Мы надеемся, что этот инструмент вам поможет!
## Ссылки / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiaodong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-slovak.md
================================================
# Agent Laboratory: Používanie LLM Agentov ako Výskumných Asistentov
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Webová stránka | 💻 Softvér | 🎥 Video | 📚 Príkladový článok | 📰 Citácia】
## 📖 Prehľad
- **Agent Laboratory** je autonómny výskumný pracovný postup od začiatku do konca, ktorý má za úlohu asistovať **vám** ako ľudskému výskumníkovi pri **realizácii vašich výskumných nápadov**. Agent Laboratory pozostáva zo špecializovaných agentov poháňaných veľkými jazykovými modelmi, ktorí vás podporujú počas celého výskumného procesu – od vykonávania literárnych prehľadov a formulovania plánov až po realizáciu experimentov a písanie komplexných správ.
- Tento systém nie je navrhnutý na nahradenie vašej kreativity, ale na jej doplnenie, čo vám umožňuje sústrediť sa na tvorivosť a kritické myslenie pri automatizácii opakujúcich sa a časovo náročných úloh, ako je kódovanie a dokumentácia. Tým, že zohľadňuje rôzne úrovne výpočtových zdrojov a ľudského zapojenia, Agent Laboratory má za cieľ urýchliť vedecké objavy a optimalizovať vašu výskumnú produktivitu.
### 🔬 Ako Agent Laboratory funguje?
- Agent Laboratory sa skladá z troch hlavných fáz, ktoré systematicky usmerňujú výskumný proces: (1) Literárny prehľad, (2) Experimentovanie a (3) Písanie správ. Počas každej fázy špecializovaní agenti poháňaní LLM spolupracujú na dosiahnutí konkrétnych cieľov, integrujúc externé nástroje ako arXiv, Hugging Face, Python a LaTeX na optimalizáciu výsledkov. Táto štruktúrovaná pracovná postupnosť začína nezávislým zhromažďovaním a analýzou relevantných výskumných prác, pokračuje cez kolaboratívne plánovanie a prípravu dát a končí automatizovaným experimentovaním a komplexnou generáciou správ. Podrobnosti o konkrétnych rolách agentov a ich príspevkoch v rámci týchto fáz sú diskutované v článku.
## 🖥️ Inštalácia
### Python venv možnosť
1. **Naklonujte GitHub repozitár**: Začnite klonovaním repozitára pomocou príkazu:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Nastavte a aktivujte Python prostredie**
```bash
python -m venv venv_agent_lab
```
- Teraz aktivujte toto prostredie:
```bash
source venv_agent_lab/bin/activate
```
3. **Nainštalujte požadované knižnice**
```bash
pip install -r requirements.txt
```
4. **Nainštalujte pdflatex [VOLITEĽNÉ]**
```bash
sudo apt install pdflatex
```
- Toto umožňuje agentom kompilovať latex zdroj.
- **[DÔLEŽITÉ]** Ak tento krok nemôžete vykonať kvôli absencii sudo prístupu, kompiláciu pdf môžete vypnúť spustením Agent Laboratory s nastavením vlajky --compile_latex na false: `--compile_latex=False`
5. **Teraz spustite Agent Laboratory!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
alebo, ak nemáte nainštalovaný pdflatex
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## Tipy pre lepšie výskumné výsledky
#### [Tip #1] 📝 Uistite sa, že píšete rozsiahle poznámky! 📝
**Písanie rozsiahlych poznámok je dôležité** pre pomoc vášmu agentovi pochopiť, čo sa snažíte dosiahnuť vo vašom projekte, ako aj akékoľvek preferencie štýlu. Poznámky môžu obsahovať akékoľvek experimenty, ktoré chcete, aby agenti vykonali, poskytovanie API kľúčov, určité grafy alebo figúry, ktoré chcete zahrnúť, alebo čokoľvek, čo chcete, aby agent vedel pri vykonávaní výskumu.
Je to tiež vaša príležitosť informovať agenta, **aké výpočtové zdroje má k dispozícii**, napr. GPU (koľko, aký typ GPU, koľko GB), CPU (koľko jadier, aký typ CPU), obmedzenia úložiska a hardvérové špecifikácie.
Aby ste pridali poznámky, musíte upraviť štruktúru `task_notes_LLM` v súbore `ai_lab_repo.py`. Nižšie je uvedený príklad sady poznámok použitých pre niektoré naše experimenty.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ[\"OPENAI_API_KEY\"] = \"{api_key}\"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel=\"gpt-4o-mini-2024-07-18\", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [Tip #2] 🚀 Používanie výkonnejších modelov zvyčajne vedie k lepšiemu výskumu 🚀
Pri vykonávaní výskumu môže **výber modelu významne ovplyvniť kvalitu výsledkov**. Výkonnejšie modely majú tendenciu mať vyššiu presnosť, lepšie schopnosti logického uvažovania a lepšiu generáciu správ. Ak výpočtové zdroje umožňujú, uprednostnite používanie pokročilých modelov, ako sú o1-(mini/preview) alebo podobné najmodernejšie veľké jazykové modely.
Avšak, **je dôležité nájsť rovnováhu medzi výkonom a nákladovou efektívnosťou**. Zatiaľ čo výkonnejšie modely môžu priniesť lepšie výsledky, často sú drahšie a časovo náročnejšie na spustenie. Zvážte ich selektívne používanie – napríklad pre kľúčové experimenty alebo konečné analýzy – zatiaľ čo na iteratívne úlohy alebo počiatočné prototypovanie sa spoliehajte na menšie, efektívnejšie modely.
Keď sú zdroje obmedzené, **optimalizujte jemným ladením menších modelov** na vašich špecifických dátach alebo kombinovaním predtrénovaných modelov s úlohovo špecifickými promptami, aby ste dosiahli požadovanú rovnováhu medzi výkonom a výpočtovou efektívnosťou.
-----
#### [Tip #3] ✅ Môžete načítať predchádzajúce uloženia z kontrolných bodov ✅
**Ak stratíte postup, internetové pripojenie alebo ak sa podúloha nepodarí, môžete vždy načítať z predchádzajúceho stavu.** Všetok váš postup je predvolene uložený v premennej `state_saves`, ktorá ukladá každý jednotlivý kontrolný bod. Stačí pri spúšťaní `ai_lab_repo.py` zadať nasledujúce argumenty:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [Tip #4] 🈯 Ak pracujete v inom jazyku než angličtine 🈲
Ak spúšťate Agent Laboratory v inom jazyku než v angličtine, nie je problém, stačí zabezpečiť, aby ste agentom poskytli jazykovú vlajku pre vykonávanie výskumu vo vašom preferovanom jazyku. Všimnite si, že sme neštudovali dôkladne spúšťanie Agent Laboratory v iných jazykoch, preto určite hláste akékoľvek problémy, na ktoré narazíte.
Napríklad, ak pracujete v čínštine:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [Tip #5] 🌟 Je tu veľa priestoru na zlepšenie 🌟
Je tu veľa priestoru na zlepšenie tohto kódu, takže ak urobíte zmeny a chcete pomôcť komunite, neváhajte zdieľať zmeny, ktoré ste vykonali! Dúfame, že vám tento nástroj pomôže!
## Referencie / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-spanish.md
================================================
# Agent Laboratory: Using LLM Agents as Research Assistants
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Sitio web | 💻 Software | 🎥 Video | 📚 Artículo de ejemplo | 📰 Citación】
## 📖 Overview
- **Agent Laboratory** es un flujo de trabajo de investigación autónomo de extremo a extremo diseñado para asistir **a ti** como investigador humano en **implementar tus ideas de investigación**. Agent Laboratory consiste en agentes especializados impulsados por grandes modelos de lenguaje para apoyarte a lo largo de todo el flujo de trabajo de investigación, desde la realización de revisiones bibliográficas y la formulación de planes hasta la ejecución de experimentos y la redacción de informes exhaustivos.
- Este sistema no está diseñado para reemplazar tu creatividad, sino para complementarla, permitiéndote enfocarte en la ideación y el pensamiento crítico mientras automatiza tareas repetitivas y que consumen mucho tiempo, como la programación y la documentación. Al acomodar diferentes niveles de recursos computacionales e implicación humana, Agent Laboratory tiene como objetivo acelerar el descubrimiento científico y optimizar tu productividad en la investigación.
### 🔬 How does Agent Laboratory work?
- Agent Laboratory consta de tres fases principales que guían sistemáticamente el proceso de investigación: (1) Revisión de Literatura, (2) Experimentación y (3) Redacción de Informes. Durante cada fase, agentes especializados impulsados por LLM colaboran para lograr objetivos distintos, integrando herramientas externas como arXiv, Hugging Face, Python y LaTeX para optimizar los resultados. Este flujo de trabajo estructurado comienza con la recolección y análisis independiente de artículos de investigación relevantes, avanza a través de la planificación colaborativa y la preparación de datos, y culmina en la experimentación automatizada y la generación de informes exhaustivos. Los detalles sobre roles específicos de los agentes y sus contribuciones a lo largo de estas fases se discuten en el artículo.
## 🖥️ Installation
### Python venv option
1. **Clonar el Repositorio de GitHub**: Comienza clonando el repositorio usando el comando:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Configurar y Activar el Entorno de Python**
```bash
python -m venv venv_agent_lab
```
- Ahora activa este entorno:
```bash
source venv_agent_lab/bin/activate
```
3. **Instalar las librerías requeridas**
```bash
pip install -r requirements.txt
```
4. **Instalar pdflatex [OPCIONAL]**
```bash
sudo apt install texlive-latex-base
```
- Esto permite que las fuentes de LaTeX sean compiladas por los agentes.
- **[IMPORTANTE]** Si no puedes ejecutar este paso debido a la falta de acceso sudo, la compilación de PDF puede desactivarse ejecutando Agent Laboratory configurando la bandera `--compile_latex` a falso: `--compile_latex=False`
5. **¡Ahora ejecuta Agent Laboratory!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
o, si no tienes pdflatex instalado
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## Consejos para mejores resultados de investigación
#### [Consejo #1] 📝 ¡Asegúrate de escribir notas extensas! 📝
**Escribir notas extensas es importante** para ayudar a tu agente a comprender lo que buscas lograr en tu proyecto, así como cualquier preferencia de estilo. Las notas pueden incluir cualquier experimento que desees que los agentes realicen, proporcionar claves de API, ciertos gráficos o figuras que quieras incluir, o cualquier cosa que quieras que el agente sepa al realizar la investigación.
Esta también es tu oportunidad para informar al agente **a qué recursos computacionales tiene acceso**, por ejemplo, GPUs (cuántas, qué tipo de GPU, cuántos GB), CPUs (cuántos núcleos, qué tipo de CPUs), limitaciones de almacenamiento y especificaciones de hardware.
Para agregar notas, debes modificar la estructura `task_notes_LLM` dentro de `ai_lab_repo.py`. A continuación se proporciona un ejemplo de conjunto de notas utilizadas en algunos de nuestros experimentos.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [Consejo #2] 🚀 ¡Usar modelos más potentes generalmente conduce a una mejor investigación! 🚀
Al realizar investigaciones, **la elección del modelo puede impactar significativamente la calidad de los resultados**. Los modelos más potentes tienden a tener mayor precisión, mejores capacidades de razonamiento y mejor generación de informes. Si los recursos computacionales lo permiten, prioriza el uso de modelos avanzados como o1-(mini/preview) o modelos de lenguaje grandes similares de última generación.
Sin embargo, **es importante equilibrar el rendimiento y la rentabilidad**. Aunque los modelos potentes pueden ofrecer mejores resultados, a menudo son más costosos y requieren más tiempo para ejecutarse. Considera usarlos de manera selectiva, por ejemplo, para experimentos clave o análisis finales, mientras confías en modelos más pequeños y eficientes para tareas iterativas o prototipos iniciales.
Cuando los recursos son limitados, **optimiza ajustando finamente modelos más pequeños** en tu conjunto de datos específico o combinando modelos preentrenados con prompts específicos para tareas para lograr el equilibrio deseado entre rendimiento y eficiencia computacional.
-----
#### [Consejo #3] ✅ Puedes cargar guardados anteriores desde puntos de control ✅
**Si pierdes progreso, la conexión a internet o si una subtarea falla, siempre puedes cargar desde un estado anterior.** Todo tu progreso se guarda por defecto en la variable `state_saves`, que almacena cada punto de control individual. Simplemente pasa los siguientes argumentos al ejecutar `ai_lab_repo.py`
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [Consejo #4] 🈯 Si estás ejecutando en un idioma que no sea inglés 🈲
Si estás ejecutando Agent Laboratory en un idioma que no sea inglés, no hay problema, solo asegúrate de proporcionar una bandera de idioma a los agentes para realizar la investigación en tu idioma preferido. Ten en cuenta que no hemos estudiado extensivamente la ejecución de Agent Laboratory en otros idiomas, así que asegúrate de reportar cualquier problema que encuentres.
Por ejemplo, si estás ejecutando en chino:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [Consejo #5] 🌟 Hay mucho margen para mejorar 🌟
Hay mucho margen para mejorar esta base de código, así que si terminas haciendo cambios y quieres ayudar a la comunidad, ¡no dudes en compartir los cambios que has realizado! ¡Esperamos que esta herramienta te sea de ayuda!
## Referencia / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-turkish.md
================================================
# Agent Laboratuvarı: LLM Ajanlarını Araştırma Asistanı Olarak Kullanma
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 Genel Bakış
- **Agent Laboratuvarı**, **araştırma fikirlerinizi uygulamanıza** yardımcı olmak amacıyla **siz** insan araştırmacıyı desteklemek için tasarlanmış uçtan uca otonom bir araştırma iş akışıdır. Agent Laboratuvarı, literatür taramaları yapmaktan planlar oluşturmaya, deneyler yürütmekten kapsamlı raporlar yazmaya kadar tüm araştırma süreci boyunca sizi desteklemek için büyük dil modelleriyle desteklenen uzman ajanlardan oluşur.
- Bu sistem, yaratıcılığınızın yerini almak için değil, onu tamamlamak için tasarlanmıştır; böylece kodlama ve dokümantasyon gibi tekrarlayan ve zaman alıcı görevleri otomatikleştirirken, fikir üretimi ve eleştirel düşünmeye odaklanabilirsiniz. Farklı düzeylerde hesaplama kaynakları ve insan katılımını karşılayarak, Agent Laboratuvarı bilimsel keşfi hızlandırmayı ve araştırma verimliliğinizi optimize etmeyi amaçlamaktadır.
### 🔬 Agent Laboratuvarı Nasıl Çalışır?
- Agent Laboratuvarı, araştırma sürecini sistematik olarak yönlendiren üç ana aşamadan oluşur: (1) Literatür Taraması, (2) Deney Yapma ve (3) Rapor Yazımı. Her aşamada, LLM'ler tarafından yönlendirilen uzman ajanlar, arXiv, Hugging Face, Python ve LaTeX gibi dış araçları entegre ederek farklı hedeflere ulaşmak için iş birliği yapar ve sonuçları optimize eder. Bu yapılandırılmış iş akışı, ilgili araştırma makalelerinin bağımsız olarak toplanması ve analiz edilmesiyle başlar, ortak planlama ve veri hazırlama aşamalarından geçer ve otomatik deney yapma ile kapsamlı rapor oluşturma ile sona erer. Bu aşamalarda belirli ajan rollerinin ve katkılarının detayları makalede tartışılmaktadır.
## 🖥️ Kurulum
### Python venv seçeneği
1. **GitHub Deposu Klonlayın**: Depoyu aşağıdaki komutu kullanarak klonlayarak başlayın:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Python Ortamını Kurun ve Aktif Hale Getirin**
```bash
python -m venv venv_agent_lab
```
- Şimdi bu ortamı etkinleştirin:
```bash
source venv_agent_lab/bin/activate
```
3. **Gerekli Kütüphaneleri Yükleyin**
```bash
pip install -r requirements.txt
```
4. **pdflatex'i Yükleyin [İSTEĞE BAĞLI]**
```bash
sudo apt install texlive-latex-base
```
- Bu, ajanların LaTeX kaynaklarını derleyebilmesini sağlar.
- **[ÖNEMLİ]** Sudo erişiminiz olmadığı için bu adımı çalıştıramıyorsanız, Agent Laboratuvarı'nı çalıştırırken `--compile_latex` bayrağını false olarak ayarlayarak PDF derlemeyi kapatabilirsiniz: `--compile_latex=False`
5. **Şimdi Agent Laboratuvarı'nı Çalıştırın!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
veya, pdflatex yüklü değilse
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## Daha İyi Araştırma Sonuçları için İpuçları
#### [İpucu #1] 📝 Kapsamlı Notlar Yazdığınızdan Emin Olun! 📝
**Kapsamlı notlar yazmak**, ajanın projenizde neyi başarmak istediğinizi ve herhangi bir stil tercihlerinizi anlamasına yardımcı olduğu için önemlidir. Notlar, ajanların gerçekleştirmesini istediğiniz deneyler, API anahtarları sağlamak, dahil edilmesini istediğiniz belirli grafikler veya figürler veya araştırma yaparken ajanın bilmesi gereken her şey gibi unsurları içerebilir.
Ayrıca, ajana **erişebileceği hesaplama kaynaklarını** bildirmeniz için bir fırsattır, örneğin GPU'lar (kaç tane, hangi tür GPU, kaç GB), CPU'lar (kaç çekirdek, hangi tür CPU'lar), depolama sınırlamaları ve donanım özellikleri.
Not eklemek için, ai_lab_repo.py içindeki task_notes_LLM yapısını değiştirmeniz gerekir. Aşağıda, bazı deneylerimizde kullanılan örnek notlar verilmiştir.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [İpucu #2] 🚀 Daha Güçlü Modeller Kullanmak Genellikle Daha İyi Araştırma Sonuçlarına Yol Açar 🚀
Araştırma yaparken, **model seçimi sonuçların kalitesi üzerinde önemli bir etkiye sahip olabilir**. Daha güçlü modeller genellikle daha yüksek doğruluk, daha iyi akıl yürütme yetenekleri ve daha iyi rapor oluşturma özelliklerine sahiptir. Hesaplama kaynaklarınız izin veriyorsa, o1-(mini/preview) gibi gelişmiş modellerin veya benzeri en son büyük dil modellerinin kullanımını önceliklendirin.
Ancak, **performans ve maliyet etkinliği arasında denge kurmak önemlidir**. Güçlü modeller daha iyi sonuçlar verebilirken, genellikle çalıştırmaları daha pahalı ve zaman alıcıdır. Bunları seçici olarak kullanmayı düşünün—örneğin, ana deneyler veya son analizler için—iteratif görevler veya ilk prototipler için daha küçük, daha verimli modelleri kullanmaya devam edin.
Kaynaklar sınırlı olduğunda, **daha küçük modelleri özel veri setinizde ince ayar yaparak veya görev odaklı istemlerle önceden eğitilmiş modelleri birleştirerek performans ve hesaplama verimliliği arasında istenen dengeyi sağlayın**.
-----
#### [İpucu #3] ✅ Önceki Kontrol Noktalarından Kaydedilenleri Yükleyebilirsiniz ✅
**İlerlemenizi kaybederseniz, internet bağlantınız kesilirse veya bir alt görev başarısız olursa, her zaman önceki bir durumdan yükleme yapabilirsiniz.** Tüm ilerlemeniz varsayılan olarak her bir kontrol noktasını saklayan state_saves değişkeninde kaydedilir. ai_lab_repo.py çalıştırılırken aşağıdaki argümanları geçmeniz yeterlidir:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [İpucu #4] 🈯 İngilizce Dışında Bir Dil Kullanıyorsanız 🈲
Agent Laboratuvarı'nı İngilizce dışında bir dilde çalıştırıyorsanız sorun yok, sadece ajanlara araştırmayı tercih ettiğiniz dilde gerçekleştirmeleri için bir dil bayrağı sağlamanız yeterlidir. Agent Laboratuvarı'nı diğer dillerde çalıştırmayı kapsamlı bir şekilde incelemediğimizi unutmayın, bu yüzden karşılaştığınız herhangi bir problemi bildirdiğinizden emin olun.
Örneğin, Çincede çalıştırıyorsanız:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [İpucu #5] 🌟 Geliştirme İçin Çok Fazla Alan Var 🌟
Bu kod tabanını geliştirmek için çok fazla alan var, bu yüzden değişiklik yaparsanız ve topluluğa yardımcı olmak isterseniz, yaptığınız değişiklikleri paylaşmaktan çekinmeyin! Umarız bu araç size yardımcı olur!
## Referans / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```
================================================
FILE: readme/README-vietnamese.md
================================================
# Agent Laboratory: Sử dụng Đại Diện LLM làm Trợ Lý Nghiên Cứu
【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】
【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】
## 📖 Tổng Quan
- **Agent Laboratory** là một quy trình nghiên cứu tự động từ đầu đến cuối, nhằm hỗ trợ **bạn** với tư cách là nhà nghiên cứu con người trong việc **triển khai các ý tưởng nghiên cứu của bạn**. Agent Laboratory bao gồm các đại diện chuyên biệt được điều khiển bởi các mô hình ngôn ngữ lớn để hỗ trợ bạn trong toàn bộ quy trình nghiên cứu—từ việc thực hiện đánh giá tài liệu và xây dựng kế hoạch đến thực hiện các thí nghiệm và viết các báo cáo toàn diện.
- Hệ thống này không được thiết kế để thay thế sự sáng tạo của bạn mà để bổ sung cho nó, cho phép bạn tập trung vào ý tưởng và tư duy phản biện trong khi tự động hóa các nhiệm vụ lặp đi lặp lại và tốn thời gian như mã hóa và tài liệu hóa. Bằng cách đáp ứng các mức độ tài nguyên tính toán và sự tham gia của con người khác nhau, Agent Laboratory nhằm mục tiêu tăng tốc khám phá khoa học và tối ưu hóa năng suất nghiên cứu của bạn.
### 🔬 Agent Laboratory hoạt động như thế nào?
- Agent Laboratory bao gồm ba giai đoạn chính hướng dẫn quy trình nghiên cứu một cách có hệ thống: (1) Đánh giá Tài liệu, (2) Thực nghiệm, và (3) Viết Báo cáo. Trong mỗi giai đoạn, các đại diện chuyên biệt được điều khiển bởi LLM hợp tác để đạt được các mục tiêu riêng biệt, tích hợp các công cụ bên ngoài như arXiv, Hugging Face, Python, và LaTeX để tối ưu hóa kết quả. Quy trình làm việc có cấu trúc này bắt đầu với việc thu thập và phân tích độc lập các bài báo nghiên cứu liên quan, tiến tới lập kế hoạch hợp tác và chuẩn bị dữ liệu, và kết thúc với việc thực hiện các thí nghiệm tự động và tạo báo cáo toàn diện. Chi tiết về các vai trò cụ thể của đại diện và đóng góp của họ trong các giai đoạn này được thảo luận trong bài báo.
## 🖥️ Cài Đặt
### Tùy chọn môi trường ảo Python
1. **Nhân bản kho lưu trữ GitHub**: Bắt đầu bằng cách nhân bản kho lưu trữ bằng lệnh:
```bash
git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git
```
2. **Thiết lập và Kích hoạt Môi trường Python**
```bash
python -m venv venv_agent_lab
```
- Bây giờ kích hoạt môi trường này:
```bash
source venv_agent_lab/bin/activate
```
3. **Cài đặt các thư viện cần thiết**
```bash
pip install -r requirements.txt
```
4. **Cài đặt pdflatex [TUÝ CHỌN]**
```bash
sudo apt install texlive-latex-base
```
- Điều này cho phép mã nguồn latex được biên dịch bởi các đại diện.
- **[QUAN TRỌNG]** Nếu bước này không thể chạy do không có quyền sudo, việc biên dịch pdf có thể được tắt bằng cách chạy Agent Laboratory với cờ --compile_latex đặt thành false: --compile_latex=False
5. **Bây giờ chạy Agent Laboratory!**
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"
```
hoặc, nếu bạn không cài đặt pdflatex
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False
```
-----
## Mẹo để đạt được kết quả nghiên cứu tốt hơn
#### [Mẹo #1] 📝 Hãy chắc chắn ghi chép kỹ lưỡng! 📝
**Việc ghi chép kỹ lưỡng là quan trọng** để giúp đại diện của bạn hiểu bạn đang muốn đạt được điều gì trong dự án của mình, cũng như bất kỳ sở thích về phong cách nào. Ghi chú có thể bao gồm bất kỳ thí nghiệm nào bạn muốn các đại diện thực hiện, cung cấp các khóa API, các biểu đồ hoặc hình vẽ cụ thể bạn muốn bao gồm, hoặc bất cứ điều gì bạn muốn đại diện biết khi thực hiện nghiên cứu.
Đây cũng là cơ hội của bạn để cho đại diện biết **các tài nguyên tính toán mà nó có quyền truy cập**, ví dụ: GPU (số lượng, loại GPU, số GB), CPU (số lượng lõi, loại CPU), hạn chế về lưu trữ, và các thông số phần cứng.
Để thêm ghi chú, bạn phải sửa cấu trúc task_notes_LLM bên trong ai_lab_repo.py. Dưới đây là một ví dụ về bộ ghi chú được sử dụng cho một số thí nghiệm của chúng tôi.
```python
task_notes_LLM = [
{"phases": ["plan formulation"],
"note": f"You should come up with a plan for TWO experiments."},
{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},
{"phases": ["running experiments"],
"note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"},
{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
{"phases": ["data preparation", "running experiments"],
"note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},
{"phases": ["data preparation", "running experiments"],
"note": "Generate figures with very colorful and artistic design."},
]
```
--------
#### [Mẹo #2] 🚀 Sử dụng các mô hình mạnh mẽ hơn thường dẫn đến nghiên cứu tốt hơn 🚀
Khi tiến hành nghiên cứu, **lựa chọn mô hình có thể ảnh hưởng đáng kể đến chất lượng kết quả**. Các mô hình mạnh mẽ hơn thường có độ chính xác cao hơn, khả năng lý luận tốt hơn và khả năng tạo báo cáo tốt hơn. Nếu tài nguyên tính toán cho phép, hãy ưu tiên sử dụng các mô hình tiên tiến như o1-(mini/preview) hoặc các mô hình ngôn ngữ lớn tiên tiến tương tự.
Tuy nhiên, **quan trọng là phải cân bằng giữa hiệu suất và hiệu quả chi phí**. Trong khi các mô hình mạnh mẽ có thể mang lại kết quả tốt hơn, chúng thường đắt hơn và tốn thời gian chạy. Hãy cân nhắc sử dụng chúng một cách chọn lọc—ví dụ, cho các thí nghiệm chính hoặc phân tích cuối cùng—trong khi dựa vào các mô hình nhỏ hơn, hiệu quả hơn cho các nhiệm vụ lặp đi lặp lại hoặc tạo mẫu ban đầu.
Khi tài nguyên hạn chế, **tối ưu hóa bằng cách tinh chỉnh các mô hình nhỏ hơn** trên bộ dữ liệu cụ thể của bạn hoặc kết hợp các mô hình đã được huấn luyện trước với các gợi ý cụ thể cho nhiệm vụ để đạt được sự cân bằng mong muốn giữa hiệu suất và hiệu quả tính toán.
-----
#### [Mẹo #3] ✅ Bạn có thể tải lại các lưu trạng thái trước từ các điểm kiểm tra ✅
**Nếu bạn mất tiến độ, kết nối internet, hoặc nếu một nhiệm vụ phụ thất bại, bạn luôn có thể tải lại từ trạng thái trước đó.** Tất cả tiến độ của bạn được lưu mặc định trong biến state_saves, lưu trữ từng điểm kiểm tra riêng lẻ. Chỉ cần truyền các tham số sau khi chạy ai_lab_repo.py
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"
```
-----
#### [Mẹo #4] 🈯 Nếu bạn đang chạy bằng ngôn ngữ khác tiếng Anh 🈲
Nếu bạn đang chạy Agent Laboratory bằng ngôn ngữ khác tiếng Anh, không vấn đề gì, chỉ cần đảm bảo cung cấp cờ ngôn ngữ cho các đại diện để thực hiện nghiên cứu bằng ngôn ngữ bạn mong muốn. Lưu ý rằng chúng tôi chưa nghiên cứu kỹ việc chạy Agent Laboratory bằng các ngôn ngữ khác, vì vậy hãy chắc chắn báo cáo bất kỳ vấn đề nào bạn gặp phải.
Ví dụ, nếu bạn đang chạy bằng tiếng Trung:
```bash
python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"
```
----
#### [Mẹo #5] 🌟 Có rất nhiều cơ hội để cải thiện 🌟
Có rất nhiều cơ hội để cải thiện cơ sở mã này, vì vậy nếu bạn thực hiện thay đổi và muốn giúp cộng đồng, đừng ngần ngại chia sẻ các thay đổi mà bạn đã thực hiện! Chúng tôi hy vọng công cụ này sẽ giúp ích cho bạn!
## Tài liệu Tham khảo / Bibtex
```bibtex
@preprint{schmidgall2025AgentLaboratory,
title={Agent Laboratory: Using LLM Agents as Research Assistants},
author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad},
year={2025}
}
```