@@ -223,7 +223,7 @@ Action:
|
||||
# check that add_base_tools will not interfere with existing tools
|
||||
with pytest.raises(KeyError) as e:
|
||||
agent = ReactJsonAgent(tools=toolset_3, llm_engine=fake_react_json_llm, add_base_tools=True)
|
||||
assert "python_interpreter already exists in the toolbox" in str(e)
|
||||
assert "already exists in the toolbox" in str(e)
|
||||
|
||||
# check that python_interpreter base tool does not get added to code agents
|
||||
agent = ReactCodeAgent(tools=[], llm_engine=fake_react_code_llm, add_base_tools=True)
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from transformers import load_tool
|
||||
@@ -241,8 +242,41 @@ for block in text_block:
|
||||
code = """
|
||||
digits, i = [1, 2, 3], 1
|
||||
digits[i], digits[i + 1] = digits[i + 1], digits[i]"""
|
||||
evaluate_python_code(code, {"range": range, "print": print, "int": int}, {})
|
||||
|
||||
code = """
|
||||
def calculate_isbn_10_check_digit(number):
|
||||
total = sum((10 - i) * int(digit) for i, digit in enumerate(number))
|
||||
remainder = total % 11
|
||||
check_digit = 11 - remainder
|
||||
if check_digit == 10:
|
||||
return 'X'
|
||||
elif check_digit == 11:
|
||||
return '0'
|
||||
else:
|
||||
return str(check_digit)
|
||||
|
||||
# Given 9-digit numbers
|
||||
numbers = [
|
||||
"478225952",
|
||||
"643485613",
|
||||
"739394228",
|
||||
"291726859",
|
||||
"875262394",
|
||||
"542617795",
|
||||
"031810713",
|
||||
"957007669",
|
||||
"871467426"
|
||||
]
|
||||
|
||||
# Calculate check digits for each number
|
||||
check_digits = [calculate_isbn_10_check_digit(number) for number in numbers]
|
||||
print(check_digits)
|
||||
"""
|
||||
state = {}
|
||||
evaluate_python_code(code, {"range": range, "print": print, "int": int}, state)
|
||||
evaluate_python_code(
|
||||
code, {"range": range, "print": print, "sum": sum, "enumerate": enumerate, "int": int, "str": str}, state
|
||||
)
|
||||
|
||||
def test_listcomp(self):
|
||||
code = "x = [i for i in range(3)]"
|
||||
@@ -273,6 +307,17 @@ digits[i], digits[i + 1] = digits[i + 1], digits[i]"""
|
||||
result = evaluate_python_code(code, {"range": range}, state={})
|
||||
assert result == {0: 0, 1: 1, 2: 4}
|
||||
|
||||
code = "{num: name for num, name in {101: 'a', 102: 'b'}.items() if name not in ['a']}"
|
||||
result = evaluate_python_code(code, {"print": print}, state={}, authorized_imports=["pandas"])
|
||||
assert result == {102: "b"}
|
||||
|
||||
code = """
|
||||
shifts = {'A': ('6:45', '8:00'), 'B': ('10:00', '11:45')}
|
||||
shift_minutes = {worker: ('a', 'b') for worker, (start, end) in shifts.items()}
|
||||
"""
|
||||
result = evaluate_python_code(code, {}, state={})
|
||||
assert result == {"A": ("a", "b"), "B": ("a", "b")}
|
||||
|
||||
def test_tuple_assignment(self):
|
||||
code = "a, b = 0, 1\nb"
|
||||
result = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
|
||||
@@ -341,7 +386,7 @@ if char.isalpha():
|
||||
result = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
|
||||
assert result == "lose"
|
||||
|
||||
code = "import time\ntime.sleep(0.1)"
|
||||
code = "import time, re\ntime.sleep(0.1)"
|
||||
result = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
|
||||
assert result is None
|
||||
|
||||
@@ -369,6 +414,23 @@ if char.isalpha():
|
||||
result = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
|
||||
assert result == "LATIN CAPITAL LETTER A"
|
||||
|
||||
# Check that submodule imports are handled properly and do not raise an error
|
||||
code = "import numpy.random as rd\nrng = rd.default_rng(12345)\nrng.random()"
|
||||
result = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy"])
|
||||
|
||||
code = "from numpy.random import default_rng as d_rng\nrng = d_rng(12345)\nrng.random()"
|
||||
result = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy"])
|
||||
|
||||
def test_additional_imports(self):
    """Check which `authorized_imports` entries let numpy imports through the evaluator."""
    # A plain top-level import is accepted once its package is listed.
    evaluate_python_code("import numpy as np", authorized_imports=["numpy"], state={})

    submodule_import = "import numpy.random as rd"
    # Both the exact dotted submodule and its parent package are accepted.
    for allowed in (["numpy.random"], ["numpy"]):
        evaluate_python_code(submodule_import, authorized_imports=allowed, state={})

    # Authorizing only "random" must not let "numpy.random" through.
    with pytest.raises(InterpreterError):
        evaluate_python_code(submodule_import, authorized_imports=["random"], state={})
|
||||
|
||||
def test_multiple_comparators(self):
|
||||
code = "0 <= -1 < 4 and 0 <= -5 < 4"
|
||||
result = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
|
||||
@@ -400,7 +462,7 @@ def function():
|
||||
print("2")
|
||||
function()"""
|
||||
state = {}
|
||||
evaluate_python_code(code, {"print": print}, state)
|
||||
evaluate_python_code(code, {"print": print}, state=state)
|
||||
assert state["print_outputs"] == "1\n2\n"
|
||||
|
||||
def test_tuple_target_in_iterator(self):
|
||||
@@ -612,7 +674,7 @@ assert lock.locked == False
|
||||
"""
|
||||
state = {}
|
||||
tools = {}
|
||||
evaluate_python_code(code, tools, state)
|
||||
evaluate_python_code(code, tools, state=state)
|
||||
|
||||
def test_default_arg_in_function(self):
|
||||
code = """
|
||||
@@ -672,3 +734,94 @@ returns_none(1)
|
||||
state = {}
|
||||
result = evaluate_python_code(code, {"print": print, "range": range, "ord": ord, "chr": chr}, state=state)
|
||||
assert result is None
|
||||
|
||||
def test_nested_for_loop(self):
    """Evaluate a two-level `for` loop followed by a nested list comprehension.

    The embedded snippet must carry its own indentation: the inner loop fills
    `subres`, and the outer loop appends each `subres` to `all_res`. Without
    that indentation the snippet is not valid Python.
    """
    code = """
all_res = []
for i in range(10):
    subres = []
    for j in range(i):
        subres.append(j)
    all_res.append(subres)

out = [i for sublist in all_res for i in sublist]
out[:10]
"""
    state = {}
    result = evaluate_python_code(code, {"print": print, "range": range}, state=state)
    # all_res is [[], [0], [0, 1], [0, 1, 2], [0, 1, 2, 3], ...]; flattening it
    # and keeping the first ten items gives the list below.
    assert result == [0, 0, 1, 0, 1, 2, 0, 1, 2, 3]
|
||||
|
||||
def test_pandas(self):
    """Evaluate three pandas snippets: mask filtering, `.loc` with `isin`, and a groupby mean."""
    # Snippet 1: coerce a string column to numbers, keep the rows equal to 5,
    # then read the second matching row as [Quantity, SetCount].
    numeric_filter_code = """
import pandas as pd

df = pd.DataFrame.from_dict({'SetCount': ['5', '4', '5'], 'Quantity': [1, 0, -1]})

df['SetCount'] = pd.to_numeric(df['SetCount'], errors='coerce')

parts_with_5_set_count = df[df['SetCount'] == 5.0]
parts_with_5_set_count[['Quantity', 'SetCount']].values[1]
"""
    second_match = evaluate_python_code(numeric_filter_code, {}, state={}, authorized_imports=["pandas"])
    assert np.array_equal(second_match, [-1, 5])

    # Snippet 2: ends on an assignment; the assertion below reads `.values` on the
    # returned object, so it relies on the evaluator returning the assigned DataFrame.
    isin_filter_code = """
import pandas as pd

df = pd.DataFrame.from_dict({"AtomicNumber": [111, 104, 105], "ok": [0, 1, 2]})
print("HH0")

# Filter the DataFrame to get only the rows with outdated atomic numbers
filtered_df = df.loc[df['AtomicNumber'].isin([104])]
"""
    filtered = evaluate_python_code(isin_filter_code, {"print": print}, state={}, authorized_imports=["pandas"])
    assert np.array_equal(filtered.values[0], [104, 1])

    # Snippet 3: group by class and average; Pclass 2 has Survived values 0 and 1.
    groupby_code = """import pandas as pd
data = pd.DataFrame.from_dict([
{"Pclass": 1, "Survived": 1},
{"Pclass": 2, "Survived": 0},
{"Pclass": 2, "Survived": 1}
])
survival_rate_by_class = data.groupby('Pclass')['Survived'].mean()
"""
    survival_rates = evaluate_python_code(groupby_code, {}, state={}, authorized_imports=["pandas"])
    assert survival_rates.values[1] == 0.5
|
||||
|
||||
def test_starred(self):
    """Evaluate a call that unpacks two tuples with starred arguments.

    The snippet defines `haversine` and calls it as
    `haversine(*coords_geneva, *coords_barcelona)`. The function body must be
    indented inside the snippet, otherwise the `def` has no body and the
    snippet is not valid Python.
    """
    code = """
from math import radians, sin, cos, sqrt, atan2

def haversine(lat1, lon1, lat2, lon2):
    R = 6371000 # Radius of the Earth in meters
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    distance = R * c
    return distance

coords_geneva = (46.1978, 6.1342)
coords_barcelona = (41.3869, 2.1660)

distance_geneva_barcelona = haversine(*coords_geneva, *coords_barcelona)
"""
    result = evaluate_python_code(code, {"print": print, "map": map}, state={}, authorized_imports=["math"])
    # The snippet ends on an assignment; the returned value is compared as the
    # distance in meters, rounded to one decimal to avoid float noise.
    assert round(result, 1) == 622395.4
|
||||
|
||||
def test_for(self):
    """Evaluate a `for` loop whose target nests a tuple: `worker, (start, end)`.

    The loop body must be indented inside the snippet; at column 0 the `for`
    statement has no body and the snippet is not valid Python.
    """
    code = """
shifts = {
    "Worker A": ("6:45 pm", "8:00 pm"),
    "Worker B": ("10:00 am", "11:45 am")
}

shift_intervals = {}
for worker, (start, end) in shifts.items():
    shift_intervals[worker] = end
shift_intervals
"""
    result = evaluate_python_code(code, {"print": print, "map": map}, state={})
    # Only the `end` element of each (start, end) pair is stored per worker.
    assert result == {"Worker A": "8:00 pm", "Worker B": "11:45 am"}
|
||||
|
||||
Reference in New Issue
Block a user