Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,203 changes: 1,013 additions & 190 deletions amnesia_bench.py

Large diffs are not rendered by default.

32 changes: 32 additions & 0 deletions models.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[
{
"name": "Qwen3.5-35B-A3B-Q4",
"url": "http://localhost:8080",
"api_key_env": null
},
{
"name": "gemini-3.1-flash-lite-preview",
"url": "gemini://gemini-3.1-flash-lite-preview",
"api_key_env": "GEMINI_API_KEY"
},
{
"name": "openai/gpt-oss-120b",
"url": "openrouter://openai/gpt-oss-120b:free",
"api_key_env": "OPENROUTER_API_KEY"
},
{
"name": "openai/gpt-oss-20b",
"url": "openrouter://openai/gpt-oss-20b:free",
"api_key_env": "OPENROUTER_API_KEY"
},
{
"name": "nvidia/nemotron-3-super-120b",
"url": "openrouter://nvidia/nemotron-3-super-120b-a12b:free",
"api_key_env": "OPENROUTER_API_KEY"
},
{
"name": "qwen/qwen3-next-80b",
"url": "openrouter://qwen/qwen3-next-80b-a3b-instruct:free",
"api_key_env": "OPENROUTER_API_KEY"
}
]
10 changes: 10 additions & 0 deletions problems/aimo3_hard_00eaa992.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"problem_id": "aimo3_hard_00eaa992",
"problem_text": "If \\((60-a)(60-b)(60-c)(60-d)(60-e) = 1025\\), what is the value of \\(a + b + c + d\\)?\n\nNote: 1025 is divisible by 5.",
"ground_truth": "188",
"topic": "number_theory",
"source": "aimo3_hard",
"gptoss_20b_pass_rate": 0.125,
"gptoss_20b_n_correct": 2,
"gptoss_20b_correct_token_avg": null
}
10 changes: 10 additions & 0 deletions problems/aimo3_hard_2e0b7ba3.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"problem_id": "aimo3_hard_2e0b7ba3",
"problem_text": "How many ways are there to paint the first level of the Th\u00e1p R\u00f9a tower model, given that the $3$ doorways at the front are painted with the same color and each of the remaining $7$ doorways is painted with one of the three colors such that any two adjacent doorways with a common side on the same level are painted with different colors?",
"ground_truth": "216",
"topic": "combinatorics",
"source": "aimo3_hard",
"gptoss_20b_pass_rate": 0.125,
"gptoss_20b_n_correct": 2,
"gptoss_20b_correct_token_avg": null
}
10 changes: 10 additions & 0 deletions problems/aimo3_hard_5f9595ae.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"problem_id": "aimo3_hard_5f9595ae",
"problem_text": "In an acute triangle \\(ABC\\) with \\(\\angle A = 30^\\circ\\), a circle with diameter \\(BC\\) intersects \\(AB\\) and \\(AC\\) at points \\(D\\) and \\(E\\), respectively. Find the ratio of the area of \\(\\triangle ADC\\) to the area of quadrilateral \\(DBCE\\).",
"ground_truth": "3",
"topic": "geometry",
"source": "aimo3_hard",
"gptoss_20b_pass_rate": 0.125,
"gptoss_20b_n_correct": 2,
"gptoss_20b_correct_token_avg": null
}
10 changes: 10 additions & 0 deletions problems/aimo3_hard_858cba58.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"problem_id": "aimo3_hard_858cba58",
"problem_text": "All vertices of a pyramid lie on the facets of a cube but not on its edges, and each facet contains at least one vertex. What is the maximum possible number of the vertices of the pyramid?",
"ground_truth": "13",
"topic": "other",
"source": "aimo3_hard",
"gptoss_20b_pass_rate": 0.125,
"gptoss_20b_n_correct": 2,
"gptoss_20b_correct_token_avg": null
}
10 changes: 10 additions & 0 deletions problems/aimo3_hard_b0dc264b.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"problem_id": "aimo3_hard_b0dc264b",
"problem_text": "Given the group $(G, *)$ with $G = \\{a, b, c, d, f, g, h, k\\}$ and identity $k$, and the following operations:\n- $a * b = c$\n- $b * a = d$\n- $f * f = a$\n- $g * g = b$\n- $h * h = c$\n\nHow many self-inverses does $(G, *)$ have?",
"ground_truth": "2",
"topic": "other",
"source": "aimo3_hard",
"gptoss_20b_pass_rate": 0.125,
"gptoss_20b_n_correct": 2,
"gptoss_20b_correct_token_avg": null
}
10 changes: 10 additions & 0 deletions problems/aimo3_hard_b1da52fa.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"problem_id": "aimo3_hard_b1da52fa",
"problem_text": "Drunk and Horse play a game on a $2023! \\times 2023!$ grid. Horse chooses a positive integer $k < 2023$ and lights up each square using $k$ different colors. Horse starts on a random square, and Drunk starts on another square exactly $2023^2$ squares away in one direction. In each turn, Drunk moves one square in some direction but cannot move in the same direction for 2023 consecutive moves. Horse moves up to $m$ squares in a direction of its choice, where $m = 2023 - k$. Horse gets alerted of the color of the square Drunk was previously on. Horse wins if it ends up on a square that shares at least one vertex with Drunk's square. What is the minimum number of turns in which Horse can guarantee a win?",
"ground_truth": "2030",
"topic": "combinatorics",
"source": "aimo3_hard",
"gptoss_20b_pass_rate": 0.125,
"gptoss_20b_n_correct": 2,
"gptoss_20b_correct_token_avg": null
}
10 changes: 10 additions & 0 deletions problems/aimo3_hard_f728b4b1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"problem_id": "aimo3_hard_f728b4b1",
"problem_text": "Triangle $ABC$ is constructed such that $AB>BC>AC$. Points $E$, $F$, and $G$ are drawn from $A$, $B$, and $C$ to their opposite sides respectively. If $\\frac{BE}{EC} = \\frac{7}{12}$, the ratio of $\\frac{AF}{FC}$ can be represented as $\\frac{m}{n}$, where $m$ and $n$ are relatively prime positive integers. What is the smallest possible sum of $m+n$?",
"ground_truth": "26",
"topic": "geometry",
"source": "aimo3_hard",
"gptoss_20b_pass_rate": 0.125,
"gptoss_20b_n_correct": 2,
"gptoss_20b_correct_token_avg": null
}