Skip to content

Commit 0687fb9

Browse files
authored
fix example scripts in colab (#174)
* fixed pip install commands to work with the latest Colab environment
* added numerai-tools to install steps
* updated predict proxy functions to include benchmarks as second parameter
* updated all cached pickles
1 parent 1f4bd2e commit 0687fb9

9 files changed

+2971
-2565
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,6 @@ ENV/
9797
.idea
9898
example_model.xgb
9999

100-
.DS_Store
100+
.DS_Store
101+
102+
v5.0/

cached-pickles/example_model.pkl

30.5 KB
Binary file not shown.
-58.6 KB
Binary file not shown.

cached-pickles/hello_numerai.pkl

30.5 KB
Binary file not shown.

cached-pickles/target_ensemble.pkl

122 KB
Binary file not shown.

example_model.ipynb

Lines changed: 30 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,20 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": null,
14+
"execution_count": 1,
1515
"metadata": {
1616
"colab": {
1717
"base_uri": "https://localhost:8080/"
1818
},
1919
"id": "Ekw8Z93ljC3v",
20-
"outputId": "675ac893-5a46-4c6b-dc03-09438941d1fc"
20+
"outputId": "bdd16698-2ad0-4423-b090-c5ce55fe3053"
2121
},
2222
"outputs": [
2323
{
2424
"name": "stdout",
2525
"output_type": "stream",
2626
"text": [
27-
"Python 3.10.12\n"
27+
"Python 3.11.13\n"
2828
]
2929
}
3030
],
@@ -40,59 +40,47 @@
4040
"base_uri": "https://localhost:8080/"
4141
},
4242
"id": "yoy_wT1rhMqF",
43-
"outputId": "4268fdb0-84d2-4502-97e4-e93a1440c8ee"
43+
"outputId": "e038b50f-1b61-4334-be62-28f4dc40a0a0"
4444
},
45-
"outputs": [
46-
{
47-
"name": "stdout",
48-
"output_type": "stream",
49-
"text": [
50-
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.4/34.4 MB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
51-
"\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
52-
"lida 0.0.10 requires fastapi, which is not installed.\n",
53-
"lida 0.0.10 requires kaleido, which is not installed.\n",
54-
"lida 0.0.10 requires python-multipart, which is not installed.\n",
55-
"lida 0.0.10 requires uvicorn, which is not installed.\u001b[0m\u001b[31m\n",
56-
"\u001b[0m"
57-
]
58-
}
59-
],
45+
"outputs": [],
6046
"source": [
6147
"# Install dependencies\n",
62-
"!pip install -q numerapi pandas lightgbm cloudpickle==2.2.1 pyarrow scikit-learn scipy==1.10.1"
48+
"!pip install -q --upgrade numerapi pandas pyarrow matplotlib lightgbm scikit-learn scipy cloudpickle==3.1.1"
6349
]
6450
},
6551
{
6652
"cell_type": "code",
67-
"execution_count": null,
53+
"execution_count": 4,
6854
"metadata": {
6955
"colab": {
7056
"base_uri": "https://localhost:8080/",
71-
"height": 17
57+
"height": 160
7258
},
7359
"id": "13hdRk9ghMqI",
74-
"outputId": "857a4882-83e5-4a76-9b1e-57d6d822cc67"
60+
"outputId": "d2274374-fd85-4189-f27b-d9d466cc63ca"
7561
},
7662
"outputs": [
7763
{
78-
"data": {
79-
"application/javascript": "\n async function download(id, filename, size) {\n if (!google.colab.kernel.accessAllowed) {\n return;\n }\n const div = document.createElement('div');\n const label = document.createElement('label');\n label.textContent = `Downloading \"${filename}\": `;\n div.appendChild(label);\n const progress = document.createElement('progress');\n progress.max = size;\n div.appendChild(progress);\n document.body.appendChild(div);\n\n const buffers = [];\n let downloaded = 0;\n\n const channel = await google.colab.kernel.comms.open(id);\n // Send a message to notify the kernel that we're ready.\n channel.send({})\n\n for await (const message of channel.messages) {\n // Send a message to notify the kernel that we're ready.\n channel.send({})\n if (message.buffers) {\n for (const buffer of message.buffers) {\n buffers.push(buffer);\n downloaded += buffer.byteLength;\n progress.value = downloaded;\n }\n }\n }\n const blob = new Blob(buffers, {type: 'application/binary'});\n const a = document.createElement('a');\n a.href = window.URL.createObjectURL(blob);\n a.download = filename;\n div.appendChild(a);\n a.click();\n div.remove();\n }\n ",
80-
"text/plain": [
81-
"<IPython.core.display.Javascript object>"
82-
]
83-
},
84-
"metadata": {},
85-
"output_type": "display_data"
64+
"name": "stderr",
65+
"output_type": "stream",
66+
"text": [
67+
"2025-07-25 13:44:58,042 INFO numerapi.utils: starting download\n",
68+
"v5.0/train.parquet: 2.37GB [01:04, 36.7MB/s] \n",
69+
"2025-07-25 13:46:03,017 INFO numerapi.utils: starting download\n",
70+
"v5.0/features.json: 291kB [00:00, 2.75MB/s] \n"
71+
]
8672
},
8773
{
88-
"data": {
89-
"application/javascript": "download(\"download_9cb9b662-7992-47b0-b787-453b845e7050\", \"predict_barebones.pkl\", 6572312)",
90-
"text/plain": [
91-
"<IPython.core.display.Javascript object>"
92-
]
93-
},
94-
"metadata": {},
95-
"output_type": "display_data"
74+
"name": "stdout",
75+
"output_type": "stream",
76+
"text": [
77+
"[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001168 seconds.\n",
78+
"You can set `force_row_wise=true` to remove the overhead.\n",
79+
"And if memory is not enough, you can set `force_col_wise=true`.\n",
80+
"[LightGBM] [Info] Total Bins 210\n",
81+
"[LightGBM] [Info] Number of data points in the train set: 688184, number of used features: 42\n",
82+
"[LightGBM] [Info] Start training from score 0.500008\n"
83+
]
9684
}
9785
],
9886
"source": [
@@ -152,7 +140,7 @@
152140
"# Define predict function\n",
153141
"def predict(\n",
154142
" live_features: pd.DataFrame,\n",
155-
" live_benchmark_models: pd.DataFrame\n",
143+
" _live_benchmark_models: pd.DataFrame\n",
156144
" ) -> pd.DataFrame:\n",
157145
" live_predictions = model.predict(live_features[features])\n",
158146
" submission = pd.Series(live_predictions, index=live_features.index)\n",
@@ -178,7 +166,7 @@
178166
"provenance": []
179167
},
180168
"kernelspec": {
181-
"display_name": "venv",
169+
"display_name": "3.11.13",
182170
"language": "python",
183171
"name": "python3"
184172
},
@@ -192,7 +180,7 @@
192180
"name": "python",
193181
"nbconvert_exporter": "python",
194182
"pygments_lexer": "ipython3",
195-
"version": "3.10.12"
183+
"version": "3.11.13"
196184
},
197185
"orig_nbformat": 4
198186
},

0 commit comments

Comments
 (0)