@@ -11,25 +11,25 @@
  },
  {
   "cell_type": "code",
-  "source": [
-   "!python --version"
-  ],
+  "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Ekw8Z93ljC3v",
    "outputId": "675ac893-5a46-4c6b-dc03-09438941d1fc"
   },
-  "execution_count": null,
   "outputs": [
    {
-    "output_type": "stream",
     "name": "stdout",
+    "output_type": "stream",
     "text": [
      "Python 3.10.12\n"
     ]
    }
+  ],
+  "source": [
+   "!python --version"
   ]
  },
  {
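
Note on this hunk: nothing about the cell changed except key order (execution_count, metadata, outputs, source are now emitted alphabetically), so the diff is pure re-serialization churn. A minimal sketch of a normalizer that produces this ordering, assuming only the standard json module; the filename is a placeholder, and this is not necessarily the tool used in this commit:

import json

# Re-serialize a notebook with sorted keys so that future diffs show real
# changes instead of key-order churn. "notebook.ipynb" is hypothetical.
with open("notebook.ipynb") as f:
    nb = json.load(f)

with open("notebook.ipynb", "w") as f:
    json.dump(nb, f, indent=1, sort_keys=True)
    f.write("\n")  # end the file with a trailing newline
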
@@ -44,8 +44,8 @@
    },
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
+     "output_type": "stream",
      "text": [
       "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.4/34.4 MB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
       "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
@@ -62,58 +62,6 @@
     "!pip install -q numerapi pandas lightgbm cloudpickle pyarrow scikit-learn scipy==1.10.1"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "4SrY-eRrhMqH",
-    "outputId": "50373903-067a-4298-bab6-c74945fe8a3a"
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stderr",
-     "text": [
-      "v4.3/train_int8.parquet: 2.09GB [01:10, 29.5MB/s]\n",
-      "v4.3/features.json: 1.12MB [00:00, 4.25MB/s]\n"
-     ]
-    }
-   ],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "mcv85XqKhMqH",
-    "outputId": "a44c7266-be28-4621-afb1-c0abe69abb18"
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": [
-      "[LightGBM] [Warning] Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves. (num_leaves=31).\n",
-      "[LightGBM] [Warning] Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves. (num_leaves=31).\n",
-      "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.151962 seconds.\n",
-      "You can set `force_row_wise=true` to remove the overhead.\n",
-      "And if memory is not enough, you can set `force_col_wise=true`.\n",
-      "[LightGBM] [Info] Total Bins 3525\n",
-      "[LightGBM] [Info] Number of data points in the train set: 606176, number of used features: 705\n",
-      "[LightGBM] [Info] Start training from score 0.499979\n",
-      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
-      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n"
-     ]
-    }
-   ],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -127,68 +75,24 @@
    },
    "outputs": [
     {
-     "output_type": "display_data",
      "data": {
+      "application/javascript": "\n async function download(id, filename, size) {\n if (!google.colab.kernel.accessAllowed) {\n return;\n }\n const div = document.createElement('div');\n const label = document.createElement('label');\n label.textContent = `Downloading \"${filename}\": `;\n div.appendChild(label);\n const progress = document.createElement('progress');\n progress.max = size;\n div.appendChild(progress);\n document.body.appendChild(div);\n\n const buffers = [];\n let downloaded = 0;\n\n const channel = await google.colab.kernel.comms.open(id);\n // Send a message to notify the kernel that we're ready.\n channel.send({})\n\n for await (const message of channel.messages) {\n // Send a message to notify the kernel that we're ready.\n channel.send({})\n if (message.buffers) {\n for (const buffer of message.buffers) {\n buffers.push(buffer);\n downloaded += buffer.byteLength;\n progress.value = downloaded;\n }\n }\n }\n const blob = new Blob(buffers, {type: 'application/binary'});\n const a = document.createElement('a');\n a.href = window.URL.createObjectURL(blob);\n a.download = filename;\n div.appendChild(a);\n a.click();\n div.remove();\n }\n ",
       "text/plain": [
        "<IPython.core.display.Javascript object>"
-      ],
-      "application/javascript": [
-       "\n",
-       "async function download(id, filename, size) {\n",
-       "  if (!google.colab.kernel.accessAllowed) {\n",
-       "    return;\n",
-       "  }\n",
-       "  const div = document.createElement('div');\n",
-       "  const label = document.createElement('label');\n",
-       "  label.textContent = `Downloading \"${filename}\": `;\n",
-       "  div.appendChild(label);\n",
-       "  const progress = document.createElement('progress');\n",
-       "  progress.max = size;\n",
-       "  div.appendChild(progress);\n",
-       "  document.body.appendChild(div);\n",
-       "\n",
-       "  const buffers = [];\n",
-       "  let downloaded = 0;\n",
-       "\n",
-       "  const channel = await google.colab.kernel.comms.open(id);\n",
-       "  // Send a message to notify the kernel that we're ready.\n",
-       "  channel.send({})\n",
-       "\n",
-       "  for await (const message of channel.messages) {\n",
-       "    // Send a message to notify the kernel that we're ready.\n",
-       "    channel.send({})\n",
-       "    if (message.buffers) {\n",
-       "      for (const buffer of message.buffers) {\n",
-       "        buffers.push(buffer);\n",
-       "        downloaded += buffer.byteLength;\n",
-       "        progress.value = downloaded;\n",
-       "      }\n",
-       "    }\n",
-       "  }\n",
-       "  const blob = new Blob(buffers, {type: 'application/binary'});\n",
-       "  const a = document.createElement('a');\n",
-       "  a.href = window.URL.createObjectURL(blob);\n",
-       "  a.download = filename;\n",
-       "  div.appendChild(a);\n",
-       "  a.click();\n",
-       "  div.remove();\n",
-       "}\n",
-       " "
       ]
      },
-     "metadata": {}
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
-     "output_type": "display_data",
      "data": {
+      "application/javascript": "download(\"download_9cb9b662-7992-47b0-b787-453b845e7050\", \"predict_barebones.pkl\", 6572312)",
       "text/plain": [
        "<IPython.core.display.Javascript object>"
-      ],
-      "application/javascript": [
-       "download(\"download_9cb9b662-7992-47b0-b787-453b845e7050\", \"predict_barebones.pkl\", 6572312)"
       ]
      },
-     "metadata": {}
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
@@ -198,21 +102,21 @@
     "napi = NumerAPI()\n",
     "\n",
     "# use one of the latest data versions\n",
-    "DATA_VERSION = \"v4.3\"\n",
+    "DATA_VERSION = \"v5.0\"\n",
     "\n",
     "# Download data\n",
-    "napi.download_dataset(f\"{DATA_VERSION}/train_int8.parquet\")\n",
+    "napi.download_dataset(f\"{DATA_VERSION}/train.parquet\")\n",
     "napi.download_dataset(f\"{DATA_VERSION}/features.json\")\n",
     "\n",
     "# Load data\n",
     "feature_metadata = json.load(open(f\"{DATA_VERSION}/features.json\"))\n",
     "features = feature_metadata[\"feature_sets\"][\"medium\"] # use \"all\" for better performance. Requires more RAM.\n",
-    "train = pd.read_parquet(f\"{DATA_VERSION}/train_int8.parquet\", columns=[\"era\"]+features+[\"target\"])\n",
+    "train = pd.read_parquet(f\"{DATA_VERSION}/train.parquet\", columns=[\"era\"]+features+[\"target\"])\n",
     "\n",
     "# For better models, join train and validation data and train on all of it.\n",
     "# This would cause diagnostics to be misleading though.\n",
-    "# napi.download_dataset(f\"{DATA_VERSION}/validation_int8.parquet\");\n",
-    "# validation = pd.read_parquet(f\"{DATA_VERSION}/validation_int8.parquet\", columns=[\"era\"]+features+[\"target\"])\n",
+    "# napi.download_dataset(f\"{DATA_VERSION}/validation.parquet\")\n",
+    "# validation = pd.read_parquet(f\"{DATA_VERSION}/validation.parquet\", columns=[\"era\"]+features+[\"target\"])\n",
     "# validation = validation[validation[\"data_type\"] == \"validation\"] # drop rows which don't have targets yet\n",
     "# train = pd.concat([train, validation])\n",
     "\n",
@@ -258,6 +162,9 @@
   }
  ],
  "metadata": {
+  "colab": {
+   "provenance": []
+  },
   "kernelspec": {
    "display_name": "venv",
    "language": "python",
@@ -275,11 +182,8 @@
    "pygments_lexer": "ipython3",
    "version": "3.10.12"
   },
-  "orig_nbformat": 4,
-  "colab": {
-   "provenance": []
-  }
+  "orig_nbformat": 4
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
+}