We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9f62039 commit 4e488b0Copy full SHA for 4e488b0
docs/content/pre-process-datasets.ipynb
@@ -214,7 +214,8 @@
214
" # Get just the first few (each file is 11GB so this should be enough for a large dataset)\n",
215
" data_files=[\n",
216
" \"00.jsonl.zst\",\n",
217
- " \"01.jsonl.zst\" \"02.jsonl.zst\",\n",
+ " \"01.jsonl.zst\",\n",
218
+ " \"02.jsonl.zst\",\n",
219
" \"03.jsonl.zst\",\n",
220
" \"04.jsonl.zst\",\n",
221
" \"05.jsonl.zst\",\n",
@@ -226,7 +227,8 @@
226
227
" tokenizer_name=\"EleutherAI/gpt-neox-20b\",\n",
228
229
230
231
232
233
234
0 commit comments