Skip to content

Commit

Permalink
Merge pull request #122 from luabida/update-notebooks
Browse files Browse the repository at this point in the history
fix(notebook): update download references in notebooks
  • Loading branch information
fccoelho authored Mar 22, 2023
2 parents 81ed552 + 9f76437 commit 1a45981
Show file tree
Hide file tree
Showing 19 changed files with 1,964 additions and 460 deletions.
79 changes: 24 additions & 55 deletions pysus/Notebooks/Analyzing SIH.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,41 +6,31 @@
"metadata": {},
"outputs": [],
"source": [
"from ftplib import FTP\n",
"import os\n",
"import pandas as pd\n",
"from pysus.online_data import parquets_to_dataframe\n",
"from pysus.online_data.SIH import download"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"outputs": [],
"source": [
"%pylab inline"
"TO = download('to', 2009, 10)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"dfTO = download('to', 2009, 10)"
"dfTO = parquets_to_dataframe(TO)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -221,31 +211,24 @@
"3 170000 2009 10 01 25053117002450 1709100918481 1 \n",
"4 170000 2009 10 01 25053117002450 1709100918492 1 \n",
"\n",
" CEP MUNIC_RES NASC ... CID_ASSO CID_MORTE \\\n",
"0 77270000 172100 19731116 ... \n",
"1 77700000 170930 19880312 ... \n",
"2 77015202 172100 20070306 ... \n",
"3 77650000 171320 19311004 ... \n",
"4 77015202 172100 20010705 ... \n",
" CEP MUNIC_RES NASC ... CID_ASSO CID_MORTE COMPLEX FINANC \\\n",
"0 77270000 172100 19731116 ... 02 06 \n",
"1 77700000 170930 19880312 ... 02 06 \n",
"2 77015202 172100 20070306 ... 02 06 \n",
"3 77650000 171320 19311004 ... 03 06 \n",
"4 77015202 172100 20010705 ... 02 06 \n",
"\n",
" COMPLEX FINANC FAEC_TP REGCT RACA_COR ETNIA SEQUENCIA \\\n",
"0 02 06 0000 99 566 \n",
"1 02 06 0000 03 567 \n",
"2 02 06 0000 03 710 \n",
"3 03 06 0000 03 711 \n",
"4 02 06 0000 99 712 \n",
"\n",
" REMESSA \n",
"0 HE17000001N200910.DTS \n",
"1 HE17000001N200910.DTS \n",
"2 HE17000001N200910.DTS \n",
"3 HE17000001N200910.DTS \n",
"4 HE17000001N200910.DTS \n",
" FAEC_TP REGCT RACA_COR ETNIA SEQUENCIA REMESSA \n",
"0 0000 99 566 HE17000001N200910.DTS \n",
"1 0000 03 567 HE17000001N200910.DTS \n",
"2 0000 03 710 HE17000001N200910.DTS \n",
"3 0000 03 711 HE17000001N200910.DTS \n",
"4 0000 99 712 HE17000001N200910.DTS \n",
"\n",
"[5 rows x 86 columns]"
]
},
"execution_count": 5,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -256,27 +239,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"dfTO.to_csv('SIH_TO_10_2009.csv.gz', compression='gzip')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -287,7 +256,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -301,9 +270,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
87 changes: 44 additions & 43 deletions pysus/Notebooks/Analyzing SIM.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2020-02-10T13:59:53.611980Z",
Expand All @@ -19,6 +19,7 @@
},
"outputs": [],
"source": [
"from pysus.online_data import parquets_to_dataframe\n",
"from pysus.online_data.SIM import download\n",
"from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
"import base64"
Expand All @@ -33,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2020-02-10T14:00:16.665329Z",
Expand Down Expand Up @@ -357,55 +358,55 @@
"</div>"
],
"text/plain": [
" CONTADOR ORIGEM TIPOBITO DTOBITO HORAOBITO NATURAL DTNASC IDADE \\\n",
"0 1 1 2 28022010 0400 19031945 464 \n",
"1 2 1 2 15062010 1900 828 22091978 431 \n",
"2 3 1 2 20032010 2240 04121929 480 \n",
"3 4 1 2 10052010 1100 16011928 482 \n",
"4 5 1 2 07062010 0000 01071949 460 \n",
"... ... ... ... ... ... ... ... ... \n",
"10937 10938 1 2 17042010 10101942 467 \n",
"10938 10939 1 2 04062010 05071939 470 \n",
"10939 10940 1 2 12022010 29111926 483 \n",
"10940 10941 1 2 27042010 14041917 493 \n",
"10941 10942 1 2 02052010 1700 12091954 455 \n",
" CONTADOR ORIGEM TIPOBITO DTOBITO HORAOBITO NATURAL DTNASC \\\n",
"0 1 1 2 28022010 0400 19031945 \n",
"1 2 1 2 15062010 1900 828 22091978 \n",
"2 3 1 2 20032010 2240 04121929 \n",
"3 4 1 2 10052010 1100 16011928 \n",
"4 5 1 2 07062010 0000 01071949 \n",
"... ... ... ... ... ... ... ... \n",
"10937 10938 1 2 17042010 10101942 \n",
"10938 10939 1 2 04062010 05071939 \n",
"10939 10940 1 2 12022010 29111926 \n",
"10940 10941 1 2 27042010 14041917 \n",
"10941 10942 1 2 02052010 1700 12091954 \n",
"\n",
" SEXO RACACOR ... DTCADASTRO ATESTANTE FONTEINV DTRECEBIM UFINFORM \\\n",
"0 2 ... 26032010 2 31032010 \n",
"1 2 4 ... 22062010 5 8 06072010 \n",
"2 1 1 ... 28062010 1 22072010 \n",
"3 1 ... 28062010 2 17052010 \n",
"4 1 4 ... 20072010 2 22072010 \n",
"... ... ... ... ... ... ... ... ... \n",
"10937 1 ... 03032011 2 03032011 \n",
"10938 1 ... 03032011 2 15032011 \n",
"10939 1 ... 03032011 2 11032011 \n",
"10940 2 ... 03032011 2 14032011 \n",
"10941 2 ... 01032011 2 11032011 \n",
" IDADE SEXO RACACOR ... DTCADASTRO ATESTANTE FONTEINV DTRECEBIM \\\n",
"0 464 2 ... 26032010 2 31032010 \n",
"1 431 2 4 ... 22062010 5 8 06072010 \n",
"2 480 1 1 ... 28062010 1 22072010 \n",
"3 482 1 ... 28062010 2 17052010 \n",
"4 460 1 4 ... 20072010 2 22072010 \n",
"... ... ... ... ... ... ... ... ... \n",
"10937 467 1 ... 03032011 2 03032011 \n",
"10938 470 1 ... 03032011 2 15032011 \n",
"10939 483 1 ... 03032011 2 11032011 \n",
"10940 493 2 ... 03032011 2 14032011 \n",
"10941 455 2 ... 01032011 2 11032011 \n",
"\n",
" CB_PRE MORTEPARTO DTCADINF TPOBITOCOR DTCADINV \n",
"0 E149 \n",
"1 I698 9 21102010 \n",
"2 I219 \n",
"3 E149 \n",
"4 J969 \n",
"... ... ... ... ... ... \n",
"10937 I519 \n",
"10938 C61 \n",
"10939 I519 \n",
"10940 I519 \n",
"10941 C349 \n",
" UFINFORM CB_PRE MORTEPARTO DTCADINF TPOBITOCOR DTCADINV \n",
"0 E149 \n",
"1 I698 9 21102010 \n",
"2 I219 \n",
"3 E149 \n",
"4 J969 \n",
"... ... ... ... ... ... ... \n",
"10937 I519 \n",
"10938 C61 \n",
"10939 I519 \n",
"10940 I519 \n",
"10941 C349 \n",
"\n",
"[10942 rows x 58 columns]"
]
},
"execution_count": 2,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = download('SE', 2010)\n",
"df = parquets_to_dataframe(download('SE', 2010))\n",
"df"
]
},
Expand Down Expand Up @@ -485,7 +486,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -499,7 +500,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
"version": "3.9.16"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
Expand Down Expand Up @@ -563,5 +564,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
Loading

0 comments on commit 1a45981

Please sign in to comment.