diff --git a/your-project/Ironhack_week 6_project_HDI.pptx b/your-project/Ironhack_week 6_project_HDI.pptx new file mode 100644 index 0000000..84d0ae8 Binary files /dev/null and b/your-project/Ironhack_week 6_project_HDI.pptx differ diff --git a/your-project/dt selection and cleaning criterias/backups/elements to drop in dfs - Copy.csv b/your-project/dt selection and cleaning criterias/backups/elements to drop in dfs - Copy.csv new file mode 100644 index 0000000..5d5ff5c --- /dev/null +++ b/your-project/dt selection and cleaning criterias/backups/elements to drop in dfs - Copy.csv @@ -0,0 +1,79 @@ +East Asia and Pacific +Europe and Central Asia +Latin America and Caribbean +Middle East and North Africa +South Asia +Sub-Saharan Africa +World +European Union (28 countries) +Reunion +Western Sahara +Arab World +Caribbean small states +Central Europe and the Baltics +Early-demographic dividend +East Asia & Pacific +East Asia & Pacific (IDA & IBRD) +East Asia & Pacific (excluding high income) +Euro area +Europe & Central Asia +Europe & Central Asia (IDA & IBRD) +Europe & Central Asia (excluding high income) +European Union +Fragile and conflict affected situations +Heavily indebted poor countries (HIPC) +High income +IBRD only +IDA & IBRD total +IDA blend +IDA only +IDA total +Late-demographic dividend +Latin America & Caribbean +Latin America & Caribbean (IDA & IBRD) +Latin America & Caribbean (excluding high income) +Least developed countries: UN classification +Low & middle income +Low income +Lower middle income +Middle East & North Africa +Middle East & North Africa (IDA & IBRD) +Middle East & North Africa (excluding high income) +Middle income +OECD members +Other small states +Pacific island small states +Post-demographic dividend +Pre-demographic dividend +Saint Martin (French part) +Small states +South Asia (IDA & IBRD) +Sub-Saharan Africa (IDA & IBRD) +Sub-Saharan Africa (excluding high income) +Upper middle income +Andean Latin America +Australasia +Central 
Asia +Central Europe +"Central Europe, Eastern Europe, and Central Asia" +Central Latin America +Central Sub-Saharan Africa +East Asia +Eastern Europe +Eastern Sub-Saharan Africa +High SDI +High-income +High-income Asia Pacific +High-middle SDI +Low SDI +Low-middle SDI +Middle SDI +North Africa and Middle East +Northern Ireland +Southeast Asia +"Southeast Asia, East Asia, and Oceania" +Southern Latin America +Southern Sub-Saharan Africa +Tropical Latin America +Western Europe +Western Sub-Saharan Africa diff --git a/your-project/dt selection and cleaning criterias/elements to drop in dfs.csv b/your-project/dt selection and cleaning criterias/elements to drop in dfs.csv new file mode 100644 index 0000000..5d5ff5c --- /dev/null +++ b/your-project/dt selection and cleaning criterias/elements to drop in dfs.csv @@ -0,0 +1,79 @@ +East Asia and Pacific +Europe and Central Asia +Latin America and Caribbean +Middle East and North Africa +South Asia +Sub-Saharan Africa +World +European Union (28 countries) +Reunion +Western Sahara +Arab World +Caribbean small states +Central Europe and the Baltics +Early-demographic dividend +East Asia & Pacific +East Asia & Pacific (IDA & IBRD) +East Asia & Pacific (excluding high income) +Euro area +Europe & Central Asia +Europe & Central Asia (IDA & IBRD) +Europe & Central Asia (excluding high income) +European Union +Fragile and conflict affected situations +Heavily indebted poor countries (HIPC) +High income +IBRD only +IDA & IBRD total +IDA blend +IDA only +IDA total +Late-demographic dividend +Latin America & Caribbean +Latin America & Caribbean (IDA & IBRD) +Latin America & Caribbean (excluding high income) +Least developed countries: UN classification +Low & middle income +Low income +Lower middle income +Middle East & North Africa +Middle East & North Africa (IDA & IBRD) +Middle East & North Africa (excluding high income) +Middle income +OECD members +Other small states +Pacific island small states +Post-demographic dividend 
+Pre-demographic dividend +Saint Martin (French part) +Small states +South Asia (IDA & IBRD) +Sub-Saharan Africa (IDA & IBRD) +Sub-Saharan Africa (excluding high income) +Upper middle income +Andean Latin America +Australasia +Central Asia +Central Europe +"Central Europe, Eastern Europe, and Central Asia" +Central Latin America +Central Sub-Saharan Africa +East Asia +Eastern Europe +Eastern Sub-Saharan Africa +High SDI +High-income +High-income Asia Pacific +High-middle SDI +Low SDI +Low-middle SDI +Middle SDI +North Africa and Middle East +Northern Ireland +Southeast Asia +"Southeast Asia, East Asia, and Oceania" +Southern Latin America +Southern Sub-Saharan Africa +Tropical Latin America +Western Europe +Western Sub-Saharan Africa diff --git a/your-project/dt selection and cleaning criterias/elements to drop in dfs.xlsx b/your-project/dt selection and cleaning criterias/elements to drop in dfs.xlsx new file mode 100644 index 0000000..c92eac1 Binary files /dev/null and b/your-project/dt selection and cleaning criterias/elements to drop in dfs.xlsx differ diff --git a/your-project/dt selection and cleaning criterias/range years per var.xlsx b/your-project/dt selection and cleaning criterias/range years per var.xlsx new file mode 100644 index 0000000..6d1b36f Binary files /dev/null and b/your-project/dt selection and cleaning criterias/range years per var.xlsx differ diff --git a/your-project/from python to excel/OLSmodel_selected_vars.xlsx b/your-project/from python to excel/OLSmodel_selected_vars.xlsx new file mode 100644 index 0000000..d4ace4b Binary files /dev/null and b/your-project/from python to excel/OLSmodel_selected_vars.xlsx differ diff --git a/your-project/from python to excel/corr_kendaltau_matrix.png b/your-project/from python to excel/corr_kendaltau_matrix.png new file mode 100644 index 0000000..f050b81 Binary files /dev/null and b/your-project/from python to excel/corr_kendaltau_matrix.png differ diff --git a/your-project/from python to 
excel/corr_kendaltau_matrix.xlsx b/your-project/from python to excel/corr_kendaltau_matrix.xlsx new file mode 100644 index 0000000..fa0ebcc Binary files /dev/null and b/your-project/from python to excel/corr_kendaltau_matrix.xlsx differ diff --git a/your-project/from python to excel/corr_pearson_matrix.png b/your-project/from python to excel/corr_pearson_matrix.png new file mode 100644 index 0000000..f050b81 Binary files /dev/null and b/your-project/from python to excel/corr_pearson_matrix.png differ diff --git a/your-project/from python to excel/corr_pearson_matrix.xlsx b/your-project/from python to excel/corr_pearson_matrix.xlsx new file mode 100644 index 0000000..59f410f Binary files /dev/null and b/your-project/from python to excel/corr_pearson_matrix.xlsx differ diff --git a/your-project/from python to excel/corr_pearson_matrix_analyzed.xlsx b/your-project/from python to excel/corr_pearson_matrix_analyzed.xlsx new file mode 100644 index 0000000..4f1848b Binary files /dev/null and b/your-project/from python to excel/corr_pearson_matrix_analyzed.xlsx differ diff --git a/your-project/from python to excel/corr_pearson_matrix_analyzed_final.xlsx b/your-project/from python to excel/corr_pearson_matrix_analyzed_final.xlsx new file mode 100644 index 0000000..c9291fc Binary files /dev/null and b/your-project/from python to excel/corr_pearson_matrix_analyzed_final.xlsx differ diff --git a/your-project/from python to excel/corr_spearman_matrix.png b/your-project/from python to excel/corr_spearman_matrix.png new file mode 100644 index 0000000..f050b81 Binary files /dev/null and b/your-project/from python to excel/corr_spearman_matrix.png differ diff --git a/your-project/from python to excel/corr_spearman_matrix.xlsx b/your-project/from python to excel/corr_spearman_matrix.xlsx new file mode 100644 index 0000000..7b17f93 Binary files /dev/null and b/your-project/from python to excel/corr_spearman_matrix.xlsx differ diff --git a/your-project/from python to 
excel/df_OLS_all.xlsx b/your-project/from python to excel/df_OLS_all.xlsx new file mode 100644 index 0000000..bac0968 Binary files /dev/null and b/your-project/from python to excel/df_OLS_all.xlsx differ diff --git a/your-project/from python to excel/df_OLSmodel_neg_vars.xlsx b/your-project/from python to excel/df_OLSmodel_neg_vars.xlsx new file mode 100644 index 0000000..c56ead3 Binary files /dev/null and b/your-project/from python to excel/df_OLSmodel_neg_vars.xlsx differ diff --git a/your-project/from python to excel/df_OLSmodel_pos_vars.xlsx b/your-project/from python to excel/df_OLSmodel_pos_vars.xlsx new file mode 100644 index 0000000..e9c0f88 Binary files /dev/null and b/your-project/from python to excel/df_OLSmodel_pos_vars.xlsx differ diff --git a/your-project/from python to excel/df_analysis - Copy (2).xlsx b/your-project/from python to excel/df_analysis - Copy (2).xlsx new file mode 100644 index 0000000..6c76624 Binary files /dev/null and b/your-project/from python to excel/df_analysis - Copy (2).xlsx differ diff --git a/your-project/from python to excel/df_analysis - Copy.xlsx b/your-project/from python to excel/df_analysis - Copy.xlsx new file mode 100644 index 0000000..4c2a771 Binary files /dev/null and b/your-project/from python to excel/df_analysis - Copy.xlsx differ diff --git a/your-project/from python to excel/df_analysis.xlsx b/your-project/from python to excel/df_analysis.xlsx new file mode 100644 index 0000000..1672c76 Binary files /dev/null and b/your-project/from python to excel/df_analysis.xlsx differ diff --git a/your-project/from python to excel/df_analysis_filtr countries.xlsx b/your-project/from python to excel/df_analysis_filtr countries.xlsx new file mode 100644 index 0000000..1beaa5b Binary files /dev/null and b/your-project/from python to excel/df_analysis_filtr countries.xlsx differ diff --git a/your-project/from python to excel/diff_pearson_spearman.png b/your-project/from python to excel/diff_pearson_spearman.png new file mode 
100644 index 0000000..f050b81 Binary files /dev/null and b/your-project/from python to excel/diff_pearson_spearman.png differ diff --git a/your-project/from python to excel/diff_pearson_spearman.xlsx b/your-project/from python to excel/diff_pearson_spearman.xlsx new file mode 100644 index 0000000..e1c8273 Binary files /dev/null and b/your-project/from python to excel/diff_pearson_spearman.xlsx differ diff --git a/your-project/from python to excel/hdi.xlsx b/your-project/from python to excel/hdi.xlsx new file mode 100644 index 0000000..ebb66df Binary files /dev/null and b/your-project/from python to excel/hdi.xlsx differ diff --git a/your-project/from python to excel/old/corr_matrix_analyzed.csv b/your-project/from python to excel/old/corr_matrix_analyzed.csv new file mode 100644 index 0000000..3b577d5 --- /dev/null +++ b/your-project/from python to excel/old/corr_matrix_analyzed.csv @@ -0,0 +1,27 @@ +,HDI_x,Trust in others (%)_x,Avg Height (cm),"Crude divorce rate (per 1,000 inhabitants)",Forest area (% of land area),Gini coeff," homicides per 100,000",HDI_y,Human Rights Protection Scores,Military expenditure (% of GDP),Schizophrenia (%),Bipolar disorder (%),Eating disorders (%),Anxiety disorders (%),Drug use disorders (%),Depression (%),Alcohol use disorders (%),Trust in others (%)_y,Share of Top 1% in Pre-tax national income (%),Share with Mental and Substance disorders,Suicide rate,Taxes goods and services (% GDP),Corruption Perception Index,Freedom score,Unnamed: 4,Unnamed: 5 +Year,0.241485692,-0.1133278,0.573523407,0.187590488,-0.010674767,-0.0341413,-0.021271235,0.241485692,0.203362439,-0.200753455,0.039163261,0.012272871,0.086792573,0.012516933,0.058543368,-0.035426924,0.017216598,-0.1133278,-0.299300056,-0.026695728,-0.024162815,0.242618849,0.002761237,0.028792547,, 
+HDI_x,1,0.364147175,0.679586552,0.360343436,0.017859236,-0.24438168,-0.123210242,1,0.574652339,0.006580022,0.674009947,0.566776423,0.756705718,0.492064749,0.436412736,-0.032316181,0.181838097,0.364147175,-0.282966661,0.316391479,0.132137991,0.53058427,0.730664763,0.466437373,, +Trust in others (%)_x,0.364147175,1,0.183520186,0.380432039,0.059003964,-0.25114818,-0.291553915,0.364147175,0.327147279,-0.024774344,0.598279977,0.035548283,0.362864846,0.26243603,0.244066675,0.275973869,-0.019843504,1,-0.556779563,0.28285192,0.198024619,0.087522224,0.469979977,0.191240379,, +Avg Height (cm),0.679586552,0.183520186,1,0.296429092,-0.084998081,-0.373294335,-0.149509612,0.679586552,0.422030845,0.004400259,0.408599424,0.470052274,0.553224568,0.381984164,0.255069314,0.1160764,0.343572469,0.183520186,-0.482569794,0.23972616,0.411875136,0.560676233,,0.390749223,, +"Crude divorce rate (per 1,000 inhabitants)",0.360343436,0.380432039,0.296429092,1,0.057778496,-0.121923937,-0.02942117,0.360343436,0.219003267,-0.098345192,0.208378236,0.035972725,0.178843895,0.107316774,0.233955339,0.39896291,0.378287693,0.380432039,0.115094405,0.2064133,0.459912031,0.032996761,0.322231825,0.250873665,, +Forest area (% of land area),0.017859236,0.059003964,-0.084998081,0.057778496,1,-0.20193338,0.084216435,0.017859236,0.149006286,-0.223658323,0.047461362,-0.143540167,0.002696841,-0.158967311,-0.23942587,-0.231736159,0.093726091,0.059003964,0.007867737,-0.271104931,0.138088413,-0.075006703,0.070674926,0.21573186,, +Gini coeff,-0.24438168,-0.25114818,-0.373294335,-0.121923937,-0.20193338,1,0.289736397,-0.24438168,-0.477218762,0.220046011,0.22556246,0.024362598,0.108962308,0.123999825,0.422989479,0.100380542,-0.233731406,-0.25114818,0.500519379,0.364044881,-0.644111178,-0.145182328,-0.448741565,-0.302765696,, +"homicides per 
100,000",-0.123210242,-0.291553915,-0.149509612,-0.02942117,0.084216435,0.289736397,1,-0.123210242,-0.220965168,-0.071042546,-0.254795877,0.049271808,-0.12708669,-0.160388757,-0.10246824,-0.119867022,0.298205394,-0.291553915,0.47933891,-0.183156151,0.019330304,-0.064488343,-0.184605825,-0.006119172,, +HDI_y,1,0.364147175,0.679586552,0.360343436,0.017859236,-0.24438168,-0.123210242,1,0.574652339,0.006580022,0.674009947,0.566776423,0.756705718,0.492064749,0.436412736,-0.032316181,0.181838097,0.364147175,-0.282966661,0.316391479,0.132137991,0.53058427,0.730664763,0.466437373,, +Human Rights Protection Scores,0.574652339,0.327147279,0.422030845,0.219003267,0.149006286,-0.477218762,-0.220965168,0.574652339,1,-0.215125137,0.446845268,0.294289615,0.538688704,0.301746182,0.087369831,-0.034824535,0.090215995,0.327147279,-0.444558754,0.131627209,0.146100679,0.360253171,0.76901164,0.610881972,, +Military expenditure (% of GDP),0.006580022,-0.024774344,0.004400259,-0.098345192,-0.223658323,0.220046011,-0.071042546,0.006580022,-0.215125137,1,-0.031946061,0.005671509,-0.037906844,0.046656551,0.169672056,0.070286631,-0.1316979,-0.024774344,0.197083128,0.08457278,-0.116901188,-0.157341477,-0.098757295,-0.231890611,, +Schizophrenia (%),0.674009947,0.598279977,0.408599424,0.208378236,0.047461362,0.22556246,-0.254795877,0.674009947,0.446845268,-0.031946061,1,0.19845551,0.668815546,0.469324663,0.475202689,0.167477532,-0.059206939,0.598279977,-0.382558146,0.387764526,-0.029306353,0.292762796,0.661736975,0.41231483,, +Bipolar disorder (%),0.566776423,0.035548283,0.470052274,0.035972725,-0.143540167,0.024362598,0.049271808,0.566776423,0.294289615,0.005671509,0.19845551,1,0.708055326,0.663745285,0.363870618,0.148557216,0.079694317,0.035548283,-0.133592808,0.601410519,-0.138699411,0.357258258,0.500708653,0.3392874,, +Eating disorders 
(%),0.756705718,0.362864846,0.553224568,0.178843895,0.002696841,0.108962308,-0.12708669,0.756705718,0.538688704,-0.037906844,0.668815546,0.708055326,1,0.698026136,0.494187478,0.24071598,0.03525434,0.362864846,-0.383193402,0.603767547,-0.061615865,0.362218023,0.757939261,0.504648966,, +Anxiety disorders (%),0.492064749,0.26243603,0.381984164,0.107316774,-0.158967311,0.123999825,-0.160388757,0.492064749,0.301746182,0.046656551,0.469324663,0.663745285,0.698026136,1,0.61607729,0.379011551,-0.136502328,0.26243603,-0.18091977,0.889176765,-0.163314088,0.242135511,0.482896043,0.316328004,, +Drug use disorders (%),0.436412736,0.244066675,0.255069314,0.233955339,-0.23942587,0.422989479,-0.10246824,0.436412736,0.087369831,0.169672056,0.475202689,0.363870618,0.494187478,0.61607729,1,0.366538754,-0.113244921,0.244066675,0.209063646,0.642753628,-0.073924905,0.060183932,0.2761726,0.040610789,, +Depression (%),-0.032316181,0.275973869,0.1160764,0.39896291,-0.231736159,0.100380542,-0.119867022,-0.032316181,-0.034824535,0.070286631,0.167477532,0.148557216,0.24071598,0.379011551,0.366538754,1,0.050145561,0.275973869,-0.119800923,0.652032808,0.27413125,0.008391144,0.117825578,-0.024968212,, +Alcohol use disorders (%),0.181838097,-0.019843504,0.343572469,0.378287693,0.093726091,-0.233731406,0.298205394,0.181838097,0.090215995,-0.1316979,-0.059206939,0.079694317,0.03525434,-0.136502328,-0.113244921,0.050145561,1,-0.019843504,0.212948777,-0.136007712,0.63072996,0.316290722,0.080357263,0.151149236,, +Trust in others (%)_y,0.364147175,1,0.183520186,0.380432039,0.059003964,-0.25114818,-0.291553915,0.364147175,0.327147279,-0.024774344,0.598279977,0.035548283,0.362864846,0.26243603,0.244066675,0.275973869,-0.019843504,1,-0.556779563,0.28285192,0.198024619,0.087522224,0.469979977,0.191240379,, +Share of Top 1% in Pre-tax national income 
(%),-0.282966661,-0.556779563,-0.482569794,0.115094405,0.007867737,0.500519379,0.47933891,-0.282966661,-0.444558754,0.197083128,-0.382558146,-0.133592808,-0.383193402,-0.18091977,0.209063646,-0.119800923,0.212948777,-0.556779563,1,-0.133697441,-0.149072349,-0.211245057,-0.679953527,-0.447243614,, +Share with Mental and Substance disorders,0.316391479,0.28285192,0.23972616,0.2064133,-0.271104931,0.364044881,-0.183156151,0.316391479,0.131627209,0.08457278,0.387764526,0.601410519,0.603767547,0.889176765,0.642753628,0.652032808,-0.136007712,0.28285192,-0.133697441,1,-0.059454176,0.159548326,0.386520289,0.191683639,, +Suicide rate,0.132137991,0.198024619,0.411875136,0.459912031,0.138088413,-0.644111178,0.019330304,0.132137991,0.146100679,-0.116901188,-0.029306353,-0.138699411,-0.061615865,-0.163314088,-0.073924905,0.27413125,0.63072996,0.198024619,-0.149072349,-0.059454176,1,0.308622874,,0.033810065,, +Taxes goods and services (% GDP),0.53058427,0.087522224,0.560676233,0.032996761,-0.075006703,-0.145182328,-0.064488343,0.53058427,0.360253171,-0.157341477,0.292762796,0.357258258,0.362218023,0.242135511,0.060183932,0.008391144,0.316290722,0.087522224,-0.211245057,0.159548326,0.308622874,1,0.314912981,0.341982394,, +Corruption Perception Index,0.730664763,0.469979977,,0.322231825,0.070674926,-0.448741565,-0.184605825,0.730664763,0.76901164,-0.098757295,0.661736975,0.500708653,0.757939261,0.482896043,0.2761726,0.117825578,0.080357263,0.469979977,-0.679953527,0.386520289,,0.314912981,1,0.664157905,, +Freedom score,0.466437373,0.191240379,0.390749223,0.250873665,0.21573186,-0.302765696,-0.006119172,0.466437373,0.610881972,-0.231890611,0.41231483,0.3392874,0.504648966,0.316328004,0.040610789,-0.024968212,0.151149236,0.191240379,-0.447243614,0.191683639,0.033810065,0.341982394,0.664157905,1,, +,,,,,,,,,,,,,,,,,,,,,,,,,, diff --git a/your-project/from python to excel/~$corr_pearson_matrix_analyzed.xlsx b/your-project/from python to excel/~$corr_pearson_matrix_analyzed.xlsx new 
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 5 15:20:17 2020

@author: joaopq

HDI: is development going the right way?

Flat analysis script. It
  1. loads every indicator CSV found in the ``timeline`` folder,
  2. removes aggregate/regional rows listed in ``elements to drop in dfs.csv``,
  3. outer-merges everything on (Entity, Year) into ``df_analysis``,
  4. exports Pearson / Spearman / Kendall correlation matrices and heatmaps,
  5. fits four OLS models of HDI and exports their coefficients.
"""

# ---------------------------------------------------------------------------
# IMPORTING LIBRARIES
# ---------------------------------------------------------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import scipy.stats
import more_itertools
import statsmodels.api as sm
import sys
from glob import glob

# ---------------------------------------------------------------------------
# LOCATIONS
# ---------------------------------------------------------------------------
# Single place to change when the project folder moves.
BASE_DIR = "E:\\2. Aprendizagem\\2.3. Ironhack\\2. Projects\\Week_6\\human development theme\\"
TIMELINE_DIR = BASE_DIR + "timeline\\"
CRITERIA_DIR = BASE_DIR + "dt selection and cleaning criterias\\"
path = BASE_DIR + "from python to excel\\"
# Defined by the original script but never used: the PNGs are written to `path`.
path_plots = BASE_DIR + "from python to images\\"
excel_format = "xlsx"

# ---------------------------------------------------------------------------
# CREATING DATAFRAMES
# ---------------------------------------------------------------------------

# DEPENDENT var: HUMAN DEVELOPMENT INDEX (HDI)
hdi = pd.read_csv(TIMELINE_DIR + "human-development-index.csv")

# INDEPENDENT vars: every CSV in the timeline folder. The HDI file itself is
# also picked up here, which is why the merged frame ends up with HDI_x/HDI_y.
# Sorted so that the merge order does not depend on the file system.
filenames = sorted(glob(TIMELINE_DIR + "*.csv"))
frames = [pd.read_csv(f) for f in filenames]

# The criteria file has no header row: one entity name per line. Reading it
# with header=None keeps the first name ("East Asia and Pacific") as data.
elements_to_drop = pd.read_csv(
    CRITERIA_DIR + "elements to drop in dfs.csv",
    header=None,
    names=["Entity"],
)
# Membership must be tested against the *values*; `x in DataFrame` only looks
# at the column labels.
_entities_to_drop = frozenset(
    elements_to_drop["Entity"].dropna().astype(str).str.strip()
)


def cleaner(x):
    """Return True when entity name `x` should be kept.

    An entity is kept when it is not listed in the "elements to drop" file
    (world regions, income groups and other aggregates).
    """
    return x not in _entities_to_drop


# Remove the ISO 'Code' column from every indicator frame before merging.
# DataFrame.drop returns a new frame, so the result has to be kept.
results = [df.drop(columns=["Code"], errors="ignore") for df in frames]

hdi = hdi[hdi["Entity"].apply(cleaner)]
hdi.to_excel(path + "hdi." + excel_format)

hdi = hdi.drop(["Code"], axis=1)
df_analysis = hdi

# Outer merge keeps every (Entity, Year) pair seen in any source.
for indicator in results:
    df_analysis = df_analysis.merge(indicator, on=["Entity", "Year"], how="outer")

# The indicator frames were merged unfiltered, so filter the merged result.
df_analysis = df_analysis[df_analysis["Entity"].apply(cleaner)]
df_analysis.to_excel(path + "df_analysis." + excel_format)

# ---------------------------------------------------------------------------
# INFERENTIAL STATISTICS
# ---------------------------------------------------------------------------

# Correlation matrices, used to shortlist variables worth regressing on.
corr_methods = ["pearson", "spearman", "kendall"]
_CORR_COLUMNS_TO_HIDE = ["HDI_y", "Year"]


def _corr_matrix(data, method):
    """Correlation matrix of the numeric columns, without the helper columns.

    Non-numeric columns (e.g. 'Entity') are excluded explicitly, and the
    duplicated HDI column and 'Year' are removed from the columns.
    """
    numeric = data.select_dtypes(include="number")
    return numeric.corr(method=method).drop(
        columns=_CORR_COLUMNS_TO_HIDE, errors="ignore"
    )


corr_pearson_matrix = _corr_matrix(df_analysis, corr_methods[0])
corr_pearson_matrix.to_excel(path + "corr_pearson_matrix." + excel_format)

corr_spearman_matrix = _corr_matrix(df_analysis, corr_methods[1])
corr_spearman_matrix.to_excel(path + "corr_spearman_matrix." + excel_format)

corr_kendaltau_matrix = _corr_matrix(df_analysis, corr_methods[2])
corr_kendaltau_matrix.to_excel(path + "corr_kendaltau_matrix." + excel_format)

# Differences between the Pearson and Spearman matrices.
diff_pearson_spearman = corr_pearson_matrix - corr_spearman_matrix
diff_pearson_spearman.to_excel(path + "diff_pearson_spearman." + excel_format)
print("avg difference between pearson and spearman is: "+str(diff_pearson_spearman.mean().mean()))

# ---------------------------------------------------------------------------
# PLOTTING
# ---------------------------------------------------------------------------
sns.set()


def _save_heatmap(matrix, filename):
    """Draw `matrix` as a heatmap on its own figure and save it under `path`."""
    fig, ax = plt.subplots(figsize=(16, 8))
    sns.heatmap(matrix, linewidths=.5, cmap="RdBu_r", ax=ax)
    fig.savefig(path + filename)
    # Close the figure so the next heatmap is not drawn on top of this one.
    plt.close(fig)


_save_heatmap(corr_pearson_matrix, "corr_pearson_matrix.png")
_save_heatmap(corr_spearman_matrix, "corr_spearman_matrix.png")
_save_heatmap(corr_kendaltau_matrix, "corr_kendaltau_matrix.png")
_save_heatmap(diff_pearson_spearman, "diff_pearson_spearman.png")

# ---------------------------------------------------------------------------
# MULTILINEAR REGRESSION
# ---------------------------------------------------------------------------
print(df_analysis.columns)

_TARGET = "HDI_x"


def _design(data, predictors):
    """Build (X, Y) for an OLS fit of HDI on `predictors`.

    Rows without an observed HDI are left out instead of being regressed as
    HDI = 0. Missing predictor values are still imputed with 0, as in the
    original analysis. An intercept column is added because statsmodels' OLS
    does not add one by itself.
    """
    observed = data[data[_TARGET].notna()]
    X = sm.add_constant(observed[predictors].fillna(0))
    return X, observed[_TARGET]


def _fit_and_report(X, Y):
    """Print the inputs, fit OLS and print the model summary."""
    print(X.dtypes)
    print(Y)
    model = sm.OLS(Y, X).fit()
    print(model.summary())
    return model


def _export_ols(model, filename):
    """Write coefficients and t-values of `model` to an Excel file in `path`."""
    table = pd.concat((model.params, model.tvalues), axis=1)
    table.rename(columns={0: 'beta', 1: 't-values'}).to_excel(path + filename + "." + excel_format)
    return table


# ---- all variables ---------------------------------------------------------
# 'Entity' is a string label and cannot be used as a regressor.
X_all, Y = _design(df_analysis, [
    "Year",
    "Annual hours worked per worker",
    "Avg Height (cm)",
    "Live births per woman",
    "Total population",
    "Crude divorce rate (per 1,000 inhabitants)",
    "Forest area (% of land area)",
    "Gini coeff",
    "World Happiness Report",
    "Interpersonal violence (homicides per 100,000)",
    "Human Rights Protection Scores",
    "Military expenditure (% of GDP)",
    "Schizophrenia (%)",
    "Bipolar disorder (%)",
    "Eating disorders (%)",
    "Anxiety disorders (%)",
    "Drug use disorders (%)",
    "Depression (%)",
    "Alcohol use disorders (%)",
    "Trust in others (%)",
    "Share of Top 1% in Pre-tax national income (%)",
    "Share with Mental and Substance disorders",
    "Suicide rate",
    "Taxes goods and services (% GDP)",
    "Corruption Perception Index",
    "Freedom score",
])
OLSmodel_all_vars = _fit_and_report(X_all, Y)
df_OLS_all = _export_ols(OLSmodel_all_vars, "df_OLS_all")

# ---- strongly correlated variables -----------------------------------------
X_selected, Y = _design(df_analysis, [
    "World Happiness Report",
    "Live births per woman",
    "Corruption Perception Index",
    "Avg Height (cm)",
    "Eating disorders (%)",
    "Human Rights Protection Scores",
    "Annual hours worked per worker",
    "Schizophrenia (%)",
    "Bipolar disorder (%)",
    "Taxes goods and services (% GDP)",
])
OLSmodel_selected_vars = _fit_and_report(X_selected, Y)
df_OLS_selected = _export_ols(OLSmodel_selected_vars, "OLSmodel_selected_vars")

# ---- strongly correlated variables with negative connotation ---------------
X_negative, Y = _design(df_analysis, [
    "Schizophrenia (%)",
    "Live births per woman",
    "Eating disorders (%)",
    "Bipolar disorder (%)",
])
OLSmodel_neg_vars = _fit_and_report(X_negative, Y)
df_OLSmodel_neg_vars = _export_ols(OLSmodel_neg_vars, "df_OLSmodel_neg_vars")

# ---- strongly correlated variables with positive connotation ---------------
X_positive, Y = _design(df_analysis, [
    "World Happiness Report",
    "Corruption Perception Index",
    "Human Rights Protection Scores",
    "Annual hours worked per worker",
    "Taxes goods and services (% GDP)",
])
OLSmodel_pos_vars = _fit_and_report(X_positive, Y)
df_OLSmodel_pos_vars = _export_ols(OLSmodel_pos_vars, "df_OLSmodel_pos_vars")

# ---- comparing all R squareds ----------------------------------------------
print("R Squared of all vars is: "+str(OLSmodel_all_vars.rsquared))
print("R Squared of selected vars is: "+str(OLSmodel_selected_vars.rsquared))
print("R Squared of selected positive sentiment vars is: "+str(OLSmodel_pos_vars.rsquared))
print("R Squared of selected negative sentiment vars "+str(OLSmodel_neg_vars.rsquared))
# =============================================================================
# IMPORTING LIBRARIES
# =============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import scipy.stats
import more_itertools
import statsmodels.api as sm
import sys
from glob import glob

# =============================================================================
# CREATING DATAFRAMES BELOW
# =============================================================================

# Every input and output lives under this folder; it is spelled out once so the
# individual paths below cannot drift apart.
_BASE_DIR = "E:\\2. Aprendizagem\\2.3. Ironhack\\2. Projects\\Week_6\\human development theme\\"
_TIMELINE_DIR = _BASE_DIR + "timeline\\"

# Output folder and spreadsheet extension shared by every export in this script.
path = _BASE_DIR + "from python to excel\\"
excel_format = "xlsx"

# DEPENDENT var: HUMAN DEVELOPMENT INDEX (HDI)
hdi = pd.read_csv(_TIMELINE_DIR + "human-development-index.csv")

# INDEPENDENT vars: every CSV found in the timeline folder.
# An earlier revision loaded a hand-picked subset one file at a time (mental
# disorders, forest area, trust, divorces, corruption perception, gini,
# suicides; schizophrenia, anxiety, military expenditure and freedom score were
# already disabled) and flagged average-height-by-year-of-birth.csv as
# "not working". The glob below supersedes that list.
filenames = glob(_TIMELINE_DIR + "*.csv")
frames = [pd.read_csv(f) for f in filenames]

# Names of aggregate/regional entities (e.g. "World", "High income") that must
# not be treated as countries. The file is a bare list with NO header row, so
# it has to be read with header=None; otherwise its first name is consumed as
# the column label and never ends up among the values.
elements_to_drop = pd.read_csv(
    _BASE_DIR + "dt selection and cleaning criterias\\elements to drop in dfs.csv",
    header=None,
)
print(elements_to_drop)

# Plain set of names for O(1) lookups. `x in dataframe` would test the column
# labels, not the cell values, so the DataFrame itself cannot be used here.
_names_to_drop = set(elements_to_drop.iloc[:, 0].dropna())


def cleaner(x):
    """Return True when entity name `x` should be kept.

    `x` is kept unless it appears in the list of aggregate/regional names
    loaded from "elements to drop in dfs.csv".
    """
    return x not in _names_to_drop


def _strip_aggregates(df):
    """Return `df` without aggregate-entity rows and without the `Code` column."""
    kept = df[df['Entity'].apply(cleaner)]
    # errors='ignore': a source CSV that has no `Code` column is left as is
    # instead of aborting the whole run.
    return kept.drop(columns=['Code'], errors='ignore')


# Clean every frame, then merge them all onto the HDI frame on (Entity, Year).
results = [_strip_aggregates(df) for df in frames]
# Debug peek at one cleaned frame; guarded so a folder holding fewer than six
# CSVs does not raise IndexError.
if len(results) > 5:
    print(results[5])

hdi = _strip_aggregates(hdi)
df_analysis = hdi

# NOTE(review): the glob pattern also matches human-development-index.csv, so
# the HDI data is merged onto itself and pandas disambiguates the overlapping
# column with its default _x/_y suffixes. That is presumably why the regression
# later in this script reads 'HDI_x'. Confirm before changing.
for x, frame in enumerate(results):
    print(df_analysis.columns, len(df_analysis), frame.dtypes)
    df_analysis = df_analysis.merge(frame, on=['Entity', 'Year'], how='outer')
    if len(df_analysis) == 0:
        print(x)

print("status: df_analysis")
print(df_analysis.head())
print(df_analysis.columns)
df_analysis.to_csv(path + "df_analysis.csv")


# =============================================================================
# INFERENTIAL STATISTICS
# =============================================================================

# Correlation matrices, used to pick out the candidate explanatory
# (independent) variables that are worth regressing HDI on.
corr_methods = ['pearson', 'spearman', 'kendall']

# Correlate numeric columns only. `Entity` is a string column; older pandas
# skipped it silently, while pandas >= 2.0 raises unless non-numeric columns
# are removed first.
_numeric_analysis = df_analysis.select_dtypes(include='number')

corr_pearson_matrix = _numeric_analysis.corr(method='pearson')
corr_pearson_matrix.to_excel(path + "corr_pearson_matrix." + excel_format)

corr_spearman_matrix = _numeric_analysis.corr(method='spearman')
corr_spearman_matrix.to_excel(path + "corr_spearman_matrix." + excel_format)

corr_kendaltau_matrix = _numeric_analysis.corr(method='kendall')
corr_kendaltau_matrix.to_excel(path + "corr_kendaltau_matrix." + excel_format)

# Element-wise difference between the Pearson and Spearman matrices.
diff_pearson_spearman = corr_pearson_matrix - corr_spearman_matrix
diff_pearson_spearman.to_excel(path + "diff_pearson_spearman." + excel_format)


# =============================================================================
# plotting
# =============================================================================
sns.set()

# Draw a heatmap with the numeric values in each cell
f, ax = plt.subplots(figsize=(9, 6))
sns.heatmap(corr_pearson_matrix, annot=True, linewidths=.5, cmap="RdBu_r")

# Univariate distribution plots (sns.distplot over each frame) were sketched
# here in an earlier revision and left disabled.


# =============================================================================
# Do multilinear regression
# =============================================================================

# Computing Multilinear regression with all X's
X_all = df_analysis[[
    "Avg Height (cm)",
    "Crude divorce rate (per 1,000 inhabitants)",
    "Forest area (% of land area)",
    "Gini coeff",
    "World Happiness Report",
    "Deaths - Interpersonal violence - Sex: Both - Age: All Ages (Rate) (homicides per 100,000)",
    "Human Rights Protection Scores",
    "Military expenditure (% of GDP)",
    "Schizophrenia (%)",
    "Bipolar disorder (%)",
    "Eating disorders (%)",
    "Anxiety disorders (%)",
    "Drug use disorders (%)",
    "Depression (%)",
    "Alcohol use disorders (%)",
    "Share of Top 1% in Pre-tax national income (%)",
    "Share with Mental and Substance disorders",
    "Suicide rate",
    "Taxes goods and services (% GDP)",
    "Corruption Perception Index",
    "Freedom score",
]]
# NOTE(review): zero-filling turns every missing measurement into a real
# observation of 0, which distorts the fit. It is kept because the OLS call
# that follows is made without any missing-value handling and would otherwise
# receive NaNs.
X_all = X_all.fillna(0)
+print(X_all.dtypes) +Y=df_analysis['HDI_x'].fillna(0) +print(Y) +OLSmodel_all_vars=sm.OLS(Y,X_all).fit() +print(OLSmodel_all_vars.summary()) + +#Computing Multilinear regression with selected X's +''' +''' +''' +X_selected=df_analysis[['Year','Cylinders','Fuel Barrels/Year','Combined MPG','Fuel Cost/Year']] +OLSmodel_all_vars=sm.OLS(Y,X).fit() +print(OLSmodel_selected_vars.summary()) +''' diff --git a/your-project/tableau/ironhack_w6_HDI tableau.twb b/your-project/tableau/ironhack_w6_HDI tableau.twb new file mode 100644 index 0000000..18d576c --- /dev/null +++ b/your-project/tableau/ironhack_w6_HDI tableau.twb @@ -0,0 +1,2927 @@ + + + + + + + + + <_.fcp.MarkAnimation.true...MarkAnimation /> + <_.fcp.ObjectModelEncapsulateLegacy.true...ObjectModelEncapsulateLegacy /> + <_.fcp.ObjectModelTableType.true...ObjectModelTableType /> + <_.fcp.SchemaViewerObjectModel.true...SchemaViewerObjectModel /> + <_.fcp.SetMembershipControl.true...SetMembershipControl /> + + + + + + + + + + + + + + + + <_.fcp.ObjectModelEncapsulateLegacy.false...relation connection='excel-direct.16xyrs90x2f9ks1f38moi1meke6s' name='Sheet1' table='[Sheet1$]' type='table'> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <_.fcp.ObjectModelEncapsulateLegacy.true...relation connection='excel-direct.16xyrs90x2f9ks1f38moi1meke6s' name='Sheet1' table='[Sheet1$]' type='table'> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + [Sheet1] + + Count + true + + 2 + "A1:AT10001:no:A1:AT2147483647:0" + true + 6 + + + + F1 + 20 + [F1] + [Sheet1] + F1 + 0 + integer + Sum + true + + "I8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Entity + 130 + [Entity] + [Sheet1] + Entity + 1 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Year + 20 + [Year] + [Sheet1] + 
Year + 2 + integer + Sum + true + + "I8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + HDI_x + 5 + [HDI_x] + [Sheet1] + HDI_x + 3 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x + 130 + [Code_x] + [Sheet1] + Code_x + 4 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + (Annual hours worked per worker) + 5 + [(Annual hours worked per worker)] + [Sheet1] + (Annual hours worked per worker) + 5 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y + 130 + [Code_y] + [Sheet1] + Code_y + 6 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Avg Height (cm) + 5 + [Avg Height (cm)] + [Sheet1] + Avg Height (cm) + 7 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x 1 + 130 + [Code_x 1] + [Sheet1] + Code_x 1 + 8 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Crude divorce rate (per 1,000 inhabitants) + 5 + [Crude divorce rate (per 1,000 inhabitants)] + [Sheet1] + Crude divorce rate (per 1,000 inhabitants) + 9 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y 1 + 130 + [Code_y 1] + [Sheet1] + Code_y 1 + 10 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Forest area (% of land area) + 5 + [Forest area (% of land area)] + [Sheet1] + Forest area (% of land area) + 11 + real + Sum + 15 
+ true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x 2 + 130 + [Code_x 2] + [Sheet1] + Code_x 2 + 12 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Gini coeff + 5 + [Gini coeff] + [Sheet1] + Gini coeff + 13 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y 2 + 130 + [Code_y 2] + [Sheet1] + Code_y 2 + 14 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + World Happiness Report + 5 + [World Happiness Report] + [Sheet1] + World Happiness Report + 15 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x 3 + 130 + [Code_x 3] + [Sheet1] + Code_x 3 + 16 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Deaths - Interpersonal violence - Sex: Both - Age: All Ages (Rate) (homicides per 100,000) + 5 + [Deaths - Interpersonal violence - Sex: Both - Age: All Ages (Rate) (homicides per 100,000)] + [Sheet1] + Deaths - Interpersonal violence - Sex: Both - Age: All Ages (Rate) (homicides per 100,000) + 17 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y 3 + 130 + [Code_y 3] + [Sheet1] + Code_y 3 + 18 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + HDI_y + 5 + [HDI_y] + [Sheet1] + HDI_y + 19 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x 4 + 130 + [Code_x 4] + 
[Sheet1] + Code_x 4 + 20 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Human Rights Protection Scores + 5 + [Human Rights Protection Scores] + [Sheet1] + Human Rights Protection Scores + 21 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y 4 + 130 + [Code_y 4] + [Sheet1] + Code_y 4 + 22 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Military expenditure (% of GDP) + 5 + [Military expenditure (% of GDP)] + [Sheet1] + Military expenditure (% of GDP) + 23 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x 5 + 130 + [Code_x 5] + [Sheet1] + Code_x 5 + 24 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Schizophrenia (%) + 5 + [Schizophrenia (%)] + [Sheet1] + Schizophrenia (%) + 25 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Bipolar disorder (%) + 5 + [Bipolar disorder (%)] + [Sheet1] + Bipolar disorder (%) + 26 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Eating disorders (%) + 5 + [Eating disorders (%)] + [Sheet1] + Eating disorders (%) + 27 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Anxiety disorders (%) + 5 + [Anxiety disorders (%)] + [Sheet1] + Anxiety disorders (%) + 28 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Drug use 
disorders (%) + 5 + [Drug use disorders (%)] + [Sheet1] + Drug use disorders (%) + 29 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Depression (%) + 5 + [Depression (%)] + [Sheet1] + Depression (%) + 30 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Alcohol use disorders (%) + 5 + [Alcohol use disorders (%)] + [Sheet1] + Alcohol use disorders (%) + 31 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y 5 + 130 + [Code_y 5] + [Sheet1] + Code_y 5 + 32 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Trust in others (%) + 5 + [Trust in others (%)] + [Sheet1] + Trust in others (%) + 33 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x 6 + 130 + [Code_x 6] + [Sheet1] + Code_x 6 + 34 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Share of Top 1% in Pre-tax national income (%) + 5 + [Share of Top 1% in Pre-tax national income (%)] + [Sheet1] + Share of Top 1% in Pre-tax national income (%) + 35 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y 6 + 130 + [Code_y 6] + [Sheet1] + Code_y 6 + 36 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Share with Mental and Substance disorders + 5 + [Share with Mental and Substance disorders] + [Sheet1] + Share with Mental and Substance disorders + 37 + real + Sum + 15 + true + + "R8" + + 
<_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x 7 + 130 + [Code_x 7] + [Sheet1] + Code_x 7 + 38 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Suicide rate + 5 + [Suicide rate] + [Sheet1] + Suicide rate + 39 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y 7 + 130 + [Code_y 7] + [Sheet1] + Code_y 7 + 40 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Taxes goods and services (% GDP) + 5 + [Taxes goods and services (% GDP)] + [Sheet1] + Taxes goods and services (% GDP) + 41 + real + Sum + 15 + true + + "R8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_x 8 + 130 + [Code_x 8] + [Sheet1] + Code_x 8 + 42 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Corruption Perception Index + 20 + [Corruption Perception Index] + [Sheet1] + Corruption Perception Index + 43 + integer + Sum + true + + "I8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Code_y 8 + 130 + [Code_y 8] + [Sheet1] + Code_y 8 + 44 + string + Count + true + + + "WSTR" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + Freedom score + 20 + [Freedom score] + [Sheet1] + Freedom score + 45 + integer + Sum + true + + "I8" + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-id>[Sheet1_0326D132E07947B084960B3D856D9349] + + + + + + + + + + + + + + + + + + + + + + <_.fcp.ObjectModelTableType.true...column caption='Sheet1' datatype='table' 
name='[__tableau_internal_object_id__].[Sheet1_0326D132E07947B084960B3D856D9349]' role='measure' type='quantitative' /> + + + + + + + + + + + + + <_.fcp.ObjectModelEncapsulateLegacy.true...object-graph> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [federated.1mk3h4c1fliub911ninnm002ebho].[none:Entity:nk] + + + + + + + + + + + + + + + + + + [federated.1mk3h4c1fliub911ninnm002ebho].[Latitude (generated)] + [federated.1mk3h4c1fliub911ninnm002ebho].[Longitude (generated)] +
+ +
+ + + + + + + + + + + + + + + + + + + + [federated.1mk3h4c1fliub911ninnm002ebho].[none:Entity:nk] + + + + + + + + + + + + + + + + + + [federated.1mk3h4c1fliub911ninnm002ebho].[Latitude (generated)] + [federated.1mk3h4c1fliub911ninnm002ebho].[Longitude (generated)] +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [federated.1mk3h4c1fliub911ninnm002ebho].[avg:Avg Height (cm):qk] + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([federated.1mk3h4c1fliub911ninnm002ebho].[cnt:Depression (%):qk] + [federated.1mk3h4c1fliub911ninnm002ebho].[cnt:Taxes goods and services (% GDP):qk]) + [federated.1mk3h4c1fliub911ninnm002ebho].[none:Depression (%) (bin):qk] +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [federated.1mk3h4c1fliub911ninnm002ebho].[cnt:Avg Height (cm):qk] + [federated.1mk3h4c1fliub911ninnm002ebho].[none:Avg Height (cm) (bin):qk] + + [federated.1mk3h4c1fliub911ninnm002ebho].[Avg Height (cm) (bin)] + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [federated.1mk3h4c1fliub911ninnm002ebho].[sum:Corruption Perception Index:qk] + [federated.1mk3h4c1fliub911ninnm002ebho].[none:Depression (%) (bin):qk] + + [federated.1mk3h4c1fliub911ninnm002ebho].[none:Depression (%) (bin):qk] + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [federated.1mk3h4c1fliub911ninnm002ebho].[ctd:Schizophrenia (%):qk] + [federated.1mk3h4c1fliub911ninnm002ebho].[none:Schizophrenia (%) (bin):qk] + + [federated.1mk3h4c1fliub911ninnm002ebho].[Schizophrenia (%) (bin)] + +
+ +
+
+ + +