@@ -117,9 +117,16 @@ def generate_module(rxcui_ndc_df, rxclass_name):
117
117
#Read in MEPS Reference table
118
118
meps_reference = db_query (meps .utils .get_sql ('meps_reference.sql' ))
119
119
120
+ #Read in FDA Ingredient-RxCUI-Years Reference table (for years that a given ingredient was available on the market)
121
+ ingredient_rxcui_years = db_query ('SELECT * FROM medication_ingredient_rxcui_years' )
122
+
123
+ #Read in FDA Product-RxCUI-Years Reference table (for years that a given product was available on the market)
124
+ product_rxcui_years = db_query ('SELECT * FROM medication_product_rxcui_years' )
125
+
120
126
#Join MEPS to filtered rxcui_ndc dataframe (rxcui_list)
121
127
meps_rxcui = meps_reference .astype (str ).merge (rxcui_ndc_df .astype (str )[['medication_ingredient_name' , 'medication_ingredient_rxcui' ,'medication_product_name' , 'medication_product_rxcui' , 'medication_ndc' ]], how = 'inner' , left_on = 'RXNDC' , right_on = 'medication_ndc' )
122
128
129
+
123
130
#Optional: Age range join - can be customized in the mdt_config.json file
124
131
#groupby_demographic_variable: must be either an empty list [] or list of patient demographics (e.g., age, gender, state) - based on user inputs in the mdt_config.json file
125
132
@@ -162,20 +169,23 @@ def generate_module(rxcui_ndc_df, rxclass_name):
162
169
163
170
filename = rxclass_name + '_ingredient_distrib'
164
171
#1
165
- dcp_dict ['patient_count_ingredient' ] = meps_rxcui [['medication_ingredient_name' , 'medication_ingredient_rxcui' , 'person_weight' , 'DUPERSID' ]+ groupby_demographic_variables ].groupby (['medication_ingredient_name' , 'medication_ingredient_rxcui' , 'person_weight' ]+ groupby_demographic_variables )['DUPERSID' ].nunique ()
172
+ #Join MEPS to ingredient_rxcui_years dataframe (rxcuis_by_fda_marketingdates)
173
+ meps_rxcui_ingred_years = meps_rxcui .astype (str ).merge (ingredient_rxcui_years .astype (str )[['medication_ingredient_rxcui' , 'year' ]], how = 'inner' , on = 'medication_ingredient_rxcui' )
174
+ dcp_dict ['patient_count_ingredient' ] = meps_rxcui_ingred_years [['medication_ingredient_name' , 'medication_ingredient_rxcui' , 'year' , 'person_weight' , 'DUPERSID' ]+ groupby_demographic_variables ].groupby (['medication_ingredient_name' , 'medication_ingredient_rxcui' , 'year' , 'person_weight' ]+ groupby_demographic_variables )['DUPERSID' ].nunique ()
166
175
dcp_df = pd .DataFrame (dcp_dict ['patient_count_ingredient' ]).reset_index ()
167
176
#2
168
177
dcp_df ['weighted_patient_count_ingredient' ] = dcp_df ['person_weight' ].astype (float )* dcp_df ['DUPERSID' ]
169
178
#3
170
- dcp_dict ['patients_by_demographics_ingredient' ] = dcp_df .groupby (['medication_ingredient_name' ]+ groupby_demographic_variables )['weighted_patient_count_ingredient' ].sum ()
179
+ dcp_dict ['patients_by_demographics_ingredient' ] = dcp_df .groupby (['medication_ingredient_name' , 'year' ]+ groupby_demographic_variables )['weighted_patient_count_ingredient' ].sum ()
171
180
dcp_demographic_df = pd .DataFrame (dcp_dict ['patients_by_demographics_ingredient' ]).reset_index ()
172
181
#4
173
182
if len (groupby_demographic_variables ) > 0 :
174
- dcp_demographictotal_df = pd .merge (dcp_demographic_df , dcp_demographic_df .groupby (groupby_demographic_variables )['weighted_patient_count_ingredient' ].sum (), how = 'inner' , left_on = groupby_demographic_variables , right_index = True , suffixes = ('_demographic' , '_total' ))
183
+ dcp_demographictotal_df = pd .merge (dcp_demographic_df , dcp_demographic_df .groupby (groupby_demographic_variables + [ 'year' ] )['weighted_patient_count_ingredient' ].sum (), how = 'inner' , left_on = groupby_demographic_variables + [ 'year' ] , right_index = True , suffixes = ('_demographic' , '_total' ))
175
184
else :
176
- dcp_demographictotal_df = dcp_demographic_df
177
- dcp_demographictotal_df ['weighted_patient_count_ingredient_demographic' ] = dcp_demographic_df ['weighted_patient_count_ingredient' ]
178
- dcp_demographictotal_df ['weighted_patient_count_ingredient_total' ] = dcp_demographic_df ['weighted_patient_count_ingredient' ].sum ()
185
+ # dcp_demographictotal_df = dcp_demographic_df
186
+ # dcp_demographictotal_df['weighted_patient_count_ingredient_demographic'] = dcp_demographic_df['weighted_patient_count_ingredient']
187
+ # dcp_demographictotal_df['weighted_patient_count_ingredient_total'] = dcp_demographic_df['weighted_patient_count_ingredient'].sum()
188
+ dcp_demographictotal_df = pd .merge (dcp_demographic_df , dcp_demographic_df .groupby ('year' )['weighted_patient_count_ingredient' ].sum (), how = 'inner' , left_on = 'year' , right_index = True , suffixes = ('_demographic' , '_total' ))
179
189
#5
180
190
dcp_demographictotal_df ['percent_ingredient_patients' ] = round (dcp_demographictotal_df ['weighted_patient_count_ingredient_demographic' ]/ dcp_demographictotal_df ['weighted_patient_count_ingredient_total' ], 3 )
181
191
#6 TODO: change this column to medication_product_state_name(?)
@@ -199,9 +209,9 @@ def generate_module(rxcui_ndc_df, rxclass_name):
199
209
#7
200
210
dcp_dict ['percent_ingredient_patients' ] = dcp_demographictotal_df
201
211
if len (groupby_demographic_variables ) > 0 :
202
- dcp_dict ['percent_ingredient_patients' ] = dcp_dict ['percent_ingredient_patients' ].reset_index ().pivot (index = groupby_demographic_variables , columns = 'medication_ingredient_name' , values = 'percent_ingredient_patients' ).reset_index ()
212
+ dcp_dict ['percent_ingredient_patients' ] = dcp_dict ['percent_ingredient_patients' ].reset_index ().pivot (index = groupby_demographic_variables + [ 'year' ] , columns = 'medication_ingredient_name' , values = 'percent_ingredient_patients' ).reset_index ()
203
213
else :
204
- dcp_dict ['percent_ingredient_patients' ] = dcp_dict ['percent_ingredient_patients' ][['medication_ingredient_name' , 'percent_ingredient_patients' ]].set_index ('medication_ingredient_name' ).T
214
+ dcp_dict ['percent_ingredient_patients' ] = dcp_dict ['percent_ingredient_patients' ][['medication_ingredient_name' , 'percent_ingredient_patients' , 'year' ]].set_index ('medication_ingredient_name' ).T
205
215
206
216
#Fill NULLs and save as CSV
207
217
dcp_dict ['percent_ingredient_patients' ].fillna (0 , inplace = True )
@@ -216,17 +226,19 @@ def generate_module(rxcui_ndc_df, rxclass_name):
216
226
for ingred_name in medication_ingredient_list :
217
227
filename = rxclass_name + '_product_' + ingred_name + '_distrib'
218
228
#0
219
- meps_rxcui_ingred = meps_rxcui [meps_rxcui ['medication_ingredient_name' ]== ingred_name ][['medication_product_name' , 'medication_product_rxcui' , 'medication_ingredient_name' , 'medication_ingredient_rxcui' , 'person_weight' , 'DUPERSID' ]+ groupby_demographic_variables ]
229
+ #Join MEPS to product_rxcui_years dataframe (rxcuis_by_fda_marketingdates)
230
+ meps_rxcui_prod_years = meps_rxcui .astype (str ).merge (product_rxcui_years .astype (str )[['medication_product_rxcui' , 'year' ]], how = 'inner' , on = 'medication_product_rxcui' )
231
+ meps_rxcui_ingred = meps_rxcui_prod_years [meps_rxcui_prod_years ['medication_ingredient_name' ]== ingred_name ][['medication_product_name' , 'medication_product_rxcui' , 'medication_ingredient_name' , 'medication_ingredient_rxcui' , 'year' , 'person_weight' , 'DUPERSID' ]+ groupby_demographic_variables ]
220
232
#1
221
- dcp_dict ['patient_count_product' ] = meps_rxcui_ingred .groupby (['medication_product_name' , 'medication_product_rxcui' , 'medication_ingredient_name' , 'medication_ingredient_rxcui' , 'person_weight' ]+ groupby_demographic_variables )['DUPERSID' ].nunique ()
233
+ dcp_dict ['patient_count_product' ] = meps_rxcui_ingred .groupby (['medication_product_name' , 'medication_product_rxcui' , 'medication_ingredient_name' , 'medication_ingredient_rxcui' , 'year' , ' person_weight' ]+ groupby_demographic_variables )['DUPERSID' ].nunique ()
222
234
dcp_df = pd .DataFrame (dcp_dict ['patient_count_product' ]).reset_index ()
223
235
#2
224
236
dcp_df ['weighted_patient_count_product' ] = dcp_df ['person_weight' ].astype (float )* dcp_df ['DUPERSID' ]
225
237
#3
226
- dcp_dict ['patients_by_demographics_product' ] = dcp_df .groupby (['medication_product_name' , 'medication_ingredient_name' ]+ groupby_demographic_variables )['weighted_patient_count_product' ].sum ()
238
+ dcp_dict ['patients_by_demographics_product' ] = dcp_df .groupby (['medication_product_name' , 'medication_ingredient_name' , 'year' ]+ groupby_demographic_variables )['weighted_patient_count_product' ].sum ()
227
239
dcp_demographic_df = pd .DataFrame (dcp_dict ['patients_by_demographics_product' ]).reset_index ()
228
240
#4
229
- dcp_demographictotal_df = pd .merge (dcp_demographic_df , dcp_demographic_df .groupby (['medication_ingredient_name' ]+ groupby_demographic_variables )['weighted_patient_count_product' ].sum (), how = 'inner' , left_on = ['medication_ingredient_name' ]+ groupby_demographic_variables , right_index = True , suffixes = ('_demographic' , '_total' ))
241
+ dcp_demographictotal_df = pd .merge (dcp_demographic_df , dcp_demographic_df .groupby (['medication_ingredient_name' , 'year' ]+ groupby_demographic_variables )['weighted_patient_count_product' ].sum (), how = 'inner' , left_on = ['medication_ingredient_name' , 'year ' ]+ groupby_demographic_variables , right_index = True , suffixes = ('_demographic' , '_total' ))
230
242
#5
231
243
dcp_demographictotal_df ['percent_product_patients' ] = round (dcp_demographictotal_df ['weighted_patient_count_product_demographic' ]/ dcp_demographictotal_df ['weighted_patient_count_product_total' ], 3 )
232
244
#6 TODO: change this column to medication_product_state_name or medication_product_transition_name(?)
@@ -250,9 +262,9 @@ def generate_module(rxcui_ndc_df, rxclass_name):
250
262
#7
251
263
dcp_dict ['percent_product_patients' ] = dcp_demographictotal_df
252
264
if len (groupby_demographic_variables ) > 0 :
253
- dcp_dict ['percent_product_patients' ] = dcp_dict ['percent_product_patients' ].reset_index ().pivot (index = groupby_demographic_variables , columns = 'medication_product_name' , values = 'percent_product_patients' ).reset_index ()
265
+ dcp_dict ['percent_product_patients' ] = dcp_dict ['percent_product_patients' ].reset_index ().pivot (index = groupby_demographic_variables + [ 'year' ] , columns = 'medication_product_name' , values = 'percent_product_patients' ).reset_index ()
254
266
else :
255
- dcp_dict ['percent_product_patients' ] = dcp_dict ['percent_product_patients' ][['medication_product_name' , 'percent_product_patients' ]].set_index ('medication_product_name' ).T
267
+ dcp_dict ['percent_product_patients' ] = dcp_dict ['percent_product_patients' ][['medication_product_name' , 'percent_product_patients' , 'year' ]].set_index ('medication_product_name' ).T
256
268
257
269
#Fill NULLs and save as CSV
258
270
dcp_dict ['percent_product_patients' ].fillna (0 , inplace = True )
0 commit comments