@@ -13,9 +13,10 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
1313 row_data_col : Required (str) - Column containing asset URL or file path
1414 global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
1515 external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
16- metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
16+ metadata_index : Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
17+ local_files : Optional (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
1718 divider : Optional (str) - String delimiter for all name keys generated
18- verbose : Required (bool) - If True, prints information about code execution
19+ verbose : Optional (bool) - If True, prints information about code execution
1920 Returns:
2021 Dictionary where {key=global_key : value=dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys}
2122 """
@@ -32,7 +33,7 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
3233 futures .append (
3334 exc .submit (
3435 create_data_rows , lb_client , base_client , row ,
35- metadata_name_key_to_schema , metadata_schema_to_name_key ,
36+ metadata_name_key_to_schema , metadata_schema_to_name_key , local_files ,
3637 row_data_col , global_key_col , external_id_col , metadata_index , divider
3738 )
3839 )
@@ -43,24 +44,26 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
4344 print (f'Generated upload list - { len (global_key_to_upload_dict )} data rows to upload' )
4445 return global_key_to_upload_dict
4546
46- def create_data_rows (lb_client :Client , base_client :baseClient , row :pandas .core .series .Series ,
47- metadata_name_key_to_schema :dict , metadata_schema_to_name_key :dict ,
48- row_data_col : str , global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" ):
47+ def create_data_rows (lb_client :Client , base_client :baseClient , row :pandas .core .series .Series ,
48+ metadata_name_key_to_schema :dict , metadata_schema_to_name_key :dict , row_data_col : str ,
49+ global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, local_files = False , divider :str = "///" ):
4950 """ Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
5051 Args:
5152 lb_client : Required (labelbox.client.Client) - Labelbox Client object
5253 base_client : Required (labelbase.client.Client) - Labelbase Client object
53- row_data_col : Required (str) - Column containing asset URL or file path
54+ row : Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row yielded by df.iterrows()
55+ metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
56+ metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
57+ row_data_col : Required (str) - Column containing asset URL or file path
5458 global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
5559 external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
56- metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
57- metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
58- metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
60+ metadata_index : Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
61+ local_files : Optional (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
5962 divider : Optional (str) - String delimiter for all name keys generated
6063 Returns:
6164 Two items - the global_key, and a dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
6265 """
63- row_data = str (row [row_data_col ])
66+ row_data = str (row [row_data_col ]) if not local_files else base_client . connector . upload_local_file ( lb_client , str ( row [ row_data_col ]))
6467 metadata_fields = [{"schema_id" : metadata_name_key_to_schema ['lb_integration_source' ], "value" : "Pandas" }]
6568 if metadata_index :
6669 for metadata_field_name in metadata_index .keys ():
0 commit comments