22from labelbase import Client as baseClient
33import pandas
44from concurrent .futures import ThreadPoolExecutor , as_completed
5- from google .api_core import retry
65
7- def create_upload_dict (df :pandas .core .frame .DataFrame , local_files : bool , lb_client :Client , base_client :baseClient , row_data_col :str ,
6+ def create_upload_dict (df :pandas .core .frame .DataFrame , lb_client :Client , base_client :baseClient , row_data_col :str ,
87 global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" , verbose = False ):
98 """ Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
109 Args:
1110 df : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
12- local_files : Required (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
1311 lb_client : Required (labelbox.client.Client) - Labelbox Client object
1412 base_client : Required (labelbase.client.Client) - Labelbase Client object
1513 row_data_col : Required (str) - Column containing asset URL or file path
@@ -33,36 +31,23 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_clie
3331 for index , row in df .iterrows ():
3432 futures .append (
3533 exc .submit (
36- create_data_rows , local_files , lb_client , base_client , row ,
34+ create_data_rows , lb_client , base_client , row ,
3735 metadata_name_key_to_schema , metadata_schema_to_name_key ,
3836 row_data_col , global_key_col , external_id_col , metadata_index , divider
3937 )
4038 )
4139 for f in as_completed (futures ):
4240 res = f .result ()
43- print (res )
4441 global_key_to_upload_dict [str (res ["global_key" ])] = res
4542 if verbose :
4643 print (f'Generated upload list - { len (global_key_to_upload_dict )} data rows to upload' )
47- return global_key_to_upload_dict
48-
49- @retry .Retry (predicate = retry .if_exception_type (Exception ), deadline = 120. )
50- def create_file (lb_client , file_path :str ):
51- """ Wraps lb_client.upload_file() in retry logic
52- Args:
53- lb_client : Required (labelbox.client.Client) - Labelbox Client object
54- file_path : Required (str) - String corresponding to the row data file path
55- Returns:
56- Temporary URL to-be-uploaded to Labelbox
57- """
58- return lb_client .upload_file (file_path )
44+ return global_key_to_upload_dict
5945
60- def create_data_rows (local_files : bool , lb_client :Client , base_client :baseClient , row :pandas .core .series .Series ,
46+ def create_data_rows (lb_client :Client , base_client :baseClient , row :pandas .core .series .Series ,
6147 metadata_name_key_to_schema :dict , metadata_schema_to_name_key :dict ,
6248 row_data_col :str , global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" ):
6349 """ Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
6450 Args:
65- local_files : Required (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
6651 lb_client : Required (labelbox.client.Client) - Labelbox Client object
6752 base_client : Required (labelbase.client.Client) - Labelbase Client object
6853 row_data_col : Required (str) - Column containing asset URL or file path
@@ -75,7 +60,7 @@ def create_data_rows(local_files:bool, lb_client:Client, base_client:baseClient,
7560 Returns:
7661 A single dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
7762 """
78- row_data = create_file ( str ( row [ row_data_col ])) if local_files else str (row [row_data_col ])
63+ row_data = str (row [row_data_col ])
7964 metadata_fields = [{"schema_id" : metadata_name_key_to_schema ['lb_integration_source' ], "value" : "Pandas" }]
8065 if metadata_index :
8166 for metadata_field_name in metadata_index .keys ():
@@ -87,7 +72,7 @@ def create_data_rows(local_files:bool, lb_client:Client, base_client:baseClient,
8772 divider = divider
8873 )
8974 if metadata_value :
90- metadata_fields .append ({"schema_id" : metadata_name_key_to_schema [metadata_field_name ], "value" : value })
75+ metadata_fields .append ({"schema_id" : metadata_name_key_to_schema [metadata_field_name ], "value" : metadata_value })
9176 else :
9277 continue
9378 return {"row_data" :row_data ,"global_key" :str (row [global_key_col ]),"external_id" :str (row [external_id_col ]),"metadata_fields" :metadata_fields }
0 commit comments