@@ -36,14 +36,14 @@ def download_json(url: str) -> Any:
         raise Exception("Download failed", e)


-def validate_json(dataset_name: str, json_data: dict[str, Any]) -> None:
-    logger.info(f"Validating dataset {dataset_name}")
+def validate_json(dataset_id: str, json_data: dict[str, Any]) -> None:
+    logger.info(f"Validating dataset {dataset_id}")
     try:
         validation_result = oc4ids_json_output(json_data=json_data)
         validation_errors_count = validation_result["validation_errors_count"]
         if validation_errors_count > 0:
             raise Exception(f"Dataset has {validation_errors_count} validation errors")
-        logger.info(f"Dataset {dataset_name} is valid")
+        logger.info(f"Dataset {dataset_id} is valid")
     except Exception as e:
         raise Exception("Validation failed", e)

@@ -81,19 +81,19 @@ def transform_to_csv_and_xlsx(json_path: str) -> tuple[Optional[str], Optional[str]]:


 def save_dataset_metadata(
-    dataset_name: str,
+    dataset_id: str,
     source_url: str,
     json_data: dict[str, Any],
     json_url: Optional[str],
     csv_url: Optional[str],
     xlsx_url: Optional[str],
 ) -> None:
-    logger.info(f"Saving metadata for dataset {dataset_name}")
+    logger.info(f"Saving metadata for dataset {dataset_id}")
     publisher_name = json_data.get("publisher", {}).get("name", "")
     license_url = json_data.get("license", None)
     license_name = get_license_name_from_url(license_url) if license_url else None
     dataset = Dataset(
-        dataset_id=dataset_name,
+        dataset_id=dataset_id,
         source_url=source_url,
         publisher_name=publisher_name,
         license_url=license_url,
@@ -106,29 +106,29 @@ def save_dataset_metadata(
     save_dataset(dataset)


-def process_dataset(dataset_name: str, dataset_url: str) -> None:
-    logger.info(f"Processing dataset {dataset_name}")
+def process_dataset(dataset_id: str, source_url: str) -> None:
+    logger.info(f"Processing dataset {dataset_id}")
     try:
-        json_data = download_json(dataset_url)
-        validate_json(dataset_name, json_data)
+        json_data = download_json(source_url)
+        validate_json(dataset_id, json_data)
         json_path = write_json_to_file(
-            f"data/{dataset_name}/{dataset_name}.json", json_data
+            f"data/{dataset_id}/{dataset_id}.json", json_data
         )
         csv_path, xlsx_path = transform_to_csv_and_xlsx(json_path)
         json_public_url, csv_public_url, xlsx_public_url = upload_files(
-            dataset_name, json_path=json_path, csv_path=csv_path, xlsx_path=xlsx_path
+            dataset_id, json_path=json_path, csv_path=csv_path, xlsx_path=xlsx_path
         )
         save_dataset_metadata(
-            dataset_name=dataset_name,
-            source_url=dataset_url,
+            dataset_id=dataset_id,
+            source_url=source_url,
             json_data=json_data,
             json_url=json_public_url,
             csv_url=csv_public_url,
             xlsx_url=xlsx_public_url,
         )
-        logger.info(f"Processed dataset {dataset_name}")
+        logger.info(f"Processed dataset {dataset_id}")
     except Exception as e:
-        logger.warning(f"Failed to process dataset {dataset_name} with error {e}")
+        logger.warning(f"Failed to process dataset {dataset_id} with error {e}")


 def process_deleted_datasets(registered_datasets: dict[str, str]) -> None:
@@ -143,8 +143,8 @@ def process_deleted_datasets(registered_datasets: dict[str, str]) -> None:
 def process_registry() -> None:
     registered_datasets = fetch_registered_datasets()
     process_deleted_datasets(registered_datasets)
-    for name, url in registered_datasets.items():
-        process_dataset(name, url)
+    for dataset_id, url in registered_datasets.items():
+        process_dataset(dataset_id, url)
     logger.info("Finished processing all datasets")

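For context, a minimal runnable sketch of the control flow these hunks converge on: `process_registry` iterates the registry and hands each `dataset_id` and URL to `process_dataset`, which logs a warning instead of raising on failure, so one bad dataset cannot abort the rest of the run. The helper bodies below are hypothetical stand-ins; the real implementations (registry fetch, download, validation, uploads) are not part of this diff.

```python
# Minimal sketch of the pipeline's control flow; helper bodies are
# hypothetical stubs, not the real implementations from this repo.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def fetch_registered_datasets() -> dict[str, str]:
    # Hypothetical stub: the real function reads the dataset registry,
    # mapping each dataset_id to its source URL.
    return {"example_dataset": "https://example.com/example.json"}


def process_dataset(dataset_id: str, source_url: str) -> None:
    # Stub mirroring the renamed signature (dataset_id, source_url) and
    # the log-don't-raise error handling shown in the diff.
    try:
        logger.info(f"Processing dataset {dataset_id} from {source_url}")
        # ... download, validate, transform, upload, save metadata ...
        logger.info(f"Processed dataset {dataset_id}")
    except Exception as e:
        logger.warning(f"Failed to process dataset {dataset_id} with error {e}")


def process_registry() -> None:
    registered_datasets = fetch_registered_datasets()
    for dataset_id, url in registered_datasets.items():
        # A failure in one dataset is contained inside process_dataset,
        # so the loop always reaches the remaining datasets.
        process_dataset(dataset_id, url)
    logger.info("Finished processing all datasets")


if __name__ == "__main__":
    process_registry()
```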