
Commit 17b05e6

refactor: rename variables
1 parent fc1f386 commit 17b05e6

File tree: 2 files changed (+20 additions, -20 deletions)

oc4ids_datastore_pipeline/pipeline.py

Lines changed: 18 additions & 18 deletions
@@ -36,14 +36,14 @@ def download_json(url: str) -> Any:
         raise Exception("Download failed", e)


-def validate_json(dataset_name: str, json_data: dict[str, Any]) -> None:
-    logger.info(f"Validating dataset {dataset_name}")
+def validate_json(dataset_id: str, json_data: dict[str, Any]) -> None:
+    logger.info(f"Validating dataset {dataset_id}")
     try:
         validation_result = oc4ids_json_output(json_data=json_data)
         validation_errors_count = validation_result["validation_errors_count"]
         if validation_errors_count > 0:
             raise Exception(f"Dataset has {validation_errors_count} validation errors")
-        logger.info(f"Dataset {dataset_name} is valid")
+        logger.info(f"Dataset {dataset_id} is valid")
     except Exception as e:
         raise Exception("Validation failed", e)
@@ -81,19 +81,19 @@ def transform_to_csv_and_xlsx(json_path: str) -> tuple[Optional[str], Optional[str]]:


 def save_dataset_metadata(
-    dataset_name: str,
+    dataset_id: str,
     source_url: str,
     json_data: dict[str, Any],
     json_url: Optional[str],
     csv_url: Optional[str],
     xlsx_url: Optional[str],
 ) -> None:
-    logger.info(f"Saving metadata for dataset {dataset_name}")
+    logger.info(f"Saving metadata for dataset {dataset_id}")
     publisher_name = json_data.get("publisher", {}).get("name", "")
     license_url = json_data.get("license", None)
     license_name = get_license_name_from_url(license_url) if license_url else None
     dataset = Dataset(
-        dataset_id=dataset_name,
+        dataset_id=dataset_id,
         source_url=source_url,
         publisher_name=publisher_name,
         license_url=license_url,
@@ -106,29 +106,29 @@ def save_dataset_metadata(
     save_dataset(dataset)


-def process_dataset(dataset_name: str, dataset_url: str) -> None:
-    logger.info(f"Processing dataset {dataset_name}")
+def process_dataset(dataset_id: str, source_url: str) -> None:
+    logger.info(f"Processing dataset {dataset_id}")
     try:
-        json_data = download_json(dataset_url)
-        validate_json(dataset_name, json_data)
+        json_data = download_json(source_url)
+        validate_json(dataset_id, json_data)
         json_path = write_json_to_file(
-            f"data/{dataset_name}/{dataset_name}.json", json_data
+            f"data/{dataset_id}/{dataset_id}.json", json_data
         )
         csv_path, xlsx_path = transform_to_csv_and_xlsx(json_path)
         json_public_url, csv_public_url, xlsx_public_url = upload_files(
-            dataset_name, json_path=json_path, csv_path=csv_path, xlsx_path=xlsx_path
+            dataset_id, json_path=json_path, csv_path=csv_path, xlsx_path=xlsx_path
         )
         save_dataset_metadata(
-            dataset_name=dataset_name,
-            source_url=dataset_url,
+            dataset_id=dataset_id,
+            source_url=source_url,
             json_data=json_data,
             json_url=json_public_url,
             csv_url=csv_public_url,
             xlsx_url=xlsx_public_url,
         )
-        logger.info(f"Processed dataset {dataset_name}")
+        logger.info(f"Processed dataset {dataset_id}")
     except Exception as e:
-        logger.warning(f"Failed to process dataset {dataset_name} with error {e}")
+        logger.warning(f"Failed to process dataset {dataset_id} with error {e}")


 def process_deleted_datasets(registered_datasets: dict[str, str]) -> None:
@@ -143,8 +143,8 @@ def process_deleted_datasets(registered_datasets: dict[str, str]) -> None:
 def process_registry() -> None:
     registered_datasets = fetch_registered_datasets()
     process_deleted_datasets(registered_datasets)
-    for name, url in registered_datasets.items():
-        process_dataset(name, url)
+    for dataset_id, url in registered_datasets.items():
+        process_dataset(dataset_id, url)
     logger.info("Finished processing all datasets")
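For orientation, a minimal sketch of how the renamed entry points are called after this commit. The dataset id and URL below are invented placeholder values, not data from the repository:

# Sketch only: exercises the signatures introduced by this rename.
# "example_dataset" and the URL are hypothetical placeholder values.
from oc4ids_datastore_pipeline.pipeline import process_dataset, process_registry

# Process one dataset by its registry id and source URL.
process_dataset("example_dataset", "https://example.com/oc4ids.json")

# Or run the full registry, which loops over {dataset_id: source_url} pairs.
process_registry()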

tests/test_pipeline.py

Lines changed: 2 additions & 2 deletions
@@ -33,7 +33,7 @@ def test_validate_json_raises_failure_exception(mocker: MockerFixture) -> None:
     patch_oc4ids_json_output.side_effect = Exception("Mocked exception")

     with pytest.raises(Exception) as exc_info:
-        validate_json(dataset_name="test_dataset", json_data={})
+        validate_json(dataset_id="test_dataset", json_data={})

     assert "Validation failed" in str(exc_info.value)
     assert "Mocked exception" in str(exc_info.value)
@@ -48,7 +48,7 @@ def test_validate_json_raises_validation_errors_exception(
     patch_oc4ids_json_output.return_value = {"validation_errors_count": 2}

     with pytest.raises(Exception) as exc_info:
-        validate_json(dataset_name="test_dataset", json_data={})
+        validate_json(dataset_id="test_dataset", json_data={})

     assert "Validation failed" in str(exc_info.value)
     assert "Dataset has 2 validation errors" in str(exc_info.value)
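A passing-validation counterpart to the two tests above would follow the same shape. This is a sketch, not part of the commit, and the patch target path is an assumption since the mock setup sits outside the diff context shown here:

# Hypothetical companion test, mirroring the mocking style above.
# The patch target path is assumed; it is not visible in this diff.
def test_validate_json_passes_when_no_errors(mocker: MockerFixture) -> None:
    patch_oc4ids_json_output = mocker.patch(
        "oc4ids_datastore_pipeline.pipeline.oc4ids_json_output"
    )
    patch_oc4ids_json_output.return_value = {"validation_errors_count": 0}

    # validate_json should return None without raising.
    validate_json(dataset_id="test_dataset", json_data={})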
