def filename_with_uuid_compare(era_filename, scholaris_filename):
    """
    Compare filenames, allowing for Scholaris filenames to have a UUID appended.

    Example:
        ERA:       a.pdf
        Scholaris: a_uuid.pdf

    Both names are stripped of surrounding whitespace before comparison.
    They match when they are identical, or when the Scholaris name is the
    ERA name with "_<something>" inserted between the base name and the
    extension. The inserted part only has to be non-empty; it is not
    validated against the UUID format.

    :param era_filename: filename from ERA
    :param scholaris_filename: filename from Scholaris
    :return: True if the names match under the rules above, otherwise False.
        Empty or non-string values (None, NaN, ...) never match.
    """
    logging.debug("%s ---- %s", era_filename, scholaris_filename)

    # Missing cells may arrive as None or as a float NaN; NaN is truthy, so a
    # plain truthiness test would let it through to .strip() and crash.
    if not isinstance(era_filename, str) or not isinstance(
        scholaris_filename, str
    ):
        return False
    if not era_filename or not scholaris_filename:
        return False

    # Remove any surrounding whitespace and compare the full filename
    era_filename = era_filename.strip()
    scholaris_filename = scholaris_filename.strip()

    if scholaris_filename == era_filename:
        return True

    # Split on the LAST dot so multi-dot names ("thesis.v2.pdf") keep their
    # whole base name. An empty stem means there is no dot at all or only a
    # leading dot; in both cases the name is treated as having no extension.
    stem, dot, extension = era_filename.rpartition(".")
    if stem:
        extension = dot + extension
    else:
        stem, extension = era_filename, ""

    # The Scholaris name must look like "<stem>_<uuid><extension>"
    prefix = stem + "_"
    if not scholaris_filename.startswith(prefix):
        return False
    if not scholaris_filename.endswith(extension):
        return False

    # Whatever sits between the prefix and the extension is the appended
    # identifier; an empty slice (e.g. "a_.pdf") is not a match.
    start = len(prefix)
    end = len(scholaris_filename) - len(extension)
    return bool(scholaris_filename[start:end])
def test_filename_with_uuid_compare():
    """
    Check filename matching when the Scholaris name may carry a UUID suffix
    """
    # (ERA filename, Scholaris filename, expected comparison result)
    cases = (
        ("a.pdf", "a.pdf", True),
        ("a.pdf", "b.pdf", False),
        ("a.pdf", "a_12345.pdf", True),
        ("a.pdf", "a.pdf_extra", False),
        ("a.pdf", None, False),
        (None, "a_uuid.pdf", False),
    )
    for era_name, scholaris_name, expected in cases:
        outcome = compare.filename_with_uuid_compare(era_name, scholaris_name)
        assert outcome is expected