Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion src/compare_csv.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=too-many-lines
"""
Compare two CSV files and return the differences between them.

Expand Down Expand Up @@ -257,6 +258,41 @@ def collection_parent_compare(list1, list2):
return list1_collection_ids == list2


#
def _split_extension(filename):
    """
    Split a filename into (stem, extension) at the LAST dot.

    The extension keeps its leading dot. A name with no dot, or whose only
    dot is the first character (e.g. ".hidden"), has an empty extension.
    """
    stem, dot, extension = filename.rpartition(".")
    if not dot or not stem:
        return filename, ""
    return stem, dot + extension


def filename_with_uuid_compare(era_filename, scholaris_filename):
    """
    Compare filenames, allowing for Scholaris filenames to have a UUID appended.
    Example:
    ERA: a.pdf
    Scholaris: a_uuid.pdf

    The filenames match when, after stripping surrounding whitespace, either
    they are identical, or the Scholaris name is the ERA base name followed by
    "_" and at least one more character, with the same (case-sensitive)
    extension. The extension is whatever follows the last dot.

    Returns False when either value is empty or is not a string (for example
    None or a NaN float read from an empty CSV cell).
    """

    logging.debug("%s ---- %s", era_filename, scholaris_filename)

    # Empty CSV cells may arrive as None or NaN; NaN is truthy, so an explicit
    # type check is needed before calling any string method.
    if not isinstance(era_filename, str) or not isinstance(scholaris_filename, str):
        return False
    if not era_filename or not scholaris_filename:
        return False

    # Remove any surrounding whitespace before comparing
    era_filename = era_filename.strip()
    scholaris_filename = scholaris_filename.strip()

    if scholaris_filename == era_filename:
        return True

    # Without an ERA base name every "_something" name would match.
    if not era_filename:
        return False

    # Split at the last dot so that dots inside the base name
    # (e.g. "report.v2.pdf") remain part of the name being compared.
    era_stem, era_extension = _split_extension(era_filename)
    scholaris_stem, scholaris_extension = _split_extension(scholaris_filename)

    # A different extension means a different file, whatever the base name is.
    if era_extension != scholaris_extension:
        return False

    # The Scholaris base name must be "<ERA base name>_<something>", where the
    # appended part is not empty.
    expected_prefix = era_stem + "_"
    return scholaris_stem.startswith(expected_prefix) and len(scholaris_stem) > len(
        expected_prefix
    )


#
def special_language_compare(row, key, value):
"""
Expand Down Expand Up @@ -618,7 +654,7 @@ def special_type_compare(row, key, value):
"comparison_types": {
"name": {
"columns": {"jupiter": "filename", "dspace": "bitstream.name"},
"comparison_function": string_compare_ignore_whitespace,
"comparison_function": filename_with_uuid_compare,
},
"checksum": {
"columns": {
Expand Down
12 changes: 12 additions & 0 deletions src/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,18 @@ def test_collection_parent_compare():
assert compare.collection_parent_compare("[]", float("NaN")) is False


def test_filename_with_uuid_compare():
    """
    Check filename comparison where the Scholaris name may carry a UUID suffix
    """
    # (ERA filename, Scholaris filename, expected result)
    scenarios = [
        ("a.pdf", "a.pdf", True),
        ("a.pdf", "b.pdf", False),
        ("a.pdf", "a_12345.pdf", True),
        ("a.pdf", "a.pdf_extra", False),
        ("a.pdf", None, False),
        (None, "a_uuid.pdf", False),
    ]
    for era_name, scholaris_name, expected in scenarios:
        outcome = compare.filename_with_uuid_compare(era_name, scholaris_name)
        assert outcome is expected


def test_language_compare():
"""
Language tests
Expand Down