From 2624ee244d6af12cf9d800ae9c5c597945a2a559 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Thu, 16 Apr 2026 14:22:15 -0400 Subject: [PATCH 1/2] Fix SSSOM structural mapping column names for sssom-py compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename subject_type → d4d_subject_range and object_type → rocrate_value_type to avoid colliding with reserved SSSOM columns that expect EntityTypeEnum values. Add fallback in SSSOMIntegration when sssom-py silently drops rows. Co-Authored-By: Claude Opus 4.6 --- data/mappings/d4d_rocrate_structural_mapping.sssom.tsv | 2 +- src/alignment/generate_structural_mapping.py | 4 ++-- src/fairscape_integration/utils/sssom_integration.py | 10 +++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/data/mappings/d4d_rocrate_structural_mapping.sssom.tsv b/data/mappings/d4d_rocrate_structural_mapping.sssom.tsv index 68725263..e72eea4f 100644 --- a/data/mappings/d4d_rocrate_structural_mapping.sssom.tsv +++ b/data/mappings/d4d_rocrate_structural_mapping.sssom.tsv @@ -1,4 +1,4 @@ -subject_id subject_label subject_category predicate_id object_id object_label mapping_justification confidence subject_source object_source subject_type subject_multivalued object_type type_compatible composition_path structural_notes warnings +subject_id subject_label subject_category predicate_id object_id object_label mapping_justification confidence subject_source object_source d4d_subject_range subject_multivalued rocrate_value_type type_compatible composition_path structural_notes warnings d4d:Dataset/addressing_gaps addressing_gaps Dataset skos:closeMatch d4d:addressingGaps d4d:addressingGaps semapv:SemanticSimilarity 0.6 d4d:data_sheets_schema rocrate:fairscape AddressingGap True str False slot_uri mapping: d4d:addressingGaps Cardinality mismatch: multivalued slot mapping to single value d4d:Dataset/informed_consent informed_consent Dataset skos:closeMatch d4d:informedConsent d4d:informedConsent semapv:SemanticSimilarity 0.6 d4d:data_sheets_schema rocrate:fairscape InformedConsent True str False slot_uri mapping: d4d:informedConsent Cardinality mismatch: multivalued slot mapping to single value d4d:Dataset/at_risk_populations at_risk_populations Dataset skos:exactMatch d4d:atRiskPopulations d4d:atRiskPopulations semapv:SemanticSimilarity 0.9 d4d:data_sheets_schema rocrate:fairscape AtRiskPopulations False str True slot_uri mapping: d4d:atRiskPopulations diff --git a/src/alignment/generate_structural_mapping.py b/src/alignment/generate_structural_mapping.py index b5fa5428..2d75ebe4 100755 --- a/src/alignment/generate_structural_mapping.py +++ b/src/alignment/generate_structural_mapping.py @@ -129,9 +129,9 @@ def to_sssom_row(self) -> Dict[str, str]: "confidence": str(self.confidence), "subject_source": "d4d:data_sheets_schema", "object_source": "rocrate:fairscape", - "subject_type": self.d4d_range or "string", + "d4d_subject_range": self.d4d_range or "string", "subject_multivalued": str(self.d4d_multivalued), - "object_type": self.rocrate_type, + "rocrate_value_type": self.rocrate_type, "type_compatible": str(self.type_compatible), "composition_path": self.composition_path or "", "structural_notes": self.structural_notes, diff --git a/src/fairscape_integration/utils/sssom_integration.py b/src/fairscape_integration/utils/sssom_integration.py index 6aba08f0..89ff0c2f 100644 --- a/src/fairscape_integration/utils/sssom_integration.py +++ b/src/fairscape_integration/utils/sssom_integration.py @@ -67,7 +67,15 @@ def _load_with_sssom_py(self): # Parse SSSOM file self.msdf = parse_sssom_table(str(self.sssom_path)) - if self.verbose: + # sssom-py may silently drop rows with non-standard values + # (e.g. unknown EntityTypeEnum codes), returning an empty DataFrame + # instead of raising an exception. Fall back to custom reader. + if len(self.msdf.df) == 0: + if self.verbose: + print("sssom-py returned 0 mappings, falling back to custom reader") + self.use_standard = False + self._load_with_custom_reader() + elif self.verbose: print(f"Loaded {len(self.msdf.df)} mappings") print(f"Mapping set ID: {self.msdf.mapping_set_id}") except Exception as e: From 8f29c683851fb507a7c163733669f0ac52b4cac9 Mon Sep 17 00:00:00 2001 From: "marcin p. joachimiak" <4625870+realmarcin@users.noreply.github.com> Date: Fri, 24 Apr 2026 17:23:22 -0700 Subject: [PATCH 2/2] Address Copilot review: warn on silent fallback, add per-instance impl getter - Emit RuntimeWarning when sssom-py returns 0 rows so the silent fallback to the custom reader is visible even when verbose=False - Add SSSOMIntegration.get_active_implementation() instance method that reflects per-instance state (self.use_standard), so callers can tell which reader an instance is actually using after a runtime fallback - Clarify get_implementation() docstring: it reports the default selection based on package availability, not per-instance state Co-Authored-By: Claude Opus 4.7 (1M context) --- .../utils/sssom_integration.py | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/fairscape_integration/utils/sssom_integration.py b/src/fairscape_integration/utils/sssom_integration.py index 89ff0c2f..6a35d42f 100644 --- a/src/fairscape_integration/utils/sssom_integration.py +++ b/src/fairscape_integration/utils/sssom_integration.py @@ -71,6 +71,13 @@ def _load_with_sssom_py(self): # (e.g. unknown EntityTypeEnum codes), returning an empty DataFrame # instead of raising an exception. Fall back to custom reader. if len(self.msdf.df) == 0: + warnings.warn( + f"sssom-py returned 0 mappings for {self.sssom_path}; " + "this typically indicates rows were silently dropped due to " + "non-standard column values. Falling back to custom reader.", + RuntimeWarning, + stacklevel=2, + ) if self.verbose: print("sssom-py returned 0 mappings, falling back to custom reader") self.use_standard = False @@ -317,9 +324,28 @@ def is_sssom_available() -> bool: @staticmethod def get_implementation() -> str: """ - Get the implementation being used. + Get the implementation that would be selected by default. + + Reflects whether the standard sssom-py package is importable. This is a + process-wide check and does not account for per-instance fallbacks (e.g. + when sssom-py returns 0 rows and an instance switches to the custom + reader). Use :meth:`get_active_implementation` for per-instance state. Returns: 'sssom-py' if standard package is available, 'custom' otherwise """ return 'sssom-py' if SSSOM_AVAILABLE else 'custom' + + def get_active_implementation(self) -> str: + """ + Get the implementation actively in use by this instance. + + Unlike :meth:`get_implementation`, this reflects any runtime fallback + that occurred for this specific instance (e.g. sssom-py was available + but returned 0 rows, causing this instance to fall back to the custom + reader). + + Returns: + 'sssom-py' if this instance is using sssom-py, 'custom' otherwise + """ + return 'sssom-py' if self.use_standard else 'custom'