From 0ecd2481ac598c05ab0fad17332b901dd9b7c74c Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Mon, 1 Apr 2024 15:14:32 -0400 Subject: [PATCH] feat(image-sets-normalization): array and number types for tag values --- packages/dicom/gdcm/Tags.h | 341 +++++------ .../dicom/gdcm/image-sets-normalization.cxx | 568 +++++++++++++++--- 2 files changed, 649 insertions(+), 260 deletions(-) diff --git a/packages/dicom/gdcm/Tags.h b/packages/dicom/gdcm/Tags.h index ce5c234ac..14d660a86 100644 --- a/packages/dicom/gdcm/Tags.h +++ b/packages/dicom/gdcm/Tags.h @@ -22,212 +22,175 @@ #include #include "itkGDCMImageIO.h" -using TagKey = std::string; -using TagKeys = std::unordered_set; -using TagNames = std::unordered_map; -using TagMap = TagNames; // TagKey -> TagValue +using Tag = gdcm::Tag; +using Tags = std::set; + +const Tag STUDY_UID(0x0020, 0x000d); // "Study Instance UID" +const Tag SERIES_UID(0x0020, 0x000e); // "Series Instance UID" +const Tag INSTANCE_UID(0x0008, 0x0018); // "Instance UID" + +const Tag FRAME_OF_REFERENCE_UID(0x0020, 0x0052); +const Tag IMAGE_ORIENTATION_PATIENT(0x0020, 0x0037); // Tag names from https://docs.aws.amazon.com/healthimaging/latest/devguide/reference-dicom-support.html -const TagNames PATIENT_TAG_NAMES = { +const Tags PATIENT_TAGS = { // Patient Module Elements - {"0010|0010", "Patient's Name"}, - {"0010|0020", "Patient ID"}, + Tag(0x0010, 0x0010), // "Patient's Name" + Tag(0x0010, 0x0020), // "Patient ID" // Issuer of Patient ID Macro Elements - {"0010|0021", "Issuer of Patient ID"}, - {"0010|0024", "Issuer of Patient ID Qualifiers Sequence"}, - {"0010|0022", "Type of Patient ID"}, - {"0010|0030", "Patient's Birth Date"}, - {"0010|0033", "Patient's Birth Date in Alternative Calendar"}, - {"0010|0034", "Patient's Death Date in Alternative Calendar"}, - {"0010|0035", "Patient's Alternative Calendar Attribute"}, - {"0010|0040", "Patient's Sex"}, - {"0010|1100", "Referenced Patient Photo Sequence"}, - {"0010|0200", "Quality Control Subject"}, - {"0008|1120", "Referenced Patient Sequence"}, - {"0010|0032", "Patient's Birth Time"}, - {"0010|1002", "Other Patient IDs Sequence"}, - {"0010|1001", "Other Patient Names"}, - {"0010|2160", "Ethnic Group"}, - {"0010|4000", "Patient Comments"}, - {"0010|2201", "Patient Species Description"}, - {"0010|2202", "Patient Species Code Sequence Attribute"}, - {"0010|2292", "Patient Breed Description"}, - {"0010|2293", "Patient Breed Code Sequence"}, - {"0010|2294", "Breed Registration Sequence Attribute"}, - {"0010|0212", "Strain Description"}, - {"0010|0213", "Strain Nomenclature Attribute"}, - {"0010|0219", "Strain Code Sequence"}, - {"0010|0218", "Strain Additional Information Attribute"}, - {"0010|0216", "Strain Stock Sequence"}, - {"0010|0221", "Genetic Modifications Sequence Attribute"}, - {"0010|2297", "Responsible Person"}, - {"0010|2298", "Responsible Person Role Attribute"}, - {"0010|2299", "Responsible Organization"}, - {"0012|0062", "Patient Identity Removed"}, - {"0012|0063", "De-identification Method"}, - {"0012|0064", "De-identification Method Code Sequence"}, + Tag(0x0010, 0x0021), // "Issuer of Patient ID" + Tag(0x0010, 0x0024), // "Issuer of Patient ID Qualifiers Sequence" + Tag(0x0010, 0x0022), // "Type of Patient ID" + Tag(0x0010, 0x0030), // "Patient's Birth Date" + Tag(0x0010, 0x0033), // "Patient's Birth Date in Alternative Calendar" + Tag(0x0010, 0x0034), // "Patient's Death Date in Alternative Calendar" + Tag(0x0010, 0x0035), // "Patient's Alternative Calendar Attribute" + Tag(0x0010, 0x0040), // "Patient's Sex" + Tag(0x0010, 0x1100), // "Referenced Patient Photo Sequence" + Tag(0x0010, 0x0200), // "Quality Control Subject" + Tag(0x0008, 0x1120), // "Referenced Patient Sequence" + Tag(0x0010, 0x0032), // "Patient's Birth Time" + Tag(0x0010, 0x1002), // "Other Patient IDs Sequence" + Tag(0x0010, 0x1001), // "Other Patient Names" + Tag(0x0010, 0x2160), // "Ethnic Group" + Tag(0x0010, 0x4000), // "Patient Comments" + Tag(0x0010, 0x2201), // "Patient Species Description" + Tag(0x0010, 0x2202), // "Patient Species Code Sequence Attribute" + Tag(0x0010, 0x2292), // "Patient Breed Description" + Tag(0x0010, 0x2293), // "Patient Breed Code Sequence" + Tag(0x0010, 0x2294), // "Breed Registration Sequence Attribute" + Tag(0x0010, 0x0212), // "Strain Description" + Tag(0x0010, 0x0213), // "Strain Nomenclature Attribute" + Tag(0x0010, 0x0219), // "Strain Code Sequence" + Tag(0x0010, 0x0218), // "Strain Additional Information Attribute" + Tag(0x0010, 0x0216), // "Strain Stock Sequence" + Tag(0x0010, 0x0221), // "Genetic Modifications Sequence Attribute" + Tag(0x0010, 0x2297), // "Responsible Person" + Tag(0x0010, 0x2298), // "Responsible Person Role Attribute" + Tag(0x0010, 0x2299), // "Responsible Organization" + Tag(0x0012, 0x0062), // "Patient Identity Removed" + Tag(0x0012, 0x0063), // "De-identification Method" + Tag(0x0012, 0x0064), // "De-identification Method Code Sequence" // Patient Group Macro Elements - {"0010|0026", "Source Patient Group Identification Sequence"}, - {"0010|0027", "Group of Patients Identification Sequence"}, + Tag(0x0010, 0x0026), // "Source Patient Group Identification Sequence" + Tag(0x0010, 0x0027), // "Group of Patients Identification Sequence" // Clinical Trial Subject Module - {"0012|0010", "Clinical Trial Sponsor Name"}, - {"0012|0020", "Clinical Trial Protocol ID"}, - {"0012|0021", "Clinical Trial Protocol Name Attribute"}, - {"0012|0030", "Clinical Trial Site ID"}, - {"0012|0031", "Clinical Trial Site Name"}, - {"0012|0040", "Clinical Trial Subject ID"}, - {"0012|0042", "Clinical Trial Subject Reading ID"}, - {"0012|0081", "Clinical Trial Protocol Ethics Committee Name"}, - {"0012|0082", "Clinical Trial Protocol Ethics Committee Approval Number"}, + Tag(0x0012, 0x0010), // "Clinical Trial Sponsor Name" + Tag(0x0012, 0x0020), // "Clinical Trial Protocol ID" + Tag(0x0012, 0x0021), // "Clinical Trial Protocol Name Attribute" + Tag(0x0012, 0x0030), // "Clinical Trial Site ID" + Tag(0x0012, 0x0031), // "Clinical Trial Site Name" + Tag(0x0012, 0x0040), // "Clinical Trial Subject ID" + Tag(0x0012, 0x0042), // "Clinical Trial Subject Reading ID" + Tag(0x0012, 0x0081), // "Clinical Trial Protocol Ethics Committee Name" + Tag(0x0012, 0x0082) // "Clinical Trial Protocol Ethics Committee Approval Number" }; -const TagNames STUDY_TAG_NAMES = { +const Tags STUDY_TAGS = { // General Study Module - {"0020|000d", "Study Instance UID"}, - {"0008|0020", "Study Date"}, - {"0008|0030", "Study Time"}, - {"0008|0090", "Referring Physician's Name"}, - {"0008|0096", "Referring Physician Identification Sequence"}, - {"0008|009c", "Consulting Physician's Name"}, - {"0008|009d", "Consulting Physician Identification Sequence"}, - {"0020|0010", "Study ID"}, - {"0008|0050", "Accession Number"}, - {"0008|0051", "Issuer of Accession Number Sequence"}, - {"0008|1030", "Study Description"}, - {"0008|1048", "Physician(s) of Record"}, - {"0008|1049", "Physician(s) of Record Identification Sequence"}, - {"0008|1060", "Name of Physician(s) Reading Study"}, - {"0008|1062", "Physician(s) Reading Study Identification Sequence"}, - {"0032|1033", "Requesting Service"}, - {"0032|1034", "Requesting Service Code Sequence"}, - {"0008|1110", "Referenced Study Sequence"}, - {"0008|1032", "Procedure Code Sequence"}, - {"0040|1012", "Reason For Performed Procedure Code Sequence"}, + Tag(0x0020, 0x000d), // "Study Instance UID" + Tag(0x0008, 0x0020), // "Study Date" + Tag(0x0008, 0x0030), // "Study Time" + Tag(0x0008, 0x0090), // "Referring Physician's Name" + Tag(0x0008, 0x0096), // "Referring Physician Identification Sequence" + Tag(0x0008, 0x009c), // "Consulting Physician's Name" + Tag(0x0008, 0x009d), // "Consulting Physician Identification Sequence" + Tag(0x0020, 0x0010), // "Study ID" + Tag(0x0008, 0x0050), // "Accession Number" + Tag(0x0008, 0x0051), // "Issuer of Accession Number Sequence" + Tag(0x0008, 0x1030), // "Study Description" + Tag(0x0008, 0x1048), // "Physician(s) of Record" + Tag(0x0008, 0x1049), // "Physician(s) of Record Identification Sequence" + Tag(0x0008, 0x1060), // "Name of Physician(s) Reading Study" + Tag(0x0008, 0x1062), // "Physician(s) Reading Study Identification Sequence" + Tag(0x0032, 0x1033), // "Requesting Service" + Tag(0x0032, 0x1034), // "Requesting Service Code Sequence" + Tag(0x0008, 0x1110), // "Referenced Study Sequence" + Tag(0x0008, 0x1032), // "Procedure Code Sequence" + Tag(0x0040, 0x1012), // "Reason For Performed Procedure Code Sequence" // Patient Study Module - {"0008|1080", "Admitting Diagnoses Description"}, - {"0008|1084", "Admitting Diagnoses Code Sequence"}, - {"0010|1010", "Patient's Age"}, - {"0010|1020", "Patient's Size"}, - {"0010|1030", "Patient's Weight"}, - {"0010|1022", "Patient's Body Mass Index"}, - {"0010|1023", "Measured AP Dimension"}, - {"0010|1024", "Measured Lateral Dimension"}, - {"0010|1021", "Patient's Size Code Sequence"}, - {"0010|2000", "Medical Alerts"}, - {"0010|2110", "Allergies"}, - {"0010|21a0", "Smoking Status"}, - {"0010|21c0", "Pregnancy Status"}, - {"0010|21d0", "Last Menstrual Date"}, - {"0038|0500", "Patient State"}, - {"0010|2180", "Occupation"}, - {"0010|21b0", "Additional Patient History"}, - {"0038|0010", "Admission ID"}, - {"0038|0014", "Issuer of Admission ID Sequence"}, - {"0032|1066", "Reason for Visit"}, - {"0032|1067", "Reason for Visit Code Sequence"}, - {"0038|0060", "Service Episode ID"}, - {"0038|0064", "Issuer of Service Episode ID Sequence"}, - {"0038|0062", "Service Episode Description"}, - {"0010|2203", "Patient's Sex Neutered"}, + Tag(0x0008, 0x1080), // "Admitting Diagnoses Description" + Tag(0x0008, 0x1084), // "Admitting Diagnoses Code Sequence" + Tag(0x0010, 0x1010), // "Patient's Age" + Tag(0x0010, 0x1020), // "Patient's Size" + Tag(0x0010, 0x1030), // "Patient's Weight" + Tag(0x0010, 0x1022), // "Patient's Body Mass Index" + Tag(0x0010, 0x1023), // "Measured AP Dimension" + Tag(0x0010, 0x1024), // "Measured Lateral Dimension" + Tag(0x0010, 0x1021), // "Patient's Size Code Sequence" + Tag(0x0010, 0x2000), // "Medical Alerts" + Tag(0x0010, 0x2110), // "Allergies" + Tag(0x0010, 0x21a0), // "Smoking Status" + Tag(0x0010, 0x21c0), // "Pregnancy Status" + Tag(0x0010, 0x21d0), // "Last Menstrual Date" + Tag(0x0038, 0x0500), // "Patient State" + Tag(0x0010, 0x2180), // "Occupation" + Tag(0x0010, 0x21b0), // "Additional Patient History" + Tag(0x0038, 0x0010), // "Admission ID" + Tag(0x0038, 0x0014), // "Issuer of Admission ID Sequence" + Tag(0x0032, 0x1066), // "Reason for Visit" + Tag(0x0032, 0x1067), // "Reason for Visit Code Sequence" + Tag(0x0038, 0x0060), // "Service Episode ID" + Tag(0x0038, 0x0064), // "Issuer of Service Episode ID Sequence" + Tag(0x0038, 0x0062), // "Service Episode Description" + Tag(0x0010, 0x2203), // "Patient's Sex Neutered" // Clinical Trial Study Module - {"0012|0050", "Clinical Trial Time Point ID"}, - {"0012|0051", "Clinical Trial Time Point Description"}, - {"0012|0052", "Longitudinal Temporal Offset from Event"}, - {"0012|0053", "Longitudinal Temporal Event Type"}, - {"0012|0083", "Consent for Clinical Trial Use Sequence"}, + Tag(0x0012, 0x0050), // "Clinical Trial Time Point ID" + Tag(0x0012, 0x0051), // "Clinical Trial Time Point Description" + Tag(0x0012, 0x0052), // "Longitudinal Temporal Offset from Event" + Tag(0x0012, 0x0053), // "Longitudinal Temporal Event Type" + Tag(0x0012, 0x0083) // "Consent for Clinical Trial Use Sequence" }; -const TagNames SERIES_TAG_NAMES = { +const Tags SERIES_TAGS = { // General Series Module - {"0008|0060", "Modality"}, - {"0020|000e", "Series Instance UID"}, - {"0020|0011", "Series Number"}, - {"0020|0060", "Laterality"}, - {"0008|0021", "Series Date"}, - {"0008|0031", "Series Time"}, - {"0008|1050", "Performing Physician's Name"}, - {"0008|1052", "Performing Physician Identification Sequence"}, - {"0018|1030", "Protocol Name"}, - {"0008|103e", "Series Description"}, - {"0008|103f", "Series Description Code Sequence"}, - {"0008|1070", "Operators' Name"}, - {"0008|1072", "Operator Identification Sequence"}, - {"0008|1111", "Referenced Performed Procedure Step Sequence"}, - {"0008|1250", "Related Series Sequence"}, - {"0018|0015", "Body Part Examined"}, - {"0018|5100", "Patient Position"}, - {"0028|0108", "Smallest Pixel Value in Series"}, - {"0028|0109", "Largest Pixel Value in Series"}, - {"0040|0275", "Request Attributes Sequence"}, - {"0010|2210", "Anatomical Orientation Type"}, - {"300a|0700", "Treatment Session UID"}, + Tag(0x0008, 0x0060), // "Modality" + Tag(0x0020, 0x000e), // "Series Instance UID" + Tag(0x0020, 0x0011), // "Series Number" + Tag(0x0020, 0x0060), // "Laterality" + Tag(0x0008, 0x0021), // "Series Date" + Tag(0x0008, 0x0031), // "Series Time" + Tag(0x0008, 0x1050), // "Performing Physician's Name" + Tag(0x0008, 0x1052), // "Performing Physician Identification Sequence" + Tag(0x0018, 0x1030), // "Protocol Name" + Tag(0x0008, 0x103e), // "Series Description" + Tag(0x0008, 0x103f), // "Series Description Code Sequence" + Tag(0x0008, 0x1070), // "Operators' Name" + Tag(0x0008, 0x1072), // "Operator Identification Sequence" + Tag(0x0008, 0x1111), // "Referenced Performed Procedure Step Sequence" + Tag(0x0008, 0x1250), // "Related Series Sequence" + Tag(0x0018, 0x0015), // "Body Part Examined" + Tag(0x0018, 0x5100), // "Patient Position" + Tag(0x0028, 0x0108), // "Smallest Pixel Value in Series" + Tag(0x0028, 0x0109), // "Largest Pixel Value in Series" + Tag(0x0040, 0x0275), // "Request Attributes Sequence" + Tag(0x0010, 0x2210), // "Anatomical Orientation Type" + Tag(0x300a, 0x0700), // "Treatment Session UID" // Clinical Trial Series Module - {"0012|0060", "Clinical Trial Coordinating Center Name"}, - {"0012|0071", "Clinical Trial Series ID"}, - {"0012|0072", "Clinical Trial Series Description"}, + Tag(0x0012, 0x0060), // "Clinical Trial Coordinating Center Name" + Tag(0x0012, 0x0071), // "Clinical Trial Series ID" + Tag(0x0012, 0x0072), // "Clinical Trial Series Description" // General Equipment Module - {"0008|0070", "Manufacturer"}, - {"0008|0080", "Institution Name"}, - {"0008|0081", "Institution Address"}, - {"0008|1010", "Station Name"}, - {"0008|1040", "Institutional Department Name"}, - {"0008|1041", "Institutional Department Type Code Sequence"}, - {"0008|1090", "Manufacturer's Model Name"}, - {"0018|100b", "Manufacturer's Device Class UID"}, - {"0018|1000", "Device Serial Number"}, - {"0018|1020", "Software Versions"}, - {"0018|1008", "Gantry ID"}, - {"0018|100a", "UDI Sequence"}, - {"0018|1002", "Device UID"}, - {"0018|1050", "Spatial Resolution"}, - {"0018|1200", "Date of Last Calibration"}, - {"0018|1201", "Time of Last Calibration"}, - {"0028|0120", "Pixel Padding Value"}, + Tag(0x0008, 0x0070), // "Manufacturer" + Tag(0x0008, 0x0080), // "Institution Name" + Tag(0x0008, 0x0081), // "Institution Address" + Tag(0x0008, 0x1010), // "Station Name" + Tag(0x0008, 0x1040), // "Institutional Department Name" + Tag(0x0008, 0x1041), // "Institutional Department Type Code Sequence" + Tag(0x0008, 0x1090), // "Manufacturer's Model Name" + Tag(0x0018, 0x100b), // "Manufacturer's Device Class UID" + Tag(0x0018, 0x1000), // "Device Serial Number" + Tag(0x0018, 0x1020), // "Software Versions" + Tag(0x0018, 0x1008), // "Gantry ID" + Tag(0x0018, 0x100a), // "UDI Sequence" + Tag(0x0018, 0x1002), // "Device UID" + Tag(0x0018, 0x1050), // "Spatial Resolution" + Tag(0x0018, 0x1200), // "Date of Last Calibration" + Tag(0x0018, 0x1201), // "Time of Last Calibration" + Tag(0x0028, 0x0120), // "Pixel Padding Value" // Frame of Reference Module - {"0020|0052", "Frame of Reference UID"}, - {"0020|1040", "Position Reference Indicator"}, + Tag(0x0020, 0x0052), // "Frame of Reference UID" + Tag(0x0020, 0x1040), // "Position Reference Indicator" }; -TagMap extractAndRename(const TagMap &tags, const TagNames &keeperTags) -{ - TagMap extracted; - for (const auto &[key, name] : keeperTags) - { - const auto it = tags.find(key); - if (it != tags.end()) - { - extracted[name] = it->second; - } - } - return extracted; -} - -TagMap remove(const TagMap &tags, const TagNames &removeTags) -{ - TagMap filteredTags = tags; - for (const auto &[key, name] : removeTags) - { - filteredTags.erase(key); - } - return filteredTags; -} - -TagMap relabel(const TagMap &tags) -{ - TagMap relabelTags; - for (const auto &[key, value] : tags) - { - std::string name = key; - if (itk::GDCMImageIO::GetLabelFromTag(key, name)) - { - relabelTags[name] = value; - } - else - { - relabelTags[key] = value; - } - } - return relabelTags; -} - #endif // TAGS_H \ No newline at end of file diff --git a/packages/dicom/gdcm/image-sets-normalization.cxx b/packages/dicom/gdcm/image-sets-normalization.cxx index bf80a03fd..1fa68056f 100644 --- a/packages/dicom/gdcm/image-sets-normalization.cxx +++ b/packages/dicom/gdcm/image-sets-normalization.cxx @@ -1,5 +1,5 @@ /*========================================================================= - + * * Copyright NumFOCUS * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,12 +15,33 @@ * limitations under the License. * *=========================================================================*/ +/*========================================================================= + + Program: GDCM (Grassroots DICOM). A DICOM library + + Copyright (c) 2006-2011 Mathieu Malaterre + All rights reserved. + See Copyright.txt or http://gdcm.sourceforge.net/Copyright.html for details. + + This software is distributed WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + PURPOSE. See the above copyright notice for more information. + +=========================================================================*/ #include #include "rapidjson/document.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" +#include "gdcmGlobal.h" +#include "gdcmDicts.h" +#include "gdcmImageReader.h" + +#include "itksys/SystemTools.hxx" +#include "itksys/Base64.h" +#include "itkMakeUniqueForOverwrite.h" + #include "itkPipeline.h" #include "itkOutputTextStream.h" @@ -28,49 +49,421 @@ #include "Tags.h" #include "SortSpatially.h" -const std::string STUDY_INSTANCE_UID = "0020|000D"; -const std::string SERIES_INSTANCE_UID = "0020|000e"; -const std::string FRAME_OF_REFERENCE_UID = "0020|0052"; -const std::string IMAGE_ORIENTATION_PATIENT = "0020|0037"; - -rapidjson::Value mapToJsonObj(const TagMap &tags, rapidjson::Document::AllocatorType &allocator) +std::string getLabelFromTag(const gdcm::Tag &tag, const gdcm::DataSet &dataSet) { - rapidjson::Value json(rapidjson::kObjectType); - for (const auto &[tag, value] : tags) + std::string strowner; + const char *owner = 0; + if (tag.IsPrivate() && !tag.IsPrivateCreator()) { - rapidjson::Value tagName; - tagName.SetString(tag.c_str(), tag.size(), allocator); - rapidjson::Value tagValue; - tagValue.SetString(value.c_str(), value.size(), allocator); - json.AddMember(tagName, tagValue, allocator); + strowner = dataSet.GetPrivateCreator(tag); + owner = strowner.c_str(); } - return json; + const gdcm::Global &g = gdcm::Global::GetInstance(); + const gdcm::Dicts &dicts = g.GetDicts(); + const gdcm::DictEntry &entry = dicts.GetDictEntry(tag, owner); + return entry.GetKeyword(); } -TagMap filterTags(const TagMap &tags, const TagKeys &keeperTagKeys) +namespace gdcm { - TagMap filteredTags; - for (const auto &tagName : keeperTagKeys) + + inline bool canContainBackslash(const VR::VRType vrType) { - const auto it = tags.find(tagName); - if (it != tags.end()) + assert(VR::IsASCII(vrType)); + // PS 3.5-2011 / Table 6.2-1 DICOM VALUE REPRESENTATIONS + switch (vrType) { - filteredTags[tagName] = it->second; + case VR::AE: // ScheduledStationAETitle + // case VR::AS: // no + // case VR::AT: // binary + case VR::CS: // SpecificCharacterSet + case VR::DA: // CalibrationDate + case VR::DS: // FrameTimeVector + case VR::DT: // ReferencedDateTime + // case VR::FD: // binary + // case VR::FL: + case VR::IS: // ReferencedFrameNumber + case VR::LO: // OtherPatientIDs + // case VR::LT: // VM1 + // case VR::OB: // binary + // case VR::OD: // binary + // case VR::OF: // binary + // case VR::OW: // binary + case VR::PN: // PerformingPhysicianName + case VR::SH: // PatientTelephoneNumbers + // case VR::SL: // binary + // case VR::SQ: // binary + // case VR::SS: // binary + // case VR::ST: // VM1 + case VR::TM: // CalibrationTime + case VR::UI: // SOPClassesInStudy + // case VR::UL: // binary + // case VR::UN: // binary + // case VR::US: // binary + // case VR::UT: // VM1 + assert(!(vrType & VR::VR_VM1)); + return true; + default:; } + return false; } - return filteredTags; -} -rapidjson::Value jsonFromTags(const TagMap &tags, const TagKeys &tagKeys, rapidjson::Document::AllocatorType &allocator) -{ - const TagMap filteredTags = filterTags(tags, tagKeys); - return mapToJsonObj(filteredTags, allocator); -} + void dataElementToJSONArray(const VR::VRType vr, const DataElement &de, rapidjson::Value &jsonArray, rapidjson::Document::AllocatorType &allocator) + { + jsonArray.SetArray(); + if (de.IsEmpty()) + { + // F.2.5 DICOM JSON Model Null Values + if (vr == VR::PN) + { + jsonArray.PushBack(rapidjson::Value(rapidjson::kObjectType), allocator); + } + return; + } + const bool checkbackslash = canContainBackslash(vr); + const ByteValue *bv = de.GetByteValue(); + const char *value = bv->GetPointer(); + size_t len = bv->GetLength(); -rapidjson::Value jsonFromTags(const TagMap &tags, const TagNames &tagNames, rapidjson::Document::AllocatorType &allocator) -{ - const TagMap filteredTags = extractAndRename(tags, tagNames); - return mapToJsonObj(filteredTags, allocator); + if (vr == VR::UI) + { + const std::string strui(value, len); + const size_t lenuid = strlen(strui.c_str()); // trick to remove trailing \0 + rapidjson::Value stringValue; + stringValue.SetString(strui.c_str(), lenuid, allocator); + jsonArray.PushBack(stringValue, allocator); + } + else if (vr == VR::PN) + { + const char *str1 = value; + // remove whitespace: + while (str1[len - 1] == ' ') + { + len--; + } + assert(str1); + std::stringstream ss; + // static const char *Keys[] = { + // "Alphabetic", + // "Ideographic", + // "Phonetic", + // }; + while (1) + { + assert(str1 && (size_t)(str1 - value) <= len); + const char *sep = strchr(str1, '\\'); + const size_t llen = (sep != NULL) ? (sep - str1) : (value + len - str1); + const std::string component(str1, llen); + + const char *str2 = component.c_str(); + assert(str2); + const size_t len2 = component.size(); + assert(len2 == llen); + + int idx = 0; + // Just get Alphabetic name, hence the comments and extra breaks + // rapidjson::Value namesObject(rapidjson::kObjectType); + rapidjson::Value name; + while (1) + { + assert(str2 && (size_t)(str2 - component.c_str()) <= len2); + const char *sep2 = strchr(str2, '='); + const size_t llen2 = (sep2 != NULL) ? (sep2 - str2) : (component.c_str() + len2 - str2); + const std::string group(str2, llen2); + // const char *thekey = Keys[idx++]; + + // rapidjson::Value nameType(thekey, allocator); + name.SetString(group.c_str(), group.size(), allocator); + + // namesObject.AddMember(nameType, name, allocator); + break; // just Alphabetic, short circuit + // if (sep2 == NULL) + // break; + // str2 = sep2 + 1; + } + // jsonArray.PushBack(namesObject, allocator); + jsonArray.PushBack(name, allocator); + break; // just Alphabetic, short circuit + if (sep == NULL) + break; + str1 = sep + 1; + assert(checkbackslash); + } + } + else if (vr == VR::DS || vr == VR::IS) + { + const char *str1 = value; + assert(str1); + VRToType::Type vris; + VRToType::Type vrds; + while (1) + { + std::stringstream ss; + assert(str1 && (size_t)(str1 - value) <= len); + const char *sep = strchr(str1, '\\'); + const size_t llen = (sep != NULL) ? (sep - str1) : (value + len - str1); + rapidjson::Value elementValue; + // This is complex, IS/DS should not be stored as string anymore + switch (vr) + { + case VR::IS: + ss.str(std::string(str1, llen)); + ss >> vris; + elementValue.SetInt(vris); + jsonArray.PushBack(elementValue, allocator); + break; + case VR::DS: + ss.str(std::string(str1, llen)); + ss >> vrds; + jsonArray.PushBack(rapidjson::Value(vrds), allocator); + break; + default: + assert(0); // programmer error + } + if (sep == NULL) + break; + str1 = sep + 1; + assert(checkbackslash); + } + } + else if (checkbackslash) + { + const char *str1 = value; + assert(str1); + while (1) + { + assert(str1 && (size_t)(str1 - value) <= len); + const char *sep = strchr(str1, '\\'); + const size_t llen = (sep != NULL) ? (sep - str1) : (value + len - str1); + // json_object_array_add(my_array, json_object_new_string_len(str1, llen)); + rapidjson::Value valueString; + valueString.SetString(str1, llen, allocator); + jsonArray.PushBack(valueString, allocator); + if (sep == NULL) + break; + str1 = sep + 1; + } + } + else // default + { + rapidjson::Value valueString; + valueString.SetString(value, len, allocator); + jsonArray.PushBack(valueString, allocator); + } + } + + const gdcm::Tag PIXEL_DATA_TAG = gdcm::Tag(0x7fe0, 0x0010); + rapidjson::Value *toJson(const gdcm::DataSet &dataSet, const Tags pickTags, const Tags skipTags, rapidjson::Value &dicomTagsObject, rapidjson::Document::AllocatorType &allocator) + { + for (gdcm::DataSet::ConstIterator it = dataSet.Begin(); it != dataSet.End(); ++it) + { + const gdcm::DataElement &de = *it; + VR::VRType vr = de.GetVR(); + const gdcm::Tag &t = de.GetTag(); + if (t.IsGroupLength() || t == PIXEL_DATA_TAG || skipTags.find(t) != skipTags.end()) + continue; // skip useless group length and pixel data tag + if (!pickTags.empty() && pickTags.find(t) == pickTags.end()) + continue; // skip tags that are not in the pick list if it has any + + const bool isSequence = vr == VR::SQ || de.IsUndefinedLength(); + const bool isPrivateCreator = t.IsPrivateCreator(); + if (isSequence) + vr = VR::SQ; + else if (isPrivateCreator) + vr = VR::LO; // always prefer VR::LO (over invalid/UN) + else if (vr == VR::INVALID) + vr = VR::UN; + const char *vr_str = VR::GetVRString(vr); + assert(VR::GetVRTypeFromFile(vr_str) != VR::INVALID); + + rapidjson::Value tagValue; + + if (vr == VR::SQ) + { + // Sequence Value Representations are nested datasets + SmartPointer sqi; + sqi = de.GetValueAsSQ(); + if (sqi) + { + tagValue.SetArray(); + int nitems = sqi->GetNumberOfItems(); + for (int i = 1; i <= nitems; ++i) + { + const Item &item = sqi->GetItem(i); + const DataSet &nested = item.GetNestedDataSet(); + rapidjson::Value sequenceObject(rapidjson::kObjectType); + // grab all nested tags, empty pick and skip tag sets + toJson(nested, {}, {}, sequenceObject, allocator); + tagValue.PushBack(sequenceObject, allocator); + } + } + + // Strange code from gdcmJSON.cxx + // else if (const SequenceOfFragments *sqf = de.GetSequenceOfFragments()) + // { + // tagValue.SetNull(); // FIXME + // assert(0); + // } + // else + // { + // assert(de.IsEmpty()); + // // json_object_array_add(my_array, NULL ); // F.2.5 req ? + // } + } + else if (VR::IsASCII(vr)) + { + dataElementToJSONArray(vr, de, tagValue, allocator); + } + else + { + tagValue.SetArray(); + + switch (vr) + { + case VR::FD: + { + Element el; + el.Set(de.GetValue()); + int ellen = el.GetLength(); + for (int i = 0; i < ellen; ++i) + { + rapidjson::Value elValue; + elValue.SetDouble(el.GetValue(i)); + tagValue.PushBack(elValue, allocator); + } + } + break; + case VR::FL: + { + Element el; + el.Set(de.GetValue()); + int ellen = el.GetLength(); + for (int i = 0; i < ellen; ++i) + { + rapidjson::Value elValue; + elValue.SetFloat(el.GetValue(i)); + tagValue.PushBack(elValue, allocator); + } + } + break; + case VR::SS: + { + Element el; + el.Set(de.GetValue()); + int ellen = el.GetLength(); + for (int i = 0; i < ellen; ++i) + { + rapidjson::Value elValue; + elValue.SetInt(el.GetValue(i)); + tagValue.PushBack(elValue, allocator); + } + } + break; + case VR::US: + { + Element el; + el.Set(de.GetValue()); + int ellen = el.GetLength(); + for (int i = 0; i < ellen; ++i) + { + rapidjson::Value elValue; + elValue.SetUint(el.GetValue(i)); + tagValue.PushBack(elValue, allocator); + } + } + break; + case VR::SL: + { + Element el; + el.Set(de.GetValue()); + int ellen = el.GetLength(); + for (int i = 0; i < ellen; ++i) + { + rapidjson::Value elValue; + elValue.SetInt(el.GetValue(i)); + tagValue.PushBack(elValue, allocator); + } + } + break; + case VR::UL: + { + Element el; + el.Set(de.GetValue()); + int ellen = el.GetLength(); + for (int i = 0; i < ellen; ++i) + { + rapidjson::Value elValue; + elValue.SetUint(el.GetValue(i)); + tagValue.PushBack(elValue, allocator); + } + } + break; + case VR::AT: + { + Element el; + el.Set(de.GetValue()); + int ellen = el.GetLength(); + for (int i = 0; i < ellen; ++i) + { + const std::string atstr = el.GetValue(i).PrintAsContinuousUpperCaseString(); + rapidjson::Value jsonElement; + jsonElement.SetString(atstr.c_str(), atstr.size(), allocator); + tagValue.PushBack(jsonElement, allocator); + } + } + break; + case VR::UN: + case VR::INVALID: + case VR::OD: + case VR::OF: + case VR::OB: + case VR::OW: + { + assert(!de.IsUndefinedLength()); // handled before + const gdcm::ByteValue *bv = de.GetByteValue(); + if (bv) + { + // base64 streams have to be a multiple of 4 bytes in length + int encodedLengthEstimate = 2 * bv->GetLength(); + encodedLengthEstimate = ((encodedLengthEstimate / 4) + 1) * 4; + + const auto bin = itk::make_unique_for_overwrite(encodedLengthEstimate); + auto encodedLengthActual = + static_cast(itksysBase64_Encode((const unsigned char *)bv->GetPointer(), + static_cast(bv->GetLength()), + (unsigned char *)bin.get(), + 0)); + std::string encodedValue(bin.get(), encodedLengthActual); + tagValue.SetString(encodedValue.c_str(), encodedValue.size(), allocator); + } + } + break; + default: + assert(0); // programmer error + } // end switch + } // end array else + + if (tagValue.IsArray()) + { + int arraySize = tagValue.Size(); + if (arraySize == 0) + { + continue; // skip empty arrays + } + else if (arraySize == 1) + { + // Unwrap array of size 1 + tagValue = tagValue[0]; // different from gdcmJSON.cxx + } + } + + const std::string &label = getLabelFromTag(t, dataSet); + rapidjson::Value tagName; + tagName.SetString(label.c_str(), label.size(), allocator); + dicomTagsObject.AddMember(tagName, tagValue, allocator); + } + return &dicomTagsObject; + } } using FileName = std::string; @@ -78,7 +471,6 @@ using FileName = std::string; struct DicomFile { FileName fileName; - TagMap tags; gdcm::DataSet dataSet; DicomFile(const FileName &fileName) @@ -87,10 +479,18 @@ struct DicomFile itk::DICOMTagReader tagReader; if (!tagReader.CanReadFile(fileName)) { - throw std::runtime_error("Could not read the input DICOM file: " + fileName); + throw std::runtime_error("Can not read the input DICOM file: " + fileName); } tagReader.SetFileName(fileName); - tags = tagReader.ReadAllTags(); + + gdcm::ImageReader reader; + reader.SetFileName(fileName.c_str()); + if (!reader.Read()) + { + throw std::runtime_error("Failed to read the input DICOM file: " + fileName); + } + const gdcm::File &f = reader.GetFile(); + dataSet = f.GetDataSet(); } bool operator==(const DicomFile &other) const @@ -108,11 +508,11 @@ struct dicomFileHash }; using DicomFiles = std::unordered_set; -DicomFiles loadFiles(const std::vector &files) +DicomFiles loadFiles(const std::vector &fileNames) { DicomFiles dicomFiles; itk::DICOMTagReader tagReader; - for (const FileName &fileName : files) + for (const FileName &fileName : fileNames) { dicomFiles.insert(DicomFile(fileName)); } @@ -123,17 +523,30 @@ using Volume = std::vector; using Volumes = std::vector; // aka ImageSet using ImageSets = std::vector; -bool compareTags(const TagMap &tags1, const TagMap &tags2, const TagKeys &tagKeys) +std::pair getTagBuffer(const gdcm::DataSet &ds, const gdcm::Tag &tag) +{ + if (!ds.FindDataElement(tag) || ds.GetDataElement(tag).IsEmpty()) + { + return std::make_pair(nullptr, 0); + } + const gdcm::DataElement de = ds.GetDataElement(tag); + const gdcm::ByteValue *bv = de.GetByteValue(); + const char *tagValue = bv->GetPointer(); + size_t len = bv->GetLength(); + return std::make_pair(tagValue, len); +} + +bool compareTags(const gdcm::DataSet &tagsA, const gdcm::DataSet &tagsB, const Tags &tagKeys) { for (const auto &tagKey : tagKeys) { - const auto tagA = tags1.find(tagKey); - const auto tagB = tags2.find(tagKey); - if (tagA == tags1.end() || tagB == tags2.end()) + const auto tagA = getTagBuffer(tagsA, tagKey); + const auto tagB = getTagBuffer(tagsB, tagKey); + if (tagA.first == nullptr || tagB.first == nullptr) { return false; } - if (tagA->second != tagB->second) + if (std::memcmp(tagA.first, tagB.first, tagB.second) != 0) { return false; } @@ -141,9 +554,10 @@ bool compareTags(const TagMap &tags1, const TagMap &tags2, const TagKeys &tagKey return true; } -bool isSameVolume(const TagMap &tagsA, const TagMap &tagsB) +bool isSameVolume(const gdcm::DataSet &tagsA, const gdcm::DataSet &tagsB) { - return compareTags(tagsA, tagsB, {SERIES_INSTANCE_UID, FRAME_OF_REFERENCE_UID, IMAGE_ORIENTATION_PATIENT}); + const Tags criteria = {SERIES_UID, FRAME_OF_REFERENCE_UID}; + return compareTags(tagsA, tagsB, criteria); } Volumes groupByVolume(const DicomFiles &dicomFiles) @@ -151,9 +565,9 @@ Volumes groupByVolume(const DicomFiles &dicomFiles) Volumes volumes; for (const DicomFile &dicomFile : dicomFiles) { - const auto tags = dicomFile.tags; - auto matchingVolume = std::find_if(volumes.begin(), volumes.end(), [&tags](const Volume &volume) - { return isSameVolume(volume.begin()->tags, tags); }); + const auto candidate = dicomFile.dataSet; + auto matchingVolume = std::find_if(volumes.begin(), volumes.end(), [&candidate](const Volume &volume) + { return isSameVolume(volume.begin()->dataSet, candidate); }); if (matchingVolume != volumes.end()) { @@ -173,11 +587,11 @@ ImageSets groupByImageSet(const Volumes &volumes) ImageSets imageSets; for (const Volume &volume : volumes) { - const auto volumeTags = volume.begin()->tags; - auto matchingImageSet = std::find_if(imageSets.begin(), imageSets.end(), [&volumeTags](const Volumes &volumes) + const gdcm::DataSet volumeDataSet = volume.begin()->dataSet; + auto matchingImageSet = std::find_if(imageSets.begin(), imageSets.end(), [&volumeDataSet](const Volumes &volumes) { - const TagMap imageSetTags = volumes.begin()->begin()->tags; - return compareTags(imageSetTags, volumeTags, {STUDY_INSTANCE_UID}); }); + const gdcm::DataSet imageSetDataSet = volumes.begin()->begin()->dataSet; + return compareTags(volumeDataSet, imageSetDataSet, {STUDY_UID}); }); if (matchingImageSet != imageSets.end()) { matchingImageSet->push_back(volume); @@ -218,11 +632,28 @@ Volumes sortSpatially(Volumes &volumes) return sortedVolumes; } +std::string getUID(const gdcm::DataSet &ds, const Tag &tag) +{ + if (!ds.FindDataElement(tag) || ds.GetDataElement(tag).IsEmpty()) + { + throw std::runtime_error("Tag not found"); + } + const gdcm::DataElement de = ds.GetDataElement(tag); + const gdcm::ByteValue *bv = de.GetByteValue(); + const char *tagValue = bv->GetPointer(); + size_t len = bv->GetLength(); + return std::string(tagValue, len); +} + rapidjson::Document toJson(const ImageSets &imageSets) { rapidjson::Document imageSetsJson(rapidjson::kArrayType); rapidjson::Document::AllocatorType &allocator = imageSetsJson.GetAllocator(); - TagMap dicomTags; + gdcm::DataSet dataSet; + Tags instanceSkipTags; // filter out patient, study, series tags from instance object + instanceSkipTags.insert(PATIENT_TAGS.begin(), PATIENT_TAGS.end()); + instanceSkipTags.insert(STUDY_TAGS.begin(), STUDY_TAGS.end()); + instanceSkipTags.insert(SERIES_TAGS.begin(), SERIES_TAGS.end()); for (const Volumes &volumes : imageSets) { rapidjson::Value seriesById(rapidjson::kObjectType); @@ -232,13 +663,9 @@ rapidjson::Document toJson(const ImageSets &imageSets) for (const auto &dicomFile : volume) { FileName file = dicomFile.fileName; - dicomTags = dicomFile.tags; - // filter out patient, study, series tags - TagMap instanceTags = remove(dicomTags, PATIENT_TAG_NAMES); - instanceTags = remove(instanceTags, STUDY_TAG_NAMES); - instanceTags = remove(instanceTags, SERIES_TAG_NAMES); - instanceTags = relabel(instanceTags); - rapidjson::Value instanceTagsJson = mapToJsonObj(instanceTags, allocator); + dataSet = dicomFile.dataSet; + rapidjson::Value instanceTagsJson(rapidjson::kObjectType); + toJson(dataSet, {}, instanceSkipTags, instanceTagsJson, allocator); rapidjson::Value instance(rapidjson::kObjectType); instance.AddMember("DICOM", instanceTagsJson, allocator); @@ -251,25 +678,22 @@ rapidjson::Document toJson(const ImageSets &imageSets) instance.AddMember("ImageFrames", imageFrames, allocator); // instance by UID under instances - TagMap::iterator it = dicomTags.find("0008|0018"); - if (it == dicomTags.end()) - { - throw std::runtime_error("Instance UID not found in dicomTags"); - } - const auto tag = it->second; + const std::string instanceUID = getUID(dataSet, INSTANCE_UID); rapidjson::Value instanceId; - instanceId.SetString(tag.c_str(), tag.size(), allocator); + instanceId.SetString(instanceUID.c_str(), instanceUID.size(), allocator); instances.AddMember(instanceId, instance, allocator); } // Series - rapidjson::Value seriesTags = jsonFromTags(dicomTags, SERIES_TAG_NAMES, allocator); + rapidjson::Value seriesTags(rapidjson::kObjectType); + toJson(dataSet, SERIES_TAGS, {}, seriesTags, allocator); rapidjson::Value series(rapidjson::kObjectType); series.AddMember("DICOM", seriesTags, allocator); series.AddMember("Instances", instances, allocator); int volumeIndex = std::distance(volumes.begin(), std::find(volumes.begin(), volumes.end(), volume)); - const std::string seriesId = dicomTags.at(SERIES_INSTANCE_UID) + '.' + std::to_string(volumeIndex); + const std::string seriesId = getUID(dataSet, SERIES_UID) + '.' + std::to_string(volumeIndex); + rapidjson::Value seriesIdJson; seriesIdJson.SetString(seriesId.c_str(), seriesId.size(), allocator); seriesById.AddMember(seriesIdJson, series, allocator); @@ -278,15 +702,17 @@ rapidjson::Document toJson(const ImageSets &imageSets) rapidjson::Value imageSet(rapidjson::kObjectType); // Patient + rapidjson::Value patientTags(rapidjson::kObjectType); + toJson(dataSet, PATIENT_TAGS, {}, patientTags, allocator); rapidjson::Value patient(rapidjson::kObjectType); - rapidjson::Value patientTags = jsonFromTags(dicomTags, PATIENT_TAG_NAMES, allocator); patient.AddMember("DICOM", patientTags, allocator); imageSet.AddMember("Patient", patient, allocator); // Study + rapidjson::Value studyTags(rapidjson::kObjectType); + toJson(dataSet, STUDY_TAGS, {}, studyTags, allocator); rapidjson::Value study(rapidjson::kObjectType); - rapidjson::Value studyTagsJson = jsonFromTags(dicomTags, STUDY_TAG_NAMES, allocator); - study.AddMember("DICOM", studyTagsJson, allocator); + study.AddMember("DICOM", studyTags, allocator); study.AddMember("Series", seriesById, allocator); imageSet.AddMember("Study", study, allocator);