From b8bf692a2c56864f2a924245c309c0403a53ce18 Mon Sep 17 00:00:00 2001 From: Suyash Kumar Date: Sun, 9 Jun 2024 23:13:30 -0400 Subject: [PATCH] Parse: If transfer syntax is missing, attempt to infer it by peeking next 100 bytes. (#330) This PR attempts to infer missing transfer syntax in dicoms during Parse. Specifically: * When transfer syntax is missing in dicom metadata, attempt to infer the correct transfer syntax by peeking the next 100bytes and trying to read an element without an error. This isn't foolproof, but one option to start with. * This also makes test updates to support testfiles/ that may not have PixelData. * This also introduces a write option to write dicoms without transfer syntax elements, in order to write some "roundtrip" unit tests for this behavior on Parse. I was able to successfully test using some test data from #327, but I need to do some more investigation to see if we can safely add those to our test files (licensing and otherwise). Things to consider in the future: * Try deflated little endian explicit as well. * Peek more/less than the initial 100 bytes, or move away from a fixed peek. --- parse.go | 64 ++++++++++++++++++++++++++++++++++++++---- parse_internal_test.go | 59 ++++++++++++++++++++++++++++++++++++++ parse_test.go | 46 +++++++++++++++++------------- write.go | 46 +++++++++++++++++++----------- 4 files changed, 173 insertions(+), 42 deletions(-) diff --git a/parse.go b/parse.go index d4ab5e3..b8318f6 100644 --- a/parse.go +++ b/parse.go @@ -22,8 +22,10 @@ package dicom import ( "bufio" + "bytes" "encoding/binary" "errors" + "fmt" "io" "os" @@ -170,14 +172,64 @@ func NewParser(in io.Reader, bytesToRead int64, frameChannel chan *frame.Frame, if tsStr == uid.DeflatedExplicitVRLittleEndian { p.reader.rawReader.SetDeflate() } - } else { - // No transfer syntax found, warn the user we're proceeding with the - // default Little Endian implicit. - debug.Log("WARN: could not find transfer syntax uid in metadata, proceeding with little endian implicit") + p.SetTransferSyntax(bo, implicit) + return &p, nil } - p.SetTransferSyntax(bo, implicit) - return &p, nil + // No transfer syntax found, so let's try to infer the transfer syntax by + // trying to read the next element under various transfer syntaxes. + next100, err := p.reader.rawReader.Peek(100) + if errors.Is(err, io.EOF) { + // DICOM is shorter than 100 bytes. + return nil, fmt.Errorf("dicom with missing transfer syntax metadata is shorter than 100 bytes, so cannot infer transfer syntax") + } + + syntaxes := []struct { + name string + bo binary.ByteOrder + implicit bool + }{ + { + name: "Little Endian Implicit", + bo: binary.LittleEndian, + implicit: true, + }, + { + name: "Big Endian Explicit", + bo: binary.BigEndian, + implicit: false, + }, + { + name: "Little Endian Explicit", + bo: binary.LittleEndian, + implicit: false, + }, + } + + for _, syntax := range syntaxes { + if canReadElementFromBytes(next100, optSet, syntax.bo, syntax.implicit) { + debug.Logf("WARN: could not find transfer syntax uid in metadata, proceeding with %v", syntax.name) + p.SetTransferSyntax(syntax.bo, syntax.implicit) + return &p, nil + } + } + // TODO(https://github.com/suyashkumar/dicom/issues/329): consider trying + // deflated parsing as a fallback as well. + return &p, errors.New("dicom missing transfer syntax uid in metadata, and it was not possible to successfully infer it using the next 100 bytes of the dicom") +} + +func canReadElementFromBytes(buf []byte, optSet parseOptSet, bo binary.ByteOrder, implicit bool) bool { + next100Reader := bytes.NewReader(buf) + subR := &reader{ + rawReader: dicomio.NewReader(bufio.NewReader(next100Reader), bo, int64(len(buf))), + opts: optSet, + } + subR.rawReader.SetTransferSyntax(bo, implicit) + _, err := subR.readElement(nil, nil) + if err == nil { + return true + } + return false } // Next parses and returns the next top-level element from the DICOM this Parser points to. diff --git a/parse_internal_test.go b/parse_internal_test.go index b239855..18645af 100644 --- a/parse_internal_test.go +++ b/parse_internal_test.go @@ -1,11 +1,18 @@ package dicom import ( + "bytes" + "errors" "os" "strings" "testing" + + "github.com/suyashkumar/dicom/pkg/tag" ) +// parse_internal_test.go holds tests that must exist in the dicom package (as +// opposed to dicom_test), in order to access internal entities. + // TestParseUntilEOFConformsToParse runs both the dicom.ParseUntilEOF and the dicom.Parse APIs against each // testdata file and ensures the outputs are the same. // This test lives in parse_internal_test.go because this test cannot live in the dicom_test package, due @@ -48,6 +55,58 @@ func TestParseUntilEOFConformsToParse(t *testing.T) { } } +func TestParse_InfersMissingTransferSyntax(t *testing.T) { + dsWithMissingTS := Dataset{Elements: []*Element{ + mustNewElement(tag.MediaStorageSOPClassUID, []string{"1.2.840.10008.5.1.4.1.1.1.2"}), + mustNewElement(tag.MediaStorageSOPInstanceUID, []string{"1.2.3.4.5.6.7"}), + mustNewElement(tag.PatientName, []string{"Bob", "Jones"}), + mustNewElement(tag.Rows, []int{128}), + mustNewElement(tag.FloatingPointValue, []float64{128.10}), + mustNewElement(tag.DimensionIndexPointer, []int{32, 36950}), + mustNewElement(tag.RedPaletteColorLookupTableData, make([]byte, 200)), + }} + + cases := []struct { + name string + overrideTransferSyntax string + }{ + { + name: "Little Endian Implicit", + overrideTransferSyntax: "1.2.840.10008.1.2", + }, + { + name: "Little Endian Explicit", + overrideTransferSyntax: "1.2.840.10008.1.2.1", + }, + { + name: "Big Endian Explicit", + overrideTransferSyntax: "1.2.840.10008.1.2.2", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // Write out Dataset with OverrideMissingTransferSyntax option _and_ + // the skipWritingTransferSyntaxForTests to ensure no Transfer Syntax + // element is written to the test dicom. The test later verifies + // that no Transfer Syntax element was written to the metadata. + writtenDICOM := &bytes.Buffer{} + if err := Write(writtenDICOM, dsWithMissingTS, OverrideMissingTransferSyntax(tc.overrideTransferSyntax), skipWritingTransferSyntaxForTests()); err != nil { + t.Errorf("Write(OverrideMissingTransferSyntax(%v)) returned unexpected error: %v", tc.overrideTransferSyntax, err) + } + + parsedDS, err := ParseUntilEOF(writtenDICOM, nil) + if err != nil { + t.Fatalf("ParseUntilEOF returned unexpected error when reading written dataset back in: %v", err) + } + _, err = parsedDS.FindElementByTag(tag.TransferSyntaxUID) + if !errors.Is(err, ErrorElementNotFound) { + t.Fatalf("expected test dicom dataset to be missing explicit TransferSyntaxUID tag, but found one. got: %v, want: ErrorElementNotFound", err) + } + }) + } +} + func readTestdataFile(t *testing.T, name string) *os.File { dcm, err := os.Open("./testdata/" + name) if err != nil { diff --git a/parse_test.go b/parse_test.go index 4a4ae1f..b61efa0 100644 --- a/parse_test.go +++ b/parse_test.go @@ -100,18 +100,21 @@ func TestParseFile_SkipPixelData(t *testing.T) { runForEveryTestFile(t, func(t *testing.T, filename string) { dataset, err := dicom.ParseFile(filename, nil, dicom.SkipPixelData()) if err != nil { - t.Errorf("Unexpected error parsing dataset: %v", dataset) + t.Errorf("Unexpected error parsing dataset: %v, dataset: %v", err, dataset) } + // If PixelData present in this DICOM, check if it's populated + // correctly. The current test assumption is that if PixelData is + // missing, it was not originally in the dicom (which we should + // consider revisiting). el, err := dataset.FindElementByTag(tag.PixelData) - if err != nil { - t.Errorf("Unexpected error when finding PixelData in Dataset: %v", err) - } - pixelData := dicom.MustGetPixelDataInfo(el.Value) - if !pixelData.IntentionallySkipped { - t.Errorf("Expected pixelData.IntentionallySkipped=true, got false") - } - if got := len(pixelData.Frames); got != 0 { - t.Errorf("unexpected frames length. got: %v, want: %v", got, 0) + if err == nil { + pixelData := dicom.MustGetPixelDataInfo(el.Value) + if !pixelData.IntentionallySkipped { + t.Errorf("Expected pixelData.IntentionallySkipped=true, got false") + } + if got := len(pixelData.Frames); got != 0 { + t.Errorf("unexpected frames length. got: %v, want: %v", got, 0) + } } }) }) @@ -119,18 +122,21 @@ func TestParseFile_SkipPixelData(t *testing.T) { runForEveryTestFile(t, func(t *testing.T, filename string) { dataset, err := dicom.ParseFile(filename, nil) if err != nil { - t.Errorf("Unexpected error parsing dataset: %v", dataset) + t.Errorf("Unexpected error parsing dataset: %v, dataset: %v", err, dataset) } + // If PixelData present in this DICOM, check if it's populated + // correctly. The current test assumption is that if PixelData is + // missing, it was not originally in the dicom (which we should + // consider revisiting). el, err := dataset.FindElementByTag(tag.PixelData) - if err != nil { - t.Errorf("Unexpected error when finding PixelData in Dataset: %v", err) - } - pixelData := dicom.MustGetPixelDataInfo(el.Value) - if pixelData.IntentionallySkipped { - t.Errorf("Expected pixelData.IntentionallySkipped=false when SkipPixelData option not present, got true") - } - if len(pixelData.Frames) == 0 { - t.Errorf("unexpected frames length when SkipPixelData=false. got: %v, want: >0", len(pixelData.Frames)) + if err == nil { + pixelData := dicom.MustGetPixelDataInfo(el.Value) + if pixelData.IntentionallySkipped { + t.Errorf("Expected pixelData.IntentionallySkipped=false when SkipPixelData option not present, got true") + } + if len(pixelData.Frames) == 0 { + t.Errorf("unexpected frames length when SkipPixelData=false. got: %v, want: >0", len(pixelData.Frames)) + } } }) }) diff --git a/write.go b/write.go index 3b9be99..4175b18 100644 --- a/write.go +++ b/write.go @@ -156,12 +156,24 @@ func OverrideMissingTransferSyntax(transferSyntaxUID string) WriteOption { } } +// skipWritingTransferSyntaxForTests is a test WriteOption that cause Write to skip +// writing the transfer syntax uid element in the DICOM metadata. When used in +// combination with OverrideMissingTransferSyntax, this can be used to set the +// TransferSyntax for the written dataset without writing the actual transfer +// syntax element to the metadata. +func skipWritingTransferSyntaxForTests() WriteOption { + return func(set *writeOptSet) { + set.skipWritingTransferSyntaxForTests = true + } +} + // writeOptSet represents the flattened option set after all WriteOptions have been applied. type writeOptSet struct { - skipVRVerification bool - skipValueTypeVerification bool - defaultMissingTransferSyntax bool - overrideMissingTransferSyntaxUID string + skipVRVerification bool + skipValueTypeVerification bool + defaultMissingTransferSyntax bool + overrideMissingTransferSyntaxUID string + skipWritingTransferSyntaxForTests bool } func (w *writeOptSet) validate() error { @@ -203,21 +215,23 @@ func writeFileHeader(w *dicomio.Writer, ds *Dataset, metaElems []*Element, opts return err } - err = writeMetaElem(subWriter, tag.TransferSyntaxUID, ds, &tagsUsed, opts) + if !opts.skipWritingTransferSyntaxForTests { + err = writeMetaElem(subWriter, tag.TransferSyntaxUID, ds, &tagsUsed, opts) - if errors.Is(err, ErrorElementNotFound) && opts.defaultMissingTransferSyntax { - // Write the default transfer syntax - if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{uid.ImplicitVRLittleEndian}), opts); err != nil { - return err - } - } else if errors.Is(err, ErrorElementNotFound) && opts.overrideMissingTransferSyntaxUID != "" { - // Write the override transfer syntax - if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{opts.overrideMissingTransferSyntaxUID}), opts); err != nil { + if errors.Is(err, ErrorElementNotFound) && opts.defaultMissingTransferSyntax { + // Write the default transfer syntax + if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{uid.ImplicitVRLittleEndian}), opts); err != nil { + return err + } + } else if errors.Is(err, ErrorElementNotFound) && opts.overrideMissingTransferSyntaxUID != "" { + // Write the override transfer syntax + if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{opts.overrideMissingTransferSyntaxUID}), opts); err != nil { + return err + } + } else if err != nil { + // Return the error if none of the above conditions/overrides apply. return err } - } else if err != nil { - // Return the error if none of the above conditions/overrides apply. - return err } for _, elem := range metaElems {