Skip to content

Commit be3d2e5

Browse files
pichuan authored and copybara-github committed
Add BUILD rules for GBZ reader. Add to make_examples.
PiperOrigin-RevId: 679000601
1 parent c5c3ad7 commit be3d2e5

File tree

6 files changed

+67
-12
lines changed

6 files changed

+67
-12
lines changed

BUILD

+5
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,13 @@ filegroup(
3939
"//third_party:boost.LICENSE",
4040
"@com_google_protobuf//:LICENSE",
4141
"@com_googlesource_code_re2//:LICENSE",
42+
"@gbwt//:LICENSE",
43+
"@gbwtgraph//:LICENSE",
4244
"@htslib//:LICENSE",
45+
"@libdivsufsort//:LICENSE",
4346
"@libssw//:README.md", # SSW license embedded in the README.
4447
"@org_tensorflow//:LICENSE",
48+
"@sdsl_lite//:COPYING",
4549
],
4650
)
4751

@@ -73,6 +77,7 @@ cc_library(
7377
"//third_party/nucleus/io/python:bedgraph_writer_cclib",
7478
"//third_party/nucleus/io/python:fastq_reader_cclib",
7579
"//third_party/nucleus/io/python:fastq_writer_cclib",
80+
"//third_party/nucleus/io/python:gbz_reader_cclib",
7681
"//third_party/nucleus/io/python:gff_reader_cclib",
7782
"//third_party/nucleus/io/python:gff_writer_cclib",
7883
"//third_party/nucleus/io/python:gfile_cclib",

third_party/nucleus/io/BUILD

+24
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,7 @@ py_library(
120120
":genomics_reader",
121121
":genomics_writer",
122122
"//third_party/nucleus/io:clif_postproc",
123+
"//third_party/nucleus/io/python:gbz_reader",
123124
"//third_party/nucleus/io/python:sam_reader",
124125
"//third_party/nucleus/io/python:sam_writer",
125126
"//third_party/nucleus/protos:reads_py_pb2",
@@ -656,6 +657,29 @@ cc_library(
656657
],
657658
)
658659

660+
cc_library(
661+
name = "gbz_reader",
662+
srcs = ["gbz_reader.cc"],
663+
hdrs = ["gbz_reader.h"],
664+
deps = [
665+
":reader_base",
666+
"//third_party/nucleus/core:status",
667+
"//third_party/nucleus/core:statusor",
668+
"//third_party/nucleus/platform:types",
669+
"//third_party/nucleus/protos:cigar_cc_pb2",
670+
"//third_party/nucleus/protos:position_cc_pb2",
671+
"//third_party/nucleus/protos:range_cc_pb2",
672+
"//third_party/nucleus/protos:reads_cc_pb2",
673+
"//third_party/nucleus/util:cpp_utils",
674+
"@com_google_absl//absl/log",
675+
"@com_google_absl//absl/log:check",
676+
"@com_google_absl//absl/strings",
677+
"@gbwt",
678+
"@gbwtgraph",
679+
"@libhandlegraph",
680+
],
681+
)
682+
659683
cc_test(
660684
name = "sam_reader_test",
661685
size = "small",

third_party/nucleus/io/gbz_reader.cc

+9-10
Original file line number | Diff line number | Diff line change
@@ -20,23 +20,21 @@
2020

2121
#include <algorithm>
2222
#include <cstddef>
23+
#include <fstream> // IWYU pragma: keep
2324
#include <iostream>
2425
#include <memory>
2526
#include <regex>
2627
#include <string>
2728
#include <vector>
2829

29-
#include "file/base/file.h"
30-
#include "file/base/options.h"
31-
#include "file/iostream/file_iostream.h"
3230
#include "absl/log/check.h"
3331
#include "absl/log/log.h"
3432
#include "absl/strings/str_cat.h"
35-
#include "third_party/gbwt/include/gbwt/metadata.h"
36-
#include "third_party/gbwt/include/gbwt/support.h"
37-
#include "third_party/gbwt/include/gbwt/utils.h"
38-
#include "third_party/gbwtgraph/include/gbwtgraph/subgraph.h"
39-
#include "third_party/libhandlegraph/src/include/handlegraph/types.hpp"
33+
#include "include/gbwt/metadata.h"
34+
#include "include/gbwt/support.h"
35+
#include "include/gbwt/utils.h"
36+
#include "include/gbwtgraph/subgraph.h"
37+
#include "src/include/handlegraph/types.hpp"
4038
#include "third_party/nucleus/core/status.h"
4139
#include "third_party/nucleus/core/statusor.h"
4240
#include "third_party/nucleus/platform/types.h"
@@ -57,7 +55,8 @@ GbzReader::GbzReader(const std::string& gbz_path,
5755
double start = gbwt::readTimer();
5856

5957
// Open GBZ file in read mode.
60-
file::FileInStream in(file::OpenOrDie(gbz_path, "r", file::Defaults()));
58+
std::ifstream in(gbz_path);
59+
6160
// Create an empty GBZ object.
6261
this->gbz_ = gbwtgraph::GBZ();
6362
// Load the GBZ file into the GBZ object.
@@ -114,7 +113,7 @@ nucleus::StatusOr<std::vector<nucleus::genomics::v1::Read>> GbzReader::Query(
114113

115114
updateCache(reads);
116115

117-
return StatusOr<std::vector<nucleus::genomics::v1::Read>>(reads);
116+
return nucleus::StatusOr<std::vector<nucleus::genomics::v1::Read>>(reads);
118117
}
119118

120119
nucleus::StatusOr<std::shared_ptr<SamIterable>> GbzReader::Iterate() const {

third_party/nucleus/io/gbz_reader.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,8 @@
3636
#include <vector>
3737
#include <string>
3838

39-
#include "third_party/gbwtgraph/include/gbwtgraph/gbz.h"
40-
#include "third_party/gbwtgraph/include/gbwtgraph/subgraph.h"
39+
#include "include/gbwtgraph/gbz.h"
40+
#include "include/gbwtgraph/subgraph.h"
4141
#include "third_party/nucleus/core/statusor.h"
4242
#include "third_party/nucleus/io/reader_base.h"
4343
#include "third_party/nucleus/protos/cigar.pb.h"

third_party/nucleus/io/python/BUILD

+24
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,30 @@ py_test(
126126
],
127127
)
128128

129+
pybind_extension(
130+
name = "gbz_reader",
131+
srcs = ["gbz_reader_pybind.cc"],
132+
deps = [
133+
"//third_party/nucleus/core/python:type_caster_nucleus_status",
134+
"//third_party/nucleus/core/python:type_caster_nucleus_statusor",
135+
"//third_party/nucleus/io:gbz_reader",
136+
"//third_party/nucleus/util/python:type_caster_nucleus_proto_ptr",
137+
"@pybind11_protobuf//pybind11_protobuf:native_proto_caster",
138+
],
139+
)
140+
141+
pybind_library(
142+
name = "gbz_reader_cclib",
143+
srcs = ["gbz_reader_pybind.cc"],
144+
deps = [
145+
"//third_party/nucleus/core/python:type_caster_nucleus_status",
146+
"//third_party/nucleus/core/python:type_caster_nucleus_statusor",
147+
"//third_party/nucleus/io:gbz_reader",
148+
"//third_party/nucleus/util/python:type_caster_nucleus_proto_ptr",
149+
"@pybind11_protobuf//pybind11_protobuf:native_proto_caster",
150+
],
151+
)
152+
129153
pybind_extension(
130154
name = "sam_reader",
131155
srcs = ["sam_reader_pybind.cc"],

third_party/nucleus/io/sam.py

+3
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,7 @@
130130

131131
from third_party.nucleus.io import genomics_reader
132132
from third_party.nucleus.io import genomics_writer
133+
from third_party.nucleus.io.python import gbz_reader
133134
from third_party.nucleus.io.python import sam_reader
134135
from third_party.nucleus.io.python import sam_writer
135136
from third_party.nucleus.protos import reads_pb2
@@ -257,6 +258,8 @@ class SamReader(genomics_reader.DispatchingGenomicsReader):
257258
"""Class for reading Read protos from SAM/BAM/CRAM or TFRecord files."""
258259

259260
def _native_reader(self, input_path, ref_name='', **kwargs):
261+
if input_path.endswith('.gbz'):
262+
return gbz_reader.GbzReader(input_path, ref_name)
260263
return NativeSamReader(input_path, **kwargs)
261264

262265
def _record_proto(self):

0 commit comments

Comments
 (0)