Skip to content

Commit dbbb6e8

Browse files
Merge pull request wtsi-npg#867 from mgcam/lane_level_review
Simpifies autoqc function code and allows for lane-level review autoqc
2 parents 7f27799 + ea3a4d0 commit dbbb6e8

File tree

6 files changed

+163
-151
lines changed

6 files changed

+163
-151
lines changed

Changes

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
LIST OF CHANGES
22
---------------
33

4+
- npg_pipeline::function::autoqc
5+
- Simplified the flow of the code.
6+
- Made clearer logic for choosing QC checks to run, provided comments.
7+
- Allowed for running the review QC check on lanes.
8+
49
release 68.6.0 (2024-10-24)
510
- The runfolder_path attribute is passed to the constructor of the review
611
autoqc check object when deciding whether this check should run.

MANIFEST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,6 +1042,7 @@ t/data/release/config/notify_off/product_release.yml
10421042
t/data/release/config/notify_on/product_release.yml
10431043
t/data/release/config/qc_review/product_release.yml
10441044
t/data/release/config/qc_review_default/product_release.yml
1045+
t/data/release/config/qc_review_default_with_lane/product_release.yml
10451046
t/data/release/config/pp_archival/product_release.yml
10461047
t/data/release/config/pp_archival/product_release_no_array.yml
10471048
t/data/release/config/pp_archival/product_release_no_filters.yml

lib/npg_pipeline/function/autoqc.pm

Lines changed: 77 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,23 @@ with q{npg_pipeline::function::util};
1919

2020
our $VERSION = '0';
2121

22-
Readonly::Scalar my $QC_SCRIPT_NAME => q{qc};
22+
Readonly::Scalar my $QC_SCRIPT_NAME => q{qc};
23+
2324
Readonly::Array my @CHECKS_NEED_PAIREDNESS_INFO => qw/
2425
insert_size
2526
gc_fraction
2627
qX_yield
2728
/;
2829

30+
Readonly::Scalar my $TARGET_FILE_CHECK_RE => qr{ \A
31+
adapter |
32+
bcfstats |
33+
verify_bam_id |
34+
genotype |
35+
pulldown_metrics |
36+
haplotag_metrics
37+
\Z }xms;
38+
2939
has q{qc_to_run} => (isa => q{Str},
3040
is => q{ro},
3141
required => 1,);
@@ -51,47 +61,6 @@ sub _build__check_uses_refrepos {
5161
->find_attribute_by_name('repository') ? 1 : 0;
5262
}
5363

54-
has q{_is_lane_level_check} => (
55-
isa => q{Bool},
56-
is => q{ro},
57-
required => 0,
58-
init_arg => undef,
59-
lazy_build => 1,);
60-
sub _build__is_lane_level_check {
61-
my $self = shift;
62-
return $self->qc_to_run() =~ /^ spatial_filter $/smx;
63-
}
64-
65-
has q{_is_lane_level_check4indexed_lane} => (
66-
isa => q{Bool},
67-
is => q{ro},
68-
required => 0,
69-
init_arg => undef,
70-
lazy_build => 1,);
71-
sub _build__is_lane_level_check4indexed_lane {
72-
my $self = shift;
73-
return $self->qc_to_run() =~ /^ tag_metrics $/smx;
74-
}
75-
76-
has q{_is_check4target_file} => (
77-
isa => q{Bool},
78-
is => q{ro},
79-
required => 0,
80-
init_arg => undef,
81-
lazy_build => 1,);
82-
sub _build__is_check4target_file {
83-
my $self = shift;
84-
##no critic (RegularExpressions::RequireBracesForMultiline)
85-
##no critic (RegularExpressions::ProhibitComplexRegexes)
86-
return $self->qc_to_run() =~ /^ adapter |
87-
bcfstats |
88-
verify_bam_id |
89-
genotype |
90-
pulldown_metrics |
91-
haplotag_metrics |
92-
review $/smx;
93-
}
94-
9564
sub BUILD {
9665
my $self = shift;
9766
load_class($self->_qc_module_name);
@@ -111,15 +80,19 @@ sub create {
11180
push @definitions, $self->_create_definition4interop();
11281
} else {
11382

83+
my $is_lane = 1;
11484
for my $lp (@{$self->products->{lanes}}) {
11585

11686
$self->debug(sprintf ' autoqc check %s for lane, rpt_list: %s, is_pool: %s',
11787
$self->qc_to_run(), $lp->rpt_list, ($lp->lims->is_pool? q[True]: q[False]));
11888

11989
$done_as_lane{$lp->rpt_list} = 1;
120-
push @definitions, $self->_create_definition($lp, 0); # is_plex is always 0 here
90+
if ($self->_should_run($lp, $is_lane)) {
91+
push @definitions, $self->_create_definition($lp);
92+
}
12193
}
12294

95+
$is_lane = 0;
12396
for my $dp (@{$self->products->{data_products}}) {
12497
# skip data_products that have already been processed as lanes
12598
# (i.e. libraries or single-sample pools)
@@ -133,7 +106,9 @@ sub create {
133106
$self->qc_to_run(), $dp->{rpt_list}, ($is_plex? q[True]: q[False]),
134107
($dp->lims->is_pool? q[True]: q[False]), ($is_plex? $tag_index: q[NONE]));
135108

136-
push @definitions, $self->_create_definition($dp, $is_plex);
109+
if ($self->_should_run($dp, $is_lane, $is_plex)) {
110+
push @definitions, $self->_create_definition($dp);
111+
}
137112
}
138113
}
139114

@@ -145,14 +120,17 @@ sub create {
145120
}
146121

147122
sub _create_definition {
148-
my ($self, $product, $is_plex) = @_;
149-
150-
if ($self->_should_run($is_plex, $product)) {
151-
my $command = $self->_generate_command($product);
152-
return $self->_create_definition_object($product, $command);
123+
my ($self, $product) = @_;
124+
125+
my $ref = {
126+
'job_name' => $self->_job_name(),
127+
'composition' => $product->composition,
128+
'command' => $self->_generate_command($product)
129+
};
130+
if ( ($self->qc_to_run eq 'adapter') || $self->_check_uses_refrepos() ) {
131+
$ref->{'command_preexec'} = $self->repos_pre_exec_string();
153132
}
154-
155-
return;
133+
return $self->create_definition($ref);
156134
}
157135

158136
sub _job_name {
@@ -190,23 +168,6 @@ sub _create_definition4interop {
190168
return $self->create_definition($ref);
191169
}
192170

193-
sub _create_definition_object {
194-
my ($self, $product, $command) = @_;
195-
196-
my $ref = {};
197-
my $qc_to_run = $self->qc_to_run;
198-
199-
$ref->{'job_name'} = $self->_job_name();
200-
$ref->{'composition'} = $product->composition;
201-
$ref->{'command'} = $command;
202-
203-
if ( ($qc_to_run eq 'adapter') || $self->_check_uses_refrepos() ) {
204-
$ref->{'command_preexec'} = $self->repos_pre_exec_string();
205-
}
206-
207-
return $self->create_definition($ref);
208-
}
209-
210171
sub _generate_command {
211172
my ($self, $dp) = @_;
212173

@@ -282,64 +243,64 @@ sub _generate_command {
282243
}
283244

284245
sub _should_run {
285-
my ($self, $is_plex, $product) = @_;
286-
287-
my $can_run = 1;
246+
my ($self, $product, $is_lane, $is_plex) = @_;
288247

289-
my $is_lane = !$is_plex; # if it's not a plex, it's a lane
290-
my $rpt_list = $product->rpt_list;
291-
my $is_pool = $product->lims->is_pool;
292-
my $is_tag_zero = $product->is_tag_zero_product;
248+
my $is_pool = $product->lims->is_pool; # Note - true for merged data.
293249

294-
if($self->qc_to_run() eq 'spatial_filter' and ($self->platform_NovaSeq or $self->platform_NovaSeqX)) {
295-
return 0;
250+
if ($self->qc_to_run() eq 'spatial_filter') {
251+
# Run on individual lanes only.
252+
return $is_lane && !($self->platform_NovaSeq || $self->platform_NovaSeqX) ? 1 : 0;
296253
}
297-
298-
if ($self->_is_lane_level_check()) {
299-
return !$is_plex;
254+
if ($self->qc_to_run() eq 'tag_metrics') {
255+
# For indexed runs, run on individual lanes if they are pools of samples.
256+
return $self->is_indexed() && $is_lane && $is_pool ? 1 : 0;
300257
}
301-
302-
if ($self->_is_lane_level_check4indexed_lane()) {
303-
return $is_lane && $is_pool;
258+
if ($self->qc_to_run() eq 'review') {
259+
# This check should never run on tag zero, other entities are fair game.
260+
if ($is_plex && $product->is_tag_zero_product) {
261+
return 0;
262+
}
304263
}
305-
306-
if ($self->_is_check4target_file()) {
307-
$can_run = (($is_lane && !$is_pool) ||
308-
($is_plex && !$is_tag_zero));
264+
if ($self->qc_to_run() =~ $TARGET_FILE_CHECK_RE) {
265+
# Do not run on tag zero files, unmerged or merged.
266+
# Do not run on lane-level data if the lane is a pool.
267+
# Run on lane-level data for a single library.
268+
# Run on merged plexes or lanes.
269+
if (($is_plex && $product->is_tag_zero_product) || ($is_lane && $is_pool)) {
270+
return 0;
271+
}
309272
}
310273

311-
if ($can_run) {
312-
my %init_hash = ( rpt_list => $rpt_list, lims => $product->lims );
313-
314-
if ($self->has_repository && $self->_check_uses_refrepos()) {
315-
$init_hash{'repository'} = $self->repository;
316-
}
317-
if ($self->qc_to_run() eq 'insert_size') {
318-
$init_hash{'is_paired_read'} = $self->is_paired_read() ? 1 : 0;
319-
} elsif ($self->qc_to_run() eq 'review') {
320-
$init_hash{'product_conf_file_path'} = $self->product_conf_file_path;
321-
$init_hash{'runfolder_path'} = $self->runfolder_path;
322-
} elsif ($self->qc_to_run() eq 'genotype') {
323-
my $ref_fasta = npg_pipeline::cache::reference->instance()
324-
->get_path($product, q(fasta), $self->repository());
325-
if ($ref_fasta) {
326-
$init_hash{'reference_fasta'} = $ref_fasta;
327-
}
274+
# The rest will abide by the return value of the can_run method of the
275+
# autoqc check. We have to provide enough argument to the constructor for
276+
# the object to be created and can_run to execute.
277+
my %init_hash = ( rpt_list => $product->rpt_list, lims => $product->lims );
278+
if ($self->has_repository && $self->_check_uses_refrepos()) {
279+
$init_hash{'repository'} = $self->repository;
280+
}
281+
if ($self->qc_to_run() eq 'insert_size') {
282+
$init_hash{'is_paired_read'} = $self->is_paired_read() ? 1 : 0;
283+
} elsif ($self->qc_to_run() eq 'review') {
284+
$init_hash{'product_conf_file_path'} = $self->product_conf_file_path;
285+
$init_hash{'runfolder_path'} = $self->runfolder_path;
286+
} elsif ($self->qc_to_run() eq 'genotype') {
287+
my $ref_fasta = npg_pipeline::cache::reference->instance()
288+
->get_path($product, q(fasta), $self->repository());
289+
if ($ref_fasta) {
290+
$init_hash{'reference_fasta'} = $ref_fasta;
328291
}
329-
330-
my $class = $self->_qc_module_name();
331-
my $check_obj;
332-
try {
333-
$check_obj = $class->new(\%init_hash);
334-
} catch {
335-
delete $init_hash{'lims'};
336-
$check_obj = $class->new(\%init_hash);
337-
};
338-
339-
$can_run = $check_obj->can_run();
340292
}
341293

342-
return $can_run;
294+
my $class = $self->_qc_module_name();
295+
my $check_obj;
296+
try {
297+
$check_obj = $class->new(\%init_hash);
298+
} catch {
299+
delete $init_hash{'lims'};
300+
$check_obj = $class->new(\%init_hash); # Allow to fail at this stage.
301+
};
302+
303+
return $check_obj->can_run();
343304
}
344305

345306
__PACKAGE__->meta->make_immutable;

0 commit comments

Comments
 (0)