-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnextflow_schema.json
More file actions
736 lines (736 loc) · 46.7 KB
/
Copy pathnextflow_schema.json
File metadata and controls
736 lines (736 loc) · 46.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/sfglab/dchichip/master/nextflow_schema.json",
"title": "sfglab/dchichip pipeline parameters",
"description": "Workflow description - to be filled",
"type": "object",
"$defs": {
"input_output_options": {
"title": "Input/output options",
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir"],
"properties": {
"input": {
"type": "string",
"format": "file-path",
"mimetype": "text/csv",
"pattern": "^\\S+\\.csv$",
"schema": "assets/schema_input.json",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.",
"fa_icon": "fas fa-file-csv"
},
"outdir": {
"type": "string",
"format": "directory-path",
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
"fa_icon": "fas fa-envelope",
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
},
"multiqc_title": {
"type": "string",
"description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
"fa_icon": "fas fa-file-signature"
}
}
},
"reference_annotation_options": {
"title": "Genome reference and annotation resources options",
"type": "object",
"fa_icon": "fas fa-dna",
"description": "Configuration for reference genome files and annotation resources used throughout the pipeline. These parameters ensure consistent genome build usage across mapping, feature annotation, and downstream analyses.",
"help_text": "Provide correct and consistent genome resources (FASTA index, GTF, chromosome sizes, and blacklist) matching your chosen reference build (e.g., hg38, mm10). Mismatched files can lead to coordinate errors or missing annotations.",
"properties": {
"fasta": {
"type": "string",
"format": "file-path",
"mimetype": "text/plain",
"pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
"description": "Path to FASTA genome file, fai and bwa indexes.",
"help_text": "This parameter is *mandatory*.",
"fa_icon": "far fa-file-code"
},
"ref_short": {
"type": "string",
"title": "Reference Genome Short Name",
"description": "A short identifier for the reference genome build used in the workflow. This is mainly used for labeling outputs and maintaining consistency across pipeline steps. Typical values are `hg38` (human) or `mm10` (mouse).",
"help_text": "Choose a short, standard genome code matching your input reference (e.g., `hg38`, `mm10`). It should correspond to the genome files (FASTA, GTF, chrom sizes) you provide.",
"fa_icon": "far fa-file-code",
"default": "hg38"
},
"jaspar_motif": {
"type": "string",
"title": "JASPAR Motif File (TSV)",
"description": "Specifies the URL or local path to the JASPAR motif file (TSV format) used for motif scanning and peak annotation — typically representing transcription factor binding motifs such as CTCF.",
"help_text": "Provide the path or direct download link to a JASPAR motif file compatible with your reference genome. The default points to the CTCF motif (MA0139.1) for hg38. Example: `--jaspar_motif https://jaspar.elixir.no/downloads/MA0139.1.tsv.gz`.",
"fa_icon": "fas fa-fingerprint",
"default": "http://expdata.cmmt.ubc.ca/JASPAR/downloads/UCSC_tracks/2022/hg38/MA0139.1.tsv.gz"
},
"blacklist": {
"type": "string",
"title": "ENCODE Blacklist Regions (BED)",
"description": "Path or URL to a BED file containing ENCODE blacklist regions that should be excluded from peak calling, loop detection, and coverage calculations. These regions are known to produce artificially high signal or mapping artifacts.",
"help_text": "Use the appropriate blacklist file for your genome build (e.g., hg19, hg38, mm10). The default points to the ENCODE hg38 blacklist (`ENCFF356LFX`). Example: `--blacklist /refs/hg38-blacklist.bed.gz`.",
"fa_icon": "fas fa-ban",
"default": "https://raw.githubusercontent.com/SFGLab/dcHiChIP/refs/heads/main/assets/ENCFF356LFX.bed.gz"
},
"gtf": {
"type": "string",
"title": "Gene Annotation File (GTF)",
"description": "Specifies the path or URL to the GTF file containing gene annotations for the reference genome. This file is used to assign peaks, loops, and other genomic features to known genes and transcripts.",
"help_text": "Provide a GTF file compatible with your chosen reference genome (e.g., Ensembl or GENCODE format). The default points to the GRCh38 annotation from the Illumina iGenomes collection. Example: `--gtf /data/genomes/GRCh38/genes.gtf`.",
"fa_icon": "fas fa-scroll",
"default": "s3://ngi-igenomes/igenomes/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf"
},
"chrom_size": {
"type": "string",
"title": "Chromosome Sizes File",
"description": "Path to a two-column file listing chromosome names and their corresponding lengths, used to define genomic bounds during binning and matrix construction.",
"help_text": "This file ensures that cooler and related modules apply consistent chromosome boundaries. It must match the same reference genome build as the input FASTA and GTF files. Example: `--chrom_size /refs/hg38.chrom.sizes`.",
"fa_icon": "fas fa-ruler-horizontal",
"default": "https://raw.githubusercontent.com/SFGLab/dcHiChIP/refs/heads/main/assets/hg38.chrom.sizes"
},
"genomics_features": {
"type": "string",
"title": "Genomic Features File (MAPS)",
"description": "Specifies the path or URL to the genomic features file required by MAPS for loop calling. This file contains mappability, GC content, and restriction enzyme fragment information used for bias correction.",
"help_text": "Use the appropriate MAPS genomic features file matching your genome build and restriction enzyme (e.g., MboI or DpnII). The default points to the hg38 MboI 10 kb resolution file. Example: `--genomics_features /refs/MAPS_features_hg38_MboI_10kb.txt`.",
"fa_icon": "fas fa-dna",
"default": "https://raw.githubusercontent.com/HuMingLab/MAPS/refs/heads/master/MAPS_data_files/hg38/genomic_features/F_GC_M_MboI_10Kb_el.GRCh38.txt"
}
}
},
"mapping_filtering_options": {
"title": "Alignment & Filtering",
"type": "object",
"fa_icon": "fas fa-dna",
"description": "Configuration options for read alignment, mapping quality filtering, duplicate removal, and sorting. These parameters control how raw HiChIP reads are aligned to the reference genome and preprocessed before downstream analyses.",
"help_text": "Adjust these settings to balance mapping stringency, computational cost, and data quality. For example, lowering the mapping quality (`mapq`) may include more reads but increase background noise, while increasing it improves precision.",
"properties": {
"mapq": {
"type": "integer",
"title": "Minimum Mapping Quality (MAPQ)",
"description": "Sets the minimum MAPQ threshold for retaining aligned reads. Reads with mapping quality below this value are filtered out before downstream analysis.",
"help_text": "Use higher MAPQ values (e.g., 30) to include only confidently aligned reads and reduce background noise. Lower values may retain more reads but can increase false positives. Typical range: 10–60.",
"fa_icon": "fas fa-filter",
"default": 30
},
"se_samtools_args": {
"type": "string",
"title": "SAMtools Arguments (Single-End)",
"description": "Custom command-line flags passed to SAMtools when processing single-end read alignments.",
"help_text": "Use this to fine-tune how SAMtools handles single-end BAM/SAM files — for example, adjusting compression, threading, or output format. Example: `--se_samtools_args \"-@ 8 -bh\"`. Leave empty (`null`) to use default SAMtools behavior.",
"fa_icon": "fas fa-terminal",
"default": null
},
"se_bwa_mem_args": {
"type": "string",
"title": "BWA-MEM Arguments (Single-End)",
"description": "Additional command-line parameters for BWA-MEM when aligning single-end reads.",
"help_text": "Use this to customize BWA-MEM behavior for single-end reads — for example, seed length (`-k`), mismatch penalties, or output verbosity. Example: `--se_bwa_mem_args \"-k 19 -B 4 -O 6,6 -E 1,1\"`. Leave empty (`null`) to use default alignment settings.",
"fa_icon": "fas fa-dna",
"default": null
},
"bwa_mem_args": {
"type": "string",
"title": "BWA-MEM Arguments (Paired-End)",
"description": "Custom command-line options for the BWA-MEM aligner used for paired-end read mapping.",
"help_text": "Use this to modify BWA-MEM parameters such as alignment scoring, read group tagging, or reporting options. The default `-M -v 0` marks shorter split hits as secondary and suppresses verbose output. Example: `--bwa_mem_args \"-M -K 100000000 -Y -R '@RG\\tID:sample\\tSM:sample'\"`.",
"fa_icon": "fas fa-dna",
"default": "-M -v 0"
},
"samtools_fixmate_args": {
"type": "string",
"title": "SAMtools Fixmate Arguments",
"description": "Specifies additional command-line options for the `samtools fixmate` command, which ensures that read-pair information is consistent in the alignment file.",
"help_text": "The default `-m` option adds mate score (`ms`) tags to each read pair, which `samtools markdup` uses to select the best reads to keep when marking duplicates. Adjust this if you need to control how mate tags or secondary alignments are handled. Example: `--samtools_fixmate_args \"-m -O bam\"`.",
"fa_icon": "fas fa-tools",
"default": "-m"
},
"optical_duplicate_distance": {
"type": "integer",
"title": "Optical Duplicate Distance",
"description": "Defines the maximum pixel distance between clusters on the flowcell that are considered optical duplicates during duplicate marking.",
"help_text": "Set this value to detect and optionally remove optical duplicates generated by sequencing instruments. A value of `0` disables optical duplicate detection. Typical values range between `100` and `2500` depending on the sequencing platform. Example: `--optical_duplicate_distance 2500`.",
"fa_icon": "fas fa-ruler-combined",
"default": 0
},
"remove_duplicates_args": {
"type": "string",
"title": "Duplicate Removal Arguments",
"description": "Specifies custom command-line flags for the duplicate removal step, controlling how PCR or optical duplicates are identified and filtered.",
"help_text": "The default `-n` flag skips duplicate removal but still counts duplicates for statistics. Modify this if you want to fully remove duplicates or change behavior depending on your data. Example: `--remove_duplicates_args \"-n --stats dupstats.txt\"`.",
"fa_icon": "fas fa-clone",
"default": "-n"
},
"filter_quality_args": {
"type": "string",
"title": "Read Quality Filtering Arguments",
"description": "Additional command-line options for filtering reads based on mapping or sequence quality before downstream analysis.",
"help_text": "Use this to apply extra filtering thresholds beyond MAPQ, such as minimum alignment length or mismatch rate. Leave empty (`null`) to use default filtering behavior. Example: `--filter_quality_args \"--min-MAPQ 10 --min-len 30\"`.",
"fa_icon": "fas fa-filter",
"default": null
},
"filter_paires_args": {
"type": "string",
"title": "Pair Filtering Arguments",
"description": "Custom options for filtering valid read pairs based on distance, orientation, or pairing criteria during HiChIP read preprocessing.",
"help_text": "Use this to refine which read pairs are retained for contact map generation. For example, you can exclude pairs beyond a distance threshold or with invalid orientations. Example: `--filter_paires_args \"--max-dist 2000 --min-dist 100\"`. Leave empty (`null`) to keep default pair filtering behavior.",
"fa_icon": "fas fa-exchange-alt",
"default": null
},
"samtools_markdup_args": {
"type": "string",
"title": "SAMtools Markdup Arguments",
"description": "Specifies additional options for the `samtools markdup` command, which marks or removes duplicate reads in BAM files.",
"help_text": "Use this to control duplicate marking behavior, threading, or reporting options. For example, adding `-r` removes duplicates instead of marking them, and `-@ 8` sets the number of threads. Example: `--samtools_markdup_args \"-r -@ 8\"`. Leave empty (`null`) to use SAMtools default settings.",
"fa_icon": "fas fa-tags",
"default": null
},
"samtools_sort_2_args": {
"type": "string",
"title": "SAMtools Sort (Second Pass) Arguments",
"description": "Specifies additional parameters for the second sorting step performed by `samtools sort`, typically used for name-sorting or coordinate-sorting read pairs.",
"help_text": "The default `-n` flag sorts alignments by read name, which is often required for pairwise operations. Omit `-n` to sort by coordinate (the `samtools sort` default; `-o` only names the output file) or add threading options. Example: `--samtools_sort_2_args \"-@ 8 -T tmp -o sorted.bam\"`.",
"fa_icon": "fas fa-sort-amount-down",
"default": "-n"
},
"bwa_mem_samtools_args": {
"type": "string",
"title": "BWA-MEM + SAMtools Combined Arguments",
"description": "Specifies additional command-line options applied jointly to the BWA-MEM alignment output and subsequent SAMtools processing steps.",
"help_text": "Use this to adjust output conversion and compression parameters after BWA-MEM alignment. For example, `-bh` converts to BAM format with header included, or `-@ 8` enables multithreading. Leave `null` to use default behavior.\n\nExample: `--bwa_mem_samtools_args \"-bh -@ 8\"`.",
"fa_icon": "fas fa-code-branch",
"default": null
}
}
},
"peaks_loops_options": {
"title": "Peaks and Loops options",
"type": "object",
"fa_icon": "fas fa-project-diagram",
"description": "Configuration of parameters related to peak calling and chromatin loop detection. These settings control how enriched regions (peaks) and chromatin interactions (loops) are identified from HiChIP data.",
"help_text": "Adjust these options to fine-tune sensitivity and specificity in peak and loop detection. For example, you can modify the p-value threshold for MACS3 or enable MAPS loop calling for specific experimental setups.",
"properties": {
"peak_quality": {
"type": "number",
"title": "Peak Significance Threshold",
"description": "Sets the significance cutoff (p-value or q-value, depending on MACS3 configuration) for peak calling to identify enriched regions.",
"help_text": "Lower values (e.g., 0.01 or 1e-5) increase stringency, reducing false positives but possibly missing weaker peaks. The default of 0.05 is a balanced threshold. Example: `--peak_quality 1e-5`.",
"fa_icon": "fas fa-signal",
"default": 0.05
},
"genome_size": {
"type": "string",
"title": "Genome Size (MACS3)",
"description": "Specifies the genome size shortcut for MACS3 peak calling, corresponding to the total mappable genome length.",
"help_text": "Use MACS3-supported short codes like `hs` (human), `mm` (mouse), or provide an exact value (e.g., 2.7e9). Must match your reference genome build. Example: `--genome_size mm`.",
"fa_icon": "fas fa-globe",
"default": "hs"
},
"macs3_callpeak_args": {
"type": "string",
"title": "MACS3 Additional Arguments",
"description": "Optional custom arguments passed directly to the MACS3 `callpeak` command for advanced configuration.",
"help_text": "Use this to customize peak calling — for example, enabling model-free mode, adjusting shift/extension sizes, or specifying control input. Example: `--macs3_callpeak_args \"-q 0.01 --nomodel --shift -75 --extsize 150\"`.",
"fa_icon": "fas fa-chart-area",
"default": null
},
"maps_args": {
"type": "string",
"title": "MAPS Loop Calling Arguments",
"description": "Additional command-line flags for the MAPS (Model-based Analysis of PLAC-seq/HiChIP) tool, used for chromatin loop detection.",
"help_text": "Adjust MAPS behavior such as bin size, distance range, or resolution. Example: `--maps_args \"--bin-size 5000 --cis-only\"`. Leave empty (`null`) to use default MAPS settings.",
"fa_icon": "fas fa-project-diagram",
"default": null
},
"skip_maps": {
"type": "boolean",
"title": "Skip MAPS Module",
"description": "Determines whether to skip the MAPS loop calling module during the pipeline execution.",
"help_text": "Set this to `true` to disable loop calling (useful for QC or peak-only runs). Set to `false` to perform full MAPS-based loop analysis. Example: `--skip_maps false`.",
"fa_icon": "fas fa-toggle-off",
"default": true
}
}
},
"matrices_options": {
"title": "Contact Matrices & Binning",
"type": "object",
"fa_icon": "fas fa-th",
"description": "Settings controlling the generation, binning, and normalization of contact matrices from HiChIP data. These parameters determine how raw read pairs are converted into multi-resolution .cool files for downstream analysis.",
"help_text": "Adjust these options to tune the resolution and structure of the resulting chromatin contact maps. For example, smaller bin sizes (e.g., 1 kb) provide higher resolution but require more memory and processing time, while larger bins (e.g., 10 kb or 25 kb) yield smoother matrices for large-scale analyses.",
"properties": {
"cool_bin": {
"type": "integer",
"title": "Base Bin Size (bp)",
"description": "Defines the base bin size, in base pairs, used for generating contact matrices in the .cool format.",
"help_text": "Smaller bin sizes (e.g., 1000 bp) provide higher resolution but increase memory and runtime requirements. Larger bins (e.g., 5000–25000 bp) are recommended for lower-depth datasets. Example: `--cool_bin 5000`.",
"fa_icon": "fas fa-border-all",
"default": 1000
},
"cooler_cload_args": {
"type": "string",
"title": "Cooler Cload Arguments",
"description": "Specifies command-line options passed to the `cooler cload` command for loading read pairs into a .cool file.",
"help_text": "Use this to modify how columns from the pairs file are interpreted when generating contact matrices. The default is configured for standard pairtools output. Example: `--cooler_cload_args \"pairs --zero-based -c1 1 -p1 2 -c2 3 -p2 4\"`.",
"fa_icon": "fas fa-database",
"default": "pairs --zero-based -c1 2 -p1 3 -c2 4 -p2 5"
},
"cooler_zoomify_res": {
"type": "string",
"title": "Zoomify Resolution Preset",
"description": "Specifies the resolution preset key used for multi-resolution cooler files (`cooler zoomify`). Each key corresponds to a predefined set of bin sizes defined in `insulation_resultions`.",
"help_text": "Use this to select which resolution set to apply during matrix zoomification. For example, `1000N` corresponds to 1 kb–500 kb bins by default. Example: `--cooler_zoomify_res 5000N`.",
"fa_icon": "fas fa-layer-group",
"default": "1000N"
},
"cooler_zoomify_args": {
"type": "string",
"title": "Cooler Zoomify Additional Arguments",
"description": "Extra command-line options for the `cooler zoomify` command that generates multi-resolution cooler files.",
"help_text": "Use this to control balancing, overwrite behavior, or other zoomify parameters. Example: `--cooler_zoomify_args \"--balance --force\"`. Leave empty (`null`) to use defaults.",
"fa_icon": "fas fa-sliders-h",
"default": null
},
"insulation_resultions": {
"type": "object",
"title": "Insulation Score Resolutions",
"description": "Specifies the resolution presets (in base pairs) used for insulation score and TAD boundary calculations. Each key defines a named preset containing multiple window sizes.",
"help_text": "Provide one or more resolution sets, where each key (e.g., `1000N`) maps to a list of window sizes. These values determine the granularity of TAD detection. Example:\n```\n--insulation_resultions '{\"5000N\": \"5000 10000 25000 50000 100000\"}'\n```",
"fa_icon": "fas fa-ruler",
"default": { "1000N": "1000 2000 5000 10000 20000 50000 100000 200000 500000" }
},
"cooltools_eigscis_args": {
"type": "string",
"title": "Cooltools Eigs-Cis Arguments",
"description": "Additional command-line flags for the `cooltools eigs-cis` command, which computes eigenvectors for A/B compartment analysis.",
"help_text": "Modify this to control the number of eigenvectors or specify contact type (cis/trans). Example: `--cooltools_eigscis_args \"--n-eigs 2 --contact-type cis\"`.",
"fa_icon": "fas fa-wave-square",
"default": "--n-eigs 1"
},
"cooler_eigscis_resultion": {
"type": "integer",
"title": "Eigenvector Resolution (bp)",
"description": "Specifies the resolution, in base pairs, used for eigenvector computation in A/B compartment analysis.",
"help_text": "Smaller values provide higher resolution but require denser contact maps. Example: `--cooler_eigscis_resultion 250000`.",
"fa_icon": "fas fa-grip-lines",
"default": 100000
},
"calder_bin": {
"type": "integer",
"title": "CALDER Bin Size",
"description": "Sets the bin size used for subcompartment calling by CALDER.",
"help_text": "Provide bin size as a number (e.g., 10000) or scientific notation (e.g., `10E3`). Higher bin sizes reduce resolution but speed up computation. Example: `--calder_bin 25000`.",
"fa_icon": "fas fa-cubes",
"default": 10000
},
"calder_chrom": {
"type": "string",
"title": "CALDER chromosomes",
"description": "The chromosomes to be analyzed by CALDER.",
"help_text": "Provide one or more chromosomes as numbers. Multiple chromosomes must be separated by commas (`,`). Example: `--calder_chrom 1` or `--calder_chrom 1,2,3,19,20`.",
"fa_icon": "fas fa-cubes",
"default": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23"
},
"gstripe_args": {
"type": "string",
"title": "gStripe Detection Arguments",
"description": "Extra command-line options for the `gStripe` tool, which detects stripe-like chromatin interaction patterns.",
"help_text": "Modify detection thresholds or bin correction behavior. The default `--fix_bin_start` ensures stripe detection starts from bin-aligned positions. Example: `--gstripe_args \"--fix_bin_start --minlen 3 --qval 0.05\"`.",
"fa_icon": "fas fa-grip-lines-vertical",
"default": "--fix_bin_start"
}
}
},
"visualization_options": {
"title": "Visualization & QC",
"type": "object",
"fa_icon": "fas fa-chart-bar",
"description": "Configuration for plotting and quality-control steps (e.g., FastQC, deepTools, Juicer Tools, pairtools). These options affect coverage plots, sample correlations, and other diagnostic visualizations.",
"help_text": "Tune these to control plot styles, correlation methods, and output formats. For large cohorts, consider lighter settings (e.g., skipping numbers on heatmaps) to speed up rendering.",
"properties": {
"plot_method": {
"type": "string",
"title": "Correlation Method for Plots",
"description": "Defines the statistical method used to compute sample-to-sample correlations in deepTools visualizations.",
"help_text": "Choose between `spearman` (rank-based, robust to outliers) or `pearson` (linear correlation). The default `spearman` is typically used for read-count correlation heatmaps. Example: `--plot_method pearson`.",
"fa_icon": "fas fa-chart-line",
"default": "spearman"
},
"plot_type": {
"type": "string",
"title": "Plot Type for Correlation Visualization",
"description": "Specifies the visualization format for correlation results — typically a heatmap or scatter plot.",
"help_text": "Select the style best suited to your comparison. `heatmap` provides an overview of pairwise correlations, while `scatter` highlights relationships between specific samples. Example: `--plot_type scatter`.",
"fa_icon": "fas fa-th-large",
"default": "heatmap"
},
"fastqc_args": {
"type": "string",
"title": "FastQC Arguments",
"description": "Specifies additional command-line options for FastQC quality control analysis of raw sequencing reads.",
"help_text": "Use this to control FastQC behavior, such as verbosity or output compression. The default `--quiet` suppresses detailed logging. Example: `--fastqc_args \"--quiet --nogroup\"`.",
"fa_icon": "fas fa-vial",
"default": "--quiet"
},
"deeptools_plotcoverage_args": {
"type": "string",
"title": "deepTools PlotCoverage Arguments",
"description": "Additional flags for the deepTools `plotCoverage` module, which visualizes genome-wide read coverage across samples.",
"help_text": "Customize output options such as file format, scaling, or whether to skip empty bins. The default `--skipZeros` ignores regions with zero coverage. Example: `--deeptools_plotcoverage_args \"--skipZeros --plotFileFormat pdf\"`.",
"fa_icon": "fas fa-chart-area",
"default": "--skipZeros"
},
"deeptools_plotcorrelation_args": {
"type": "string",
"title": "deepTools PlotCorrelation Arguments",
"description": "Custom flags for the deepTools `plotCorrelation` command, which computes and visualizes sample correlation matrices.",
"help_text": "Use this to modify correlation type, color scheme, and plot annotations. The default parameters produce a Spearman correlation heatmap with labeled values. Example: `--deeptools_plotcorrelation_args \"--corMethod pearson --colorMap RdBu --what pairwise\"`.",
"fa_icon": "fas fa-border-none",
"default": "--skipZeros --plotTitle \"Spearman Correlation of Read Counts\" --colorMap RdYlBu --plotNumbers "
},
"juicertools_args": {
"type": "string",
"title": "Juicer Tools Arguments",
"description": "Specifies extra command-line parameters for Juicer Tools utilities used for Hi-C/HiChIP contact map analysis.",
"help_text": "Set parameters for operations like normalization, balancing, or matrix extraction. Example: `--juicertools_args \"pre --threads 8 --resolutions 5000\"`. Leave `null` to use default Juicer settings.",
"fa_icon": "fas fa-compact-disc",
"default": null
},
"pairtools_parse2_args": {
"type": "string",
"title": "Pairtools Parse2 Arguments",
"description": "Additional options for the `pairtools parse2` command used to convert alignment files into standardized pairs format.",
"help_text": "Modify column selection, filtering, or threading for parsing steps. Example: `--pairtools_parse2_args \"--add-columns chr1,chr2,pos1,pos2 --nproc 8\"`. Leave `null` for default settings.",
"fa_icon": "fas fa-random",
"default": null
}
}
},
"multimm_options": {
"title": "3D Genome Modelling (MultiMM)",
"type": "object",
"fa_icon": "fas fa-cube",
"description": "Configuration options for 3D genome reconstruction and visualization using the MultiMM module. These parameters define which genomic regions are modeled, the computational platform used (CPU or GPU), and optional fine-grained settings for chromosomal or locus-level simulations.",
"help_text": "Adjust these parameters to specify the modeling scope (whole chromosome, gene region, or specific locus) and computational mode. MultiMM integrates chromatin contact data to predict spatial genome structures. For large models, prefer GPU acceleration if available.",
"properties": {
"multimm_platform": {
"type": "string",
"title": "MultiMM Computational Platform",
"description": "Specifies the compute platform used for MultiMM 3D genome modeling — either CPU or GPU, depending on system availability and dataset size.",
"help_text": "Use `CPU` for standard desktop or cluster execution. Choose `GPU` to accelerate simulations on compatible hardware, especially for large or high-resolution models. Example: `--multimm_platform GPU`.",
"fa_icon": "fas fa-microchip",
"default": "CPU"
},
"multimm_modelling_level": {
"type": "string",
"title": "Modelling Granularity Level",
"description": "Specifies the genomic scale at which MultiMM performs 3D genome modeling — ranging from a single gene to the entire genome.",
"help_text": "Choose the modeling depth according to your analysis goal and computational resources.\n\nThe following modelling levels are available:\n\n• **GENE** — Provide a gene of interest (with an associated .bedpe file path). MultiMM models the gene with a default ±100 kb window around it. Compartment forces are *not* considered at this level.\n\n• **REGION** — Specify a chromosome and genomic coordinates (start–end). Compartment interactions can optionally be included. Only the selected region is modeled.\n\n• **CHROMOSOME** — Provide a chromosome name; MultiMM automatically determines start and end coordinates. Compartment data can be imported for large-scale organization.\n\n• **GW (Genome-Wide)** — Models the entire genome. No input for chromosome or coordinates is needed. This is the most computationally intensive mode, potentially taking minutes to hours depending on system performance.\n\nExample: `--multimm_modelling_level region`",
"fa_icon": "fas fa-layer-group",
"default": "chrom"
},
"multimm_gene_name": {
"type": "string",
"title": "Gene Name for Modelling",
"description": "Specifies the gene symbol or identifier to be modeled in 3D when the modelling level is set to `gene`.",
"help_text": "Provide a valid gene symbol that exists within your reference annotation (e.g., `CTCF`, `RUNX1`). This parameter is ignored if the modelling level is set to `chrom` or `region`. Example: `--multimm_gene_name RUNX1`.",
"fa_icon": "fas fa-dna",
"default": null
},
"multimm_chrom": {
"type": "string",
"title": "Chromosome to Model",
"description": "Defines which chromosome should be used for 3D genome modeling. Required for both chromosome-level and locus-level modeling.",
"help_text": "Use standard chromosome naming consistent with your reference genome (e.g., `chr1`, `chr21`, `chrX`). Example: `--multimm_chrom chr10`.",
"fa_icon": "fas fa-stream",
"default": "chr21"
},
"multimm_loc_start": {
"type": "integer",
"title": "Locus Start Coordinate (bp)",
"description": "Specifies the genomic start coordinate (in base pairs) for locus-level 3D modeling.",
"help_text": "Used only when modelling level is set to `locus`. Must be a valid coordinate within the selected chromosome. Example: `--multimm_loc_start 28000000`.",
"fa_icon": "fas fa-crosshairs",
"default": null
},
"multimm_loc_end": {
"type": "integer",
"title": "Locus End Coordinate (bp)",
"description": "Specifies the genomic end coordinate (in base pairs) for locus-level 3D modeling.",
"help_text": "Used together with `multimm_loc_start` to define the genomic window for 3D reconstruction. Example: `--multimm_loc_end 32000000`.",
"fa_icon": "fas fa-arrows-alt-h",
"default": null
},
"multimm_args": {
"type": "string",
"title": "MultiMM Additional Arguments",
"description": "Custom command-line arguments passed directly to MultiMM for fine-tuning modeling behavior and output.",
"help_text": "Use this for advanced control — e.g., number of models, iteration limits, or output directory. Example: `--multimm_args \"--n-models 50 --max-iters 2000\"`. Leave empty (`null`) to use defaults.",
"fa_icon": "fas fa-terminal",
"default": null
}
}
},
"ccd_caller_options": {
"title": "Chromatin Contact Domain (CCD) Calling",
"type": "object",
"fa_icon": "fas fa-braille",
"description": "Configuration options for calling chromatin contact domains (CCDs) from loop or contact data. These parameters control the minimum loop support and the minimal genomic span required for a CCD.",
"help_text": "Use these options to tune how stringent CCD detection should be. By default, the pipeline calls compact, well-supported domains (≥ 2 overlapping loops and ≥ 15 kb length, corresponding to ~3 contiguous 5 kb bins).\n\nIf `ccd_caller_args` is left as null, the pipeline automatically constructs the CCD caller command as:\n\n`--summits_only --min_loops <min_loops> --min_length <min_length>`\n\nSetting `ccd_caller_args` overrides this automatic construction entirely and passes the provided string directly to the CCD caller.",
"properties": {
"ccd_caller_args": {
"type": "string",
"title": "Custom CCD Caller Arguments",
"description": "Override the default CCD caller command-line arguments.",
"help_text": "Leave as null to use the pipeline defaults. When set, this string is passed verbatim to the CCD caller process and completely replaces the auto-generated arguments. Use this option for full manual control (e.g. disabling summit filtering or adjusting thresholds explicitly).\n\nExample (no summit filtering): `--min_loops 3 --min_length 20000`\nExample (using summits only): `--summits_only --min_loops 3 --min_length 20000`.",
"fa_icon": "fas fa-terminal",
"default": null
},
"min_loops": {
"type": "integer",
"title": "Minimum Supporting Loops per CCD",
"description": "Minimum number of overlapping loops required for a genomic region to be considered part of a CCD.",
"help_text": "This parameter controls the loop density required to define a CCD. At 5 kb resolution, this corresponds to the minimum loop coverage per bin. Higher values produce fewer, higher-confidence domains; lower values are more permissive. This value is inserted into the default CCD caller arguments as `--min_loops <min_loops>` when `ccd_caller_args` is null.",
"fa_icon": "fas fa-project-diagram",
"default": 2
},
"min_length": {
"type": "integer",
"title": "Minimum CCD Length (bp)",
"description": "Minimum genomic length (in base pairs) required for a CCD to be reported.",
"help_text": "Domains shorter than this threshold are filtered out. With 5 kb bins, for example, a minimum length of 15,000 bp corresponds to approximately three consecutive bins. Increase this value to restrict CCDs to larger structural domains or decrease it to retain more local domains. This value is inserted into the default arguments as `--min_length <min_length>` when `ccd_caller_args` is null.",
"fa_icon": "fas fa-ruler-horizontal",
"default": 15000
}
}
},
"institutional_config_options": {
"title": "Institutional config options",
"type": "object",
"fa_icon": "fas fa-university",
"description": "Parameters used to describe centralised config profiles. These should not be edited.",
"help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.",
"properties": {
"custom_config_version": {
"type": "string",
"description": "Git commit id for Institutional configs.",
"default": "master",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"custom_config_base": {
"type": "string",
"description": "Base directory for Institutional configs.",
"default": "https://raw.githubusercontent.com/nf-core/configs/master",
"hidden": true,
"help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.",
"fa_icon": "fas fa-users-cog"
},
"config_profile_name": {
"type": "string",
"description": "Institutional config name.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_description": {
"type": "string",
"description": "Institutional config description.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_contact": {
"type": "string",
"description": "Institutional config contact information.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_url": {
"type": "string",
"description": "Institutional config URL link.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
}
}
},
"max_job_request_options": {
"title": "Max job request options",
"type": "object",
"fa_icon": "fab fa-acquisitions-incorporated",
"description": "Set the top limit for requested resources for any single job.",
"help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
"properties": {
"max_cpus": {
"type": "integer",
"description": "Maximum number of CPUs that can be requested for any single job.",
"default": 16,
"fa_icon": "fas fa-microchip",
"hidden": true,
"help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
},
"max_memory": {
"type": "string",
"description": "Maximum amount of memory that can be requested for any single job.",
"default": "128.GB",
"fa_icon": "fas fa-memory",
"pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
"hidden": true,
"help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
},
"max_time": {
"type": "string",
"description": "Maximum amount of time that can be requested for any single job.",
"default": "240.h",
"fa_icon": "far fa-clock",
"pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$",
"hidden": true,
"help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
}
}
},
"generic_options": {
"title": "Generic options",
"type": "object",
"fa_icon": "fas fa-file-import",
"description": "Less common options for the pipeline, typically set in a config file.",
"help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
"properties": {
"help": {
"type": "boolean",
"description": "Display help text.",
"fa_icon": "fas fa-question-circle",
"hidden": true
},
"version": {
"type": "boolean",
"description": "Display version and exit.",
"fa_icon": "fas fa-question-circle",
"hidden": true
},
"publish_dir_mode": {
"type": "string",
"default": "copy",
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"hidden": true
},
"email_on_fail": {
"type": "string",
"description": "Email address for completion summary, only when pipeline fails.",
"fa_icon": "fas fa-exclamation-triangle",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
"help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
"hidden": true
},
"plaintext_email": {
"type": "boolean",
"description": "Send plain-text email instead of HTML.",
"fa_icon": "fas fa-remove-format",
"hidden": true
},
"max_multiqc_email_size": {
"type": "string",
"description": "File size limit when attaching MultiQC reports to summary emails.",
"pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
"default": "25.MB",
"fa_icon": "fas fa-file-upload",
"hidden": true
},
"monochrome_logs": {
"type": "boolean",
"description": "Do not use coloured log outputs.",
"fa_icon": "fas fa-palette",
"hidden": true
},
"hook_url": {
"type": "string",
"description": "Incoming hook URL for messaging service",
"fa_icon": "fas fa-people-group",
"help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
"hidden": true
},
"multiqc_config": {
"type": "string",
"description": "Custom config file to supply to MultiQC.",
"fa_icon": "fas fa-cog",
"hidden": true
},
"multiqc_logo": {
"type": "string",
"description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
"fa_icon": "fas fa-image",
"hidden": true
},
"multiqc_methods_description": {
"type": "string",
"description": "Custom MultiQC yaml file containing HTML including a methods description.",
"fa_icon": "fas fa-cog"
},
"tracedir": {
"type": "string",
"description": "Directory to keep pipeline Nextflow logs and reports.",
"default": "${params.outdir}/pipeline_info",
"fa_icon": "fas fa-cogs",
"hidden": true
},
"validate_params": {
"type": "boolean",
"description": "Boolean whether to validate parameters against the schema at runtime",
"default": true,
"fa_icon": "fas fa-check-square",
"hidden": true
},
"show_hidden_params": {
"type": "boolean",
"fa_icon": "far fa-eye-slash",
"description": "Show all params when using `--help`",
"hidden": true,
"help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
}
}
}
},
"allOf": [
{
"$ref": "#/$defs/input_output_options"
},
{
"$ref": "#/$defs/reference_annotation_options"
},
{
"$ref": "#/$defs/mapping_filtering_options"
},
{
"$ref": "#/$defs/peaks_loops_options"
},
{
"$ref": "#/$defs/matrices_options"
},
{
"$ref": "#/$defs/visualization_options"
},
{
"$ref": "#/$defs/multimm_options"
},
{
"$ref": "#/$defs/institutional_config_options"
},
{
"$ref": "#/$defs/max_job_request_options"
},
{
"$ref": "#/$defs/generic_options"
}
]
}