From a02c35046802557322a71b1a9077b9c9ffc91a46 Mon Sep 17 00:00:00 2001
From: Elena Buscaroli <ele.buscaroli@gmail.com>
Date: Wed, 29 Jan 2025 16:15:35 +0100
Subject: [PATCH 1/8] removing obsolete modules.config

---
 .nf-core.yml        |  4 ++++
 conf/modules.config | 34 ----------------------------------
 2 files changed, 4 insertions(+), 34 deletions(-)
 delete mode 100644 conf/modules.config

diff --git a/.nf-core.yml b/.nf-core.yml
index d3220f5..f21e986 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -13,3 +13,7 @@ template:
   org: nf-core
   outdir: .
   version: 1.0dev
+lint:
+  modules_config: False
+  files_exist:
+    - conf/modules.config
diff --git a/conf/modules.config b/conf/modules.config
deleted file mode 100644
index d203d2b..0000000
--- a/conf/modules.config
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Config file for defining DSL2 per module options and publishing paths
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Available keys to override module options:
-        ext.args   = Additional arguments appended to command in module.
-        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
-        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
-        ext.prefix = File name prefix for output files.
-----------------------------------------------------------------------------------------
-*/
-
-process {
-
-    publishDir = [
-        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-    ]
-
-    withName: FASTQC {
-        ext.args = '--quiet'
-    }
-
-    withName: 'MULTIQC' {
-        ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
-        publishDir = [
-            path: { "${params.outdir}/multiqc" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-
-}

From c76acb9b4cb8338bc7f93528bae4d0e163df8655 Mon Sep 17 00:00:00 2001
From: Elena Buscaroli <ele.buscaroli@gmail.com>
Date: Wed, 29 Jan 2025 16:15:49 +0100
Subject: [PATCH 2/8] updating test.config params

---
 conf/test.config | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index bb94a8e..4b2e5b1 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -43,10 +43,10 @@ params {
 
     sparsesignatures_K = "2:3"
     sparsesignatures_nmf_runs = "3"
-    sparsesignatures_iterations = "10"
+    sparsesignatures_iterations = "5"
     sparsesignatures_max_iterations_lasso = "80"
-    sparsesignatures_cross_validation_repetitions = "8"
-    sparsesignatures_cross_validation_iterations = "5"
+    sparsesignatures_cross_validation_repetitions = "5"
+    sparsesignatures_cross_validation_iterations = "3"
     sparsesignatures_lambda_values_alpha = "c(0.00, 0.01)"
     sparsesignatures_lambda_values_beta = "c(0.01, 0.05)"
 

From 0a42b17710a696a373ed3fec1dda1a807cd00d1e Mon Sep 17 00:00:00 2001
From: Elena Buscaroli <ele.buscaroli@gmail.com>
Date: Wed, 29 Jan 2025 16:29:43 +0100
Subject: [PATCH 3/8] update modules.json removing old nf-core modules

---
 modules.json | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/modules.json b/modules.json
index 05615ab..011365e 100644
--- a/modules.json
+++ b/modules.json
@@ -23,16 +23,6 @@
                         "installed_by": ["modules", "vcf_annotate_ensemblvep"],
                         "patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff"
                     },
-                    "fastqc": {
-                        "branch": "master",
-                        "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296",
-                        "installed_by": ["modules"]
-                    },
-                    "multiqc": {
-                        "branch": "master",
-                        "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad",
-                        "installed_by": ["modules"]
-                    },
                     "tabix/tabix": {
                         "branch": "master",
                         "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1",

From eb273f6fd6dbb275db9780a76d7ea01809b9423c Mon Sep 17 00:00:00 2001
From: Elena Buscaroli <ele.buscaroli@gmail.com>
Date: Wed, 29 Jan 2025 16:30:09 +0100
Subject: [PATCH 4/8] adding templates

---
 .../local/CNAqc2tsv/templates/main_script.R   |  94 ++++
 .../SparseSignatures/templates/main_script.R  | 248 ++++++++++
 .../local/cna2CNAqc/templates/main_script.R   |  63 +++
 .../local/mobsterh/templates/main_script.R    | 199 ++++++++
 .../local/pyclonevi/templates/main_script.py  | 132 +++++
 .../local/vcf2CNAqc/templates/main_script.R   | 454 ++++++++++++++++++
 modules/local/viber/templates/main_script.R   | 236 +++++++++
 7 files changed, 1426 insertions(+)
 create mode 100644 modules/local/CNAqc2tsv/templates/main_script.R
 create mode 100644 modules/local/SparseSignatures/templates/main_script.R
 create mode 100644 modules/local/cna2CNAqc/templates/main_script.R
 create mode 100644 modules/local/mobsterh/templates/main_script.R
 create mode 100644 modules/local/pyclonevi/templates/main_script.py
 create mode 100644 modules/local/vcf2CNAqc/templates/main_script.R
 create mode 100644 modules/local/viber/templates/main_script.R

diff --git a/modules/local/CNAqc2tsv/templates/main_script.R b/modules/local/CNAqc2tsv/templates/main_script.R
new file mode 100644
index 0000000..75ff610
--- /dev/null
+++ b/modules/local/CNAqc2tsv/templates/main_script.R
@@ -0,0 +1,94 @@
+#!/usr/bin/env Rscript
+
+opt = list(
+    prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix')
+)
+
+# Auxiliary functions #####
+
+get_sample = function(m_cnaqc_obj, sample, which_obj) {
+    if (class(m_cnaqc_obj) != "m_cnaqc") {
+        wrong_class_all = class(m_cnaqc_obj)
+
+        cli::cli_abort(
+            c("cnaqc_objs must be a {.field m_cnaqc} object",
+            "x" = "{.var m_cnaqc_obj} is a {.cls {class(m_cnaqc_obj)}}")
+        )
+    }
+
+    consented_obj = c("shared", "original")
+    if ((which_obj %in% consented_obj) == FALSE) {
+        cli::cli_abort("{.var which_obj} must be one of {.val shared} or {.val original}")
+    }
+
+    # define the element names
+    if (which_obj == "original") {
+        type = "original_cnaqc_objc"
+        # check if the original cnaqc obj exist
+        check_or = any(names(m_cnaqc_obj) == type)
+        if (check_or == FALSE) {
+            cli::cli_abort(c("mCNAqc object was build without keeping original CNAqc objects"),
+                        "x" = "It is not possible to retrieve the required samples")
+        } else {
+            cli::cli_h1("Retrieving original {.cls CNAqc} objects")
+            cnaqc_samples = m_cnaqc_obj[[type]][sample]
+        }
+
+    } else {
+        type = "cnaqc_obj_new_segmentation"
+        cli::cli_h1("Retrieving {.cls CNAqc} objects with the new segmentation")
+        cnaqc_samples = m_cnaqc_obj[[type]][sample]
+    }
+    return(cnaqc_samples)
+}
+
+get_sample_name = function(x) {
+    if (class(x) == "m_cnaqc") {
+        lapply(x[["cnaqc_obj_new_segmentation"]], function(y) {
+            y[["sample"]]
+        }) %>% unlist() %>% unname()
+    } else if (class(x) == "cnaqc") {
+        x[["sample"]]
+    } else {
+        wrong_class_all = class(x)
+        cli::cli_abort(
+            c("must provide a {.field m_cnaqc} object",
+            "x" = "{.var x} is a {.cls {class(x)}}")
+        )
+    }
+}
+
+get_mCNAqc_stats = function(m_cnaqc_obj){
+    stats = m_cnaqc_obj[["m_cnaqc_stats"]]
+    return(stats)
+}
+
+# Script ####
+
+library(dplyr)
+library(CNAqc)
+
+multi_cnaqc = readRDS(file = "$rds_join")
+mutations_multisample = get_sample(m_cnaqc_obj = multi_cnaqc,sample = get_sample_name(multi_cnaqc),
+                                which_obj = "original")
+multisample_jointTable = list()
+
+for (s in get_sample_name(multi_cnaqc)){
+    purity = mutations_multisample[[s]][["purity"]]
+    multisample_jointTable[[s]] = mutations_multisample[[s]][["mutations"]] %>%
+        dplyr::mutate(purity=purity)
+}
+
+joint_table = bind_rows(multisample_jointTable)
+
+write.table(joint_table, file = paste0(opt[["prefix"]], "_joint_table.tsv"), append = F, quote = F, sep = "\t", row.names = FALSE)
+
+# version export
+f = file("versions.yml","w")
+dplyr_version = sessionInfo()\$otherPkgs\$dplyr\$Version
+cnaqc_version = sessionInfo()\$otherPkgs\$CNAqc\$Version
+writeLines(paste0('"', "$task.process", '"', ":"), f)
+writeLines(paste("    dplyr:", dplyr_version), f)
+writeLines(paste("    CNAqc:", cnaqc_version), f)
+close(f)
+
diff --git a/modules/local/SparseSignatures/templates/main_script.R b/modules/local/SparseSignatures/templates/main_script.R
new file mode 100644
index 0000000..a0f1c90
--- /dev/null
+++ b/modules/local/SparseSignatures/templates/main_script.R
@@ -0,0 +1,248 @@
+#!/usr/bin/env Rscript
+
+parse_args = function(x) {
+    x = gsub("\\\\[","",x)
+    x = gsub("\\\\]","",x)
+    # giving errors when we have lists like c(xxx, xxx) since it will separate it
+    # args_list = unlist(strsplit(x, ', ')[[1]])
+    args_list = unlist(strsplit(x, ", (?=[^)]*(?:\\\\(|\$))", perl=TRUE))
+    # args_vals = lapply(args_list, function(x) strsplit(x, split=":")[[1]])
+    args_vals = lapply(args_list, function(x) {
+        x_splt = strsplit(x, split=":")[[1]]
+        c(x_splt[1],  paste(x_splt[2:length(x_splt)], collapse=":"))
+    })
+
+    # Ensure the option vectors are length 2 (key/ value) to catch empty ones
+    args_vals = lapply(args_vals, function(z){ length(z) = 2; z})
+
+    parsed_args = structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1]))
+    parsed_args[! is.na(parsed_args)]
+}
+
+opt = list(
+    prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix')
+)
+args_opt = parse_args('$task.ext.args')
+for ( ao in names(args_opt)) opt[[ao]] = args_opt[[ao]]
+
+# Auxiliary functions #####
+get_sample = function(m_cnaqc_obj, sample, which_obj) {
+    if (class(m_cnaqc_obj) != "m_cnaqc") {
+        wrong_class_all = class(m_cnaqc_obj)
+        cli::cli_abort(
+            c("cnaqc_objs must be a {.field m_cnaqc} object",
+            "x" = "{.var m_cnaqc_obj} is a {.cls {class(m_cnaqc_obj)}}")
+        )
+    }
+
+    consented_obj = c("shared", "original")
+    if ((which_obj %in% consented_obj) == FALSE) {
+        cli::cli_abort("{.var which_obj} must be one of {.val shared} or {.val original}")
+    }
+
+    # define the element names
+    if (which_obj == "original") {
+        type = "original_cnaqc_objc"
+        # check if the original cnaqc obj exist
+        check_or = any(names(m_cnaqc_obj) == type)
+        if (check_or == FALSE) {
+            cli::cli_abort(c("mCNAqc object was build without keeping original CNAqc objects"),
+                        "x" = "It is not possible to retrieve the required samples")
+        } else {
+            cli::cli_h1("Retrieving original {.cls CNAqc} objects")
+            cnaqc_samples = m_cnaqc_obj[[type]][sample]
+        }
+    } else {
+        type = "cnaqc_obj_new_segmentation"
+        cli::cli_h1("Retrieving {.cls CNAqc} objects with the new segmentation")
+        cnaqc_samples = m_cnaqc_obj[[type]][sample]
+    }
+    return(cnaqc_samples)
+}
+
+get_sample_name =function(x) {
+    if (class(x) == "m_cnaqc") {
+        lapply(x[["cnaqc_obj_new_segmentation"]], function(y) {
+            y[["sample"]]
+        }) %>% unlist() %>% unname()
+    } else if (class(x) == "cnaqc") {
+        x[["sample"]]
+    } else {
+        wrong_class_all = class(x)
+        cli::cli_abort(
+            c("must provide a {.field m_cnaqc} object",
+            "x" = "{.var x} is a {.cls {class(x)}}")
+        )
+    }
+}
+
+get_mCNAqc_stats = function(m_cnaqc_obj){
+    stats = m_cnaqc_obj[["m_cnaqc_stats"]]
+    return(stats)
+}
+
+# Script #####
+
+library(SparseSignatures)
+library(ggplot2)
+library(stringr)
+library(patchwork)
+library(dplyr)
+
+n_procs = parse(text=opt[["num_processes"]])
+if (n_procs == "all"){
+    n_procs = as.double("Inf")
+} else {
+    n_procs = eval(n_procs)
+}
+
+
+patients_tsv = strsplit("$tsv_join", " ")[[1]]
+tables = lapply(patients_tsv, FUN = function(p_table){
+    read.delim(p_table, sep = "\\t", header=T) %>%
+        mutate(across(everything(), as.character))
+}
+)
+multisample_table = dplyr::bind_rows(tables)
+
+
+#Extract input data information
+input_data = multisample_table[,c("Indiv","chr","from","to","ref","alt")]
+input_data = setNames(input_data, c("sample","chrom","start","end","ref","alt"))
+input_data[["end"]] = input_data[["start"]]
+input_data = input_data %>% mutate(start = as.integer(start), end = as.integer(end))
+
+#Generate the patient vs mutation count matrix from mutation data
+#Load reference human-genome specification.
+#The user must select, among the available choices, the reference genome consistent with the mutation dataset.
+
+load_genome = function(genome, input_data) {
+    if (genome == "GRCh37") {
+        library(BSgenome.Hsapiens.1000genomes.hs37d5)
+        bsg = BSgenome.Hsapiens.1000genomes.hs37d5::hs37d5
+        input_data[["chrom"]] = substring(input_data[["chrom"]], 4, 5)
+
+    } else if (genome == "GRCh38") {
+        library(BSgenome.Hsapiens.UCSC.hg38)
+        bsg = BSgenome.Hsapiens.UCSC.hg38
+
+        # Leave 'chrom' unchanged for GRCh38
+    }
+    return(list(bsg = bsg, input_data = input_data))
+}
+
+data_list = load_genome("$params.genome", input_data)
+bsg = data_list[["bsg"]]
+input_data = data_list[["input_data"]]
+
+mut_counts = SparseSignatures::import.trinucleotides.counts(data=input_data, reference=bsg)
+
+# Load a reference SBS5 background signature from COSMIC
+data(background)
+
+# Estimate the initial values of beta
+starting_betas = SparseSignatures::startingBetaEstimation(x = mut_counts,
+                                                        K = eval(parse(text=opt[["K"]])),
+                                                        background_signature = background,
+                                                        nmf_runs = as.integer(opt[["nmf_runs"]]))
+
+# Find the optimal number of signatures and sparsity level: rely on cross-validation
+# higher number of CV repetitions corresponds to more accurate parameter estimates
+
+cv_out = SparseSignatures::nmfLassoCV(
+    x = mut_counts,
+    K = eval(parse(text=opt[["K"]])),
+    starting_beta = starting_betas,
+    background_signature = background,
+    normalize_counts = as.logical(opt[["normalize_counts"]]),
+    nmf_runs = as.integer(opt[["nmf_runs"]]),
+    lambda_values_alpha = eval(parse(text=opt[["lambda_values_alpha"]])),
+    lambda_values_beta = eval(parse(text=opt[["lambda_values_beta"]])),
+    cross_validation_entries = as.numeric(opt[["cross_validation_entries"]]),
+    cross_validation_iterations = as.integer(opt[["cross_validation_iterations"]]),
+    cross_validation_repetitions = as.integer(opt[["cross_validation_repetitions"]]),
+    iterations = as.integer(opt[["iterations"]]),
+    max_iterations_lasso = as.integer(opt[["max_iterations_lasso"]]),
+    num_processes = n_procs,
+    verbose = as.logical(opt[["verbose"]]),
+    seed = as.integer(opt[["seed"]])
+)
+
+# Analyze the mean squared error results averaging over cross-validation repetitions
+cv_mses = cv_out[["grid_search_mse"]][1,,]
+cv_means_mse = matrix(sapply(cv_mses, FUN = mean),
+                    nrow = dim(cv_mses)[1]
+)
+
+dimnames(cv_means_mse) = dimnames(cv_mses)
+
+# Find the combination of parameters that yields the lowest MSE
+
+print("CV MEAN MSE")
+print(cv_means_mse)
+
+print("MIN MEANS MSE")
+print(min(cv_means_mse, na.rm = TRUE))
+
+min_ii = which(cv_means_mse == min(cv_means_mse, na.rm = TRUE), arr.ind = TRUE)
+min_Lambda_beta = rownames(cv_means_mse)[min_ii[1]]
+min_Lambda_beta = as.numeric(gsub("_Lambda_Beta", "", min_Lambda_beta))
+min_K = colnames(cv_means_mse)[min_ii[2]]
+min_K = as.numeric(gsub("_Signatures", "", min_K))
+best_params_config = data.frame(min_K, min_Lambda_beta)
+
+saveRDS(object = cv_means_mse, file = paste0(opt[["prefix"]], "_cv_means_mse.rds"))
+saveRDS(object = best_params_config, file = paste0(opt[["prefix"]], "_best_params_config.rds"))
+
+# Discovering the signatures within the dataset: NMF Lasso
+# Compute the signatures for the best configuration.
+
+print(paste("MIN K =", min_K))
+
+nmf_Lasso_out = SparseSignatures::nmfLasso(
+    x = mut_counts,
+    K = min_K,
+    beta = eval(parse(text=opt[["beta"]])),
+    background_signature = background,
+    normalize_counts = as.logical(opt[["normalize_counts"]]),
+    lambda_rate_alpha = eval(parse(text=opt[["lambda_rate_alpha"]])),
+    lambda_rate_beta = min_Lambda_beta,
+    iterations = as.integer(opt[["iterations"]]),
+    max_iterations_lasso = as.integer(opt[["max_iterations_lasso"]]),
+    verbose = as.logical(opt[["verbose"]])
+)
+
+saveRDS(object = nmf_Lasso_out, file =  paste0(opt[["prefix"]], "_nmf_Lasso_out.rds"))
+
+# Signature visualization
+signatures = nmf_Lasso_out[["beta"]]
+plot_signatures = SparseSignatures::signatures.plot(beta=signatures, xlabels=FALSE)
+
+plot_exposure = nmf_Lasso_out[["alpha"]] %>%
+    as.data.frame() %>%
+    tibble::rownames_to_column(var="PatientID") %>%
+    tidyr::pivot_longer(cols=!"PatientID", names_to="Signatures", values_to="Exposures") %>%
+
+    ggplot() +
+    geom_bar(aes(x=PatientID, y=Exposures, fill=Signatures),
+            position="stack", stat="identity") +
+    theme(axis.text.x=element_text(angle=90,hjust=1),
+            panel.background=element_blank(),
+            axis.line=element_line(colour="black"))
+
+plt_all = patchwork::wrap_plots(plot_exposure, plot_signatures, ncol=2) + patchwork::plot_annotation(title = "$meta.id")
+ggplot2::ggsave(plot = plt_all, filename = paste0(opt[["prefix"]], "_plot_all.pdf"), width = 210, height = 297, units="mm", dpi = 200)
+saveRDS(object = plt_all, file = paste0(opt[["prefix"]], "_plot_all.rds"))
+
+# version export
+f = file("versions.yml","w")
+SparseSignatures_version = sessionInfo()\$otherPkgs\$SparseSignatures\$Version
+dplyr_version = sessionInfo()\$otherPkgs\$dplyr\$Version
+ggplot2_version = sessionInfo()\$otherPkgs\$ggplot2\$Version
+patchwork_version = sessionInfo()\$otherPkgs\$patchwork\$Version
+writeLines(paste0('"', "$task.process", '"', ":"), f)
+writeLines(paste("    SparseSignatures:", SparseSignatures_version), f)
+writeLines(paste("    dplyr:", dplyr_version), f)
+writeLines(paste("    ggplot2:", ggplot2_version), f)
+writeLines(paste("    patchwork:", patchwork_version), f)
+close(f)
diff --git a/modules/local/cna2CNAqc/templates/main_script.R b/modules/local/cna2CNAqc/templates/main_script.R
new file mode 100644
index 0000000..d9468e6
--- /dev/null
+++ b/modules/local/cna2CNAqc/templates/main_script.R
@@ -0,0 +1,63 @@
+#!/usr/bin/env Rscript
+
+opt = list(
+    prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix')
+)
+
+library(dplyr)
+library(readr)
+
+parse_Sequenza = function(segments_file, extra_file){
+    # Extract the segments information
+    segments = readr::read_tsv(segments_file, col_types = readr::cols()) %>%
+        dplyr::rename(
+            chr = chromosome,
+            from = start.pos,
+            to = end.pos,
+            Major = A,
+            minor = B) %>%
+        dplyr::select(chr, from, to, Major, minor, dplyr::everything())
+
+    solutions = readr::read_tsv(extra_file, col_types = readr::cols())
+    purity = solutions[["cellularity"]][2]
+    ploidy = solutions[["ploidy.estimate"]][2]
+    return(list(segments = segments, purity = purity, ploidy = ploidy))
+}
+
+parse_ASCAT = function(segments_file, extra_file){
+    # Extract the segments information
+    segments = readr::read_tsv(segments_file, col_types = readr::cols()) %>%
+        dplyr::mutate(chr = paste0("chr",chr)) %>%
+        dplyr::rename(
+            from = startpos,
+            to = endpos,
+            Major = nMajor,
+            minor = nMinor) %>%
+        dplyr::select(chr, from, to, Major, minor)
+
+    solutions = readr::read_tsv(extra_file, col_types = readr::cols())
+    purity = solutions[["AberrantCellFraction"]]
+    ploidy = solutions[["Ploidy"]]
+    return(list(segments = segments, purity = purity, ploidy = ploidy))
+}
+
+if ("$meta.cna_caller" == 'sequenza'){
+CNA = parse_Sequenza(segments = "$cna_segs", extra = "$cna_extra")
+
+} else if ("$meta.cna_caller" == 'ASCAT'){
+CNA = parse_ASCAT(segments = "$cna_segs", extra = "$cna_extra")
+
+} else {
+stop('Copy Number Caller not supported.')
+}
+
+saveRDS(object = CNA, file = paste0(opt[["prefix"]], "_cna.rds"))
+
+# version export
+f = file("versions.yml","w")
+readr_version = sessionInfo()\$otherPkgs\$readr\$Version
+dplyr_version = sessionInfo()\$otherPkgs\$dplyr\$Version
+writeLines(paste0('"', "$task.process", '"', ":"), f)
+writeLines(paste("    readr:", readr_version), f)
+writeLines(paste("    dplyr:", dplyr_version), f)
+close(f)
diff --git a/modules/local/mobsterh/templates/main_script.R b/modules/local/mobsterh/templates/main_script.R
new file mode 100644
index 0000000..83de713
--- /dev/null
+++ b/modules/local/mobsterh/templates/main_script.R
@@ -0,0 +1,199 @@
+#!/usr/bin/env Rscript
+
+parse_args = function(x) {
+    x = gsub("\\\\[","",x)
+    x = gsub("\\\\]","",x)
+    # giving errors when we have lists like c(xxx, xxx) since it will separate it
+    # args_list = unlist(strsplit(x, ', ')[[1]])
+    args_list = unlist(strsplit(x, ", (?=[^)]*(?:\\\\(|\$))", perl=TRUE))
+    # args_vals = lapply(args_list, function(x) strsplit(x, split=":")[[1]])
+    args_vals = lapply(args_list, function(x) {
+        x_splt = strsplit(x, split=":")[[1]]
+        c(x_splt[1],  paste(x_splt[2:length(x_splt)], collapse=":"))
+    })
+
+    # Ensure the option vectors are length 2 (key/ value) to catch empty ones
+    args_vals = lapply(args_vals, function(z){ length(z) = 2; z})
+
+    parsed_args = structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1]))
+    parsed_args[! is.na(parsed_args)]
+}
+
+opt = list(
+    prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix')
+)
+args_opt = parse_args('$task.ext.args')
+for ( ao in names(args_opt)) opt[[ao]] = args_opt[[ao]]
+
+
+# Auxiliary functions #####
+
+get_sample = function(m_cnaqc_obj,
+                    sample,
+                    which_obj) {
+    if (class(m_cnaqc_obj) != "m_cnaqc") {
+        cli::cli_abort(c(
+        "mCNAqc object was build without keeping original CNAqc objects",
+        "x" = "It is not possible to retrieve the required samples"
+        ))
+    }
+
+    consented_obj = c("shared", "original")
+
+    if ((which_obj %in% consented_obj) == FALSE) {
+        cli::cli_abort("{.var which_obj} must be one of 'shared' or 'original'")
+    }
+    # define the element names
+    if (which_obj == "original") {
+        type = "original_cnaqc_objc"
+        # check if the original cnaqc obj exist
+        check_or = any(names(m_cnaqc_obj) == type)
+        if (check_or == FALSE) {
+        cli::cli_abort(c(
+            "mCNAqc object was build without keeping original CNAqc objects",
+            "x" = "It is not possible to retrieve the required samples"
+        ))
+        } else {
+        cli::cli_h1("Retrieving original {.cls CNAqc} objects")
+        cnaqc_samples = m_cnaqc_obj[[type]][sample]
+        }
+    } else {
+        type = "cnaqc_obj_new_segmentation"
+        cli::cli_h1("Retrieving {.cls CNAqc} objects with the new segmentation")
+        cnaqc_samples = m_cnaqc_obj[[type]][sample]
+    }
+    return(cnaqc_samples)
+}
+
+get_sample_name = function(x) {
+    if (class(x) == "m_cnaqc") {
+        lapply(x[["cnaqc_obj_new_segmentation"]], function(y) {
+        y[["sample"]]
+        }) %>% unlist() %>% unname()
+
+    } else if (class(x) == "cnaqc") {
+        x[["sample"]]
+    } else {
+        cli::cli_abort(c(
+        "Must provide a {.field m_cnaqc} object",
+        "x" = "{.var x} is a {.cls {class(x)}}"
+        ))
+    }
+}
+
+get_mCNAqc_stats = function(m_cnaqc_obj) {
+    stats = m_cnaqc_obj[["m_cnaqc_stats"]]
+    return(stats)
+}
+
+
+# Script #####
+
+library(CNAqc)
+library(mobster)
+library(dplyr)
+library(ggplot2)
+
+patientID = description = "$meta.patient"
+samples = strsplit(x = "$meta.tumour_sample", ",") %>% unlist()  # list of samples
+
+## read mCNAqc object
+if ( grepl(".rds\$", tolower("$rds_join")) ) {
+    obj = readRDS("$rds_join")
+    if (class(obj) == "m_cnaqc") {
+        original = obj %>% get_sample(sample=samples, which_obj="original")
+        input_table = lapply(names(original),
+                                function(sample_name)
+                                original[[sample_name]] %>%
+                                    # keep only mutations on the diploid karyotype
+                                    CNAqc::subset_by_segment_karyotype("1:1") %>%
+                                    CNAqc::Mutations() %>%
+                                    dplyr::mutate(sample_id=sample_name)
+                                ) %>% dplyr::bind_rows()
+    } else {
+        cli::cli_abort("Object of class {class($rds_join)} not supported.")
+    }
+} else {
+    input_table = read.csv("$rds_join")
+}
+
+## Function to run a single mobster fit
+run_mobster_fit = function(joint_table, descr) {
+    # get input table for the single patient
+    inp_tb = joint_table %>%
+        dplyr::filter(VAF < 1) %>%
+        # dplyr::mutate(VAF=replace(VAF, VAF==0, 1e-7)) %>%
+        dplyr::filter(VAF!=0) %>%
+        dplyr::filter(karyotype=="1:1")
+        # dplyr::rename(variantID=gene) %>%
+        # dplyr::rename(is.driver=is_driver) %>%
+        # dplyr::rename(tumour_content=purity) %>%
+        # dplyr::rename(is_driver=is.driver)
+        # dplyr::rename(is_driver=is.driver, driver_label=variantID)
+
+# mobster_fit(x = inp_tb,
+#             K = eval(parse(text="$K")),
+#             samples = as.integer("$samples"),
+#             init = "$init",
+#             tail = eval(parse(text="$tail")),
+#             epsilon = as.numeric("$epsilon"),
+#             maxIter = as.integer("$maxIter"),
+#             fit.type = "$fit_type",
+#             seed = as.integer("$seed"),
+#             model.selection = "$model_selection",
+#             trace = as.logical("$trace"),
+#             parallel = as.logical("$parallel"),
+#             pi_cutoff = as.numeric("$pi_cutoff"),
+#             N_cutoff = as.integer("$n_cutoff"),
+#             auto_setup = eval(parse(text="$auto_setup")),
+#             silent = as.logical("$silent"),
+#             description = descr)
+# }
+
+mobster_fit(x = inp_tb,
+            K = eval(parse(text=opt[["K"]])),
+            samples = as.integer(opt[["samples"]]),
+            init = opt[["init"]],
+            tail = eval(parse(text=opt[["tail"]])),
+            epsilon = as.numeric(opt[["epsilon"]]),
+            maxIter = as.integer(opt[["maxIter"]]),
+            fit.type = opt[["fit_type"]],
+            seed = as.integer(opt[["seed"]]),
+            model.selection = opt[["model_selection"]],
+            trace = as.logical(opt[["trace"]]),
+            parallel = as.logical(opt[["parallel"]]),
+            pi_cutoff = as.numeric(opt[["pi_cutoff"]]),
+            N_cutoff = as.integer(opt[["n_cutoff"]]),
+            auto_setup = eval(parse(text=opt[["auto_setup"]])),
+            silent = as.logical(opt[["silent"]]),
+            description = descr)
+}
+
+lapply(samples, function(sample_name) {
+
+    fit = run_mobster_fit(joint_table=input_table %>% dplyr::filter(sample_id == !!sample_name),
+                        descr=description)
+
+    best_fit = fit[["best"]]
+    plot_fit = plot(best_fit)
+
+    saveRDS(object=fit, file=paste0(opt[["prefix"]], "_mobsterh_st_fit.rds"))
+    saveRDS(object=best_fit, file=paste0(opt[["prefix"]], "_mobsterh_st_best_fit.rds"))
+    saveRDS(object=plot_fit, file=paste0(opt[["prefix"]], "_mobsterh_st_best_fit_plots.rds"))
+
+    # save report plots
+    report_fig = mobster::plot_model_selection(fit)
+    saveRDS(report_fig, file=paste0(opt[["prefix"]], "_REPORT_plots_mobster.rds"))
+    ggplot2::ggsave(plot=report_fig, filename=paste0(opt[["prefix"]], "_REPORT_plots_mobster.pdf"), height=210, width=210, units="mm", dpi = 200)
+    ggplot2::ggsave(plot=report_fig, filename=paste0(opt[["prefix"]], "_REPORT_plots_mobster.png"), height=210, width=210, units="mm", dpi = 200)
+})
+
+
+# version export
+f = file("versions.yml","w")
+cnaqc_version = sessionInfo()\$otherPkgs\$CNAqc\$Version
+mobster_version = sessionInfo()\$otherPkgs\$mobster\$Version
+writeLines(paste0('"', "$task.process", '"', ":"), f)
+writeLines(paste("    CNAqc:", cnaqc_version), f)
+writeLines(paste("    mobster:", mobster_version), f)
+close(f)
diff --git a/modules/local/pyclonevi/templates/main_script.py b/modules/local/pyclonevi/templates/main_script.py
new file mode 100644
index 0000000..612a79b
--- /dev/null
+++ b/modules/local/pyclonevi/templates/main_script.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+
+def parse_args(x):
+    x = x.replace("[","")
+    x = x.replace("]","")
+
+    args_list = x.split(", ")
+    parsed_args = {i.split(":")[0]:i.split(":")[1] for i in args_list if i.split(":")[1] is not None}
+    return parsed_args
+
+opt = dict()
+opt["prefix"] = "$task.ext.prefix" if "$task.ext.prefix" == None else "$meta.id"
+args_opt = parse_args("$task.ext.args")
+for ao_k, ao_v in args_opt.items():
+    opt[ao_k] = ao_v
+
+print("$tumour_samples")
+
+# Script #####
+
+import sys
+import pandas as pd
+import os
+import subprocess
+
+#input data preprocessing
+def create_pyclone_input(input_data, patient_id, output_data):
+    df = pd.read_csv(input_data, sep = '\t',header=0)
+    # df = df.query('karyotype=="1:1"')
+    df['normal_cn'] = 2
+    df['patient_id'] = patient_id
+    df['mutation_id'] = df.apply(lambda row: f"{patient_id}:{row['chr'][3:]}:{row['from']}:{row['alt']}", axis=1)
+    df[['major_cn', 'minor_cn']] = df['karyotype'].str.split(':', expand=True)
+    df = df.drop(columns=['karyotype'])
+    df = df.rename(columns={'Indiv': 'sample_id', 'NR': 'ref_counts', 'NV': 'alt_counts', 'purity': 'tumour_content'})
+
+    column_names = ['mutation_id', 'patient_id','sample_id', 'ref_counts', 'alt_counts', 'normal_cn', 'major_cn','minor_cn','tumour_content','driver_label','is_driver']
+    column_names_red = list(set(column_names) & set(df.columns))
+
+    df = df.loc[:, column_names_red]
+    df.to_csv(output_data, sep="\t",index=False, header=df.columns)
+
+
+def pyclone_ctree(joint, best_fit, ctree_input):
+    # parser = argparse.ArgumentParser(description='Process input and output files.')
+    # parser.add_argument('--joint', help='Joint table path')
+    # parser.add_argument('--best_fit', help='Pyclone-vi best fit path')
+    # parser.add_argument('--ctree_input', help='Table for ctree path')
+    # args = parser.parse_args()
+
+    ## Read pyclone best fit table
+    best_fit_file = best_fit
+    df_output = pd.read_csv(best_fit_file, sep='\t')
+
+    ## Read pyclone input table
+    joint_table = joint
+    #df_input = pd.read_csv(sys.argv[1], sep = '\t')
+    df_input = pd.read_csv(joint_table, sep = '\t')
+    ## Caluclate number of mutations per cluster and add to the subset orginal df_output dataframe
+
+    df_output_small = df_output[['mutation_id','sample_id','cluster_id','cellular_prevalence']]
+    df_output_small['nMuts'] = df_output_small.groupby('cluster_id')['mutation_id'].transform('nunique')
+
+    ## Find the clonal cluster and add the colum 'is.clonal' to identify it
+
+    samples = df_output_small['sample_id'].unique()
+    top_clusters = 0
+    for s in samples:
+        ind = df_output_small[df_output_small['sample_id']==s]['cellular_prevalence'].idxmax()
+        top_cluster = df_output_small.loc[ind,'cluster_id']
+        if (top_clusters != top_cluster):
+            top_clusters = top_cluster
+
+    i = df_output_small[df_output_small['cluster_id']==top_clusters].index
+
+    df_output_small['is.clonal'] = "F"
+    df_output_small.loc[i,'is.clonal'] = "T"
+
+    ## Merge the input information about driver genes to the clusters
+    if 'driver_label' not in df_input.columns or 'is_driver' not in df_input.columns:
+        df_input['driver_label'] = "NA"
+        df_input['is_driver'] = False
+        df_input.loc[1,'driver_label'] = ""
+        df_input.loc[1,'is_driver'] = True
+
+    df_merged = pd.merge(df_output_small, df_input[['mutation_id','sample_id','patient_id','driver_label','is_driver']], on = ["mutation_id","sample_id"], how="inner")
+    df_final = df_merged.drop_duplicates(subset=['sample_id', 'driver_label','cluster_id','is.clonal','is_driver'])
+    df_final = df_final.rename(columns={'cellular_prevalence':'CCF','cluster_id': 'cluster','driver_label':'variantID','patient_id':'patientID','is_driver':'is.driver'})
+    df_final['is.driver'] = df_final['is.driver'].replace({False: 'F', True: 'T'})
+
+    ## Final ctree input dataframe
+    final_jt_file = ctree_input
+    df_final.to_csv(final_jt_file, sep="\t",index=False, header=True)
+
+
+if __name__ == "__main__":
+    create_pyclone_input(input_data="$rds_join", patient_id="$meta.patient",
+                         output_data=opt["prefix"]+"_pyclone_input_all_samples.tsv")
+
+    column_name = "sample_id"
+    input_all_samples = pd.read_csv(opt["prefix"] + "_pyclone_input_all_samples.tsv", sep="\t")
+
+    samples_list = "$tumour_samples".replace("[","")
+    samples_list = samples_list.replace("]","")
+    samples_list = samples_list.split(sep=", ")
+    input_filtered = input_all_samples[input_all_samples[column_name].isin(samples_list)]
+    input_filtered.to_csv(opt["prefix"] + "_pyclone_input.tsv", sep="\t")
+
+    # Run pyclone
+    pyclonevi_run = "pyclone-vi fit -i " + opt["prefix"] + "_pyclone_input.tsv -o " + \
+                    opt["prefix"] + "_all_fits.h5 -c " + opt["n_cluster"] + " -d "+ \
+                    opt["density"] + " --num-grid-points " + opt["n_grid_point"] + \
+                    " --num-restarts " + opt["n_restarts"]
+    subprocess.run(pyclonevi_run, shell=True)
+
+    pyclonevi_write = "pyclone-vi write-results-file -i " + opt["prefix"] + "_all_fits.h5 -o " + \
+                      opt["prefix"] + "_best_fit.txt"
+    subprocess.run(pyclonevi_write, shell=True)
+
+    # # Pyclone ctree
+    pyclone_ctree(joint=opt["prefix"] + "_pyclone_input.tsv",
+                  best_fit=opt["prefix"] + "_best_fit.txt",
+                  ctree_input=opt["prefix"] + "_cluster_table.csv")
+
+    # Version
+    version = subprocess.check_output("pip show pyclone-vi | grep Version | awk '{print \$NF}'", shell=True).decode().split("\\n")[0]
+    print(version)
+    f = open("versions.yml", "a")
+    f.write("$task.process:")
+    f.write("    pyclone-vi: {}\\n".format(version))
+    f.close()
+
diff --git a/modules/local/vcf2CNAqc/templates/main_script.R b/modules/local/vcf2CNAqc/templates/main_script.R
new file mode 100644
index 0000000..d2fd031
--- /dev/null
+++ b/modules/local/vcf2CNAqc/templates/main_script.R
@@ -0,0 +1,454 @@
+#!/usr/bin/env Rscript
+
+parse_args = function(x) {
+    x = gsub("\\\\[","",x)
+    x = gsub("\\\\]","",x)
+    # giving errors when we have lists like c(xxx, xxx) since it will separate it
+    # args_list = unlist(strsplit(x, ', ')[[1]])
+    args_list = unlist(strsplit(x, ", (?=[^)]*(?:\\\\(|\$))", perl=TRUE))
+    # args_vals = lapply(args_list, function(x) strsplit(x, split=":")[[1]])
+    args_vals = lapply(args_list, function(x) {
+        x_splt = strsplit(x, split=":")[[1]]
+        c(x_splt[1],  paste(x_splt[2:length(x_splt)], collapse=":"))
+    })
+
+    # Ensure the option vectors are length 2 (key/ value) to catch empty ones
+    args_vals = lapply(args_vals, function(z){ length(z) = 2; z})
+
+    parsed_args = structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1]))
+    parsed_args[! is.na(parsed_args)]
+}
+
+opt = list(
+    prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix')
+)
+args_opt = parse_args('$task.ext.args')
+for ( ao in names(args_opt)) opt[[ao]] = args_opt[[ao]]
+
+print("\n\n\n")
+print(opt)
+print("\n\n\n")
+print("$task.ext.args")
+print("\n\n\n")
+
+# Auxiliriaty functions ####
+
+parse_FreeBayes = function(vcf, tumour_id, normal_id, filter_mutations = FALSE) {
+    tb = vcfR::vcfR2tidy(vcf)
+
+    gt_field = tb[["gt"]] %>%
+        tidyr::separate(gt_AD, sep = ",", into = c("NR", "NV")) %>%
+        dplyr::mutate(
+            NR = as.numeric(NR),
+            NV = as.numeric(NV),
+            DP = NV + NR,
+            VAF = NV/DP) %>%
+        dplyr::rename(sample = Indiv)
+
+    samples_list = gt_field[["sample"]] %>% unique
+
+    if ("CSQ" %in% tb[["meta"]][["ID"]]){
+    # VEP specific field extraction
+    # Take CSQ field names and split by |
+
+        vep_field = tb[["meta"]] %>%
+                dplyr::filter(ID == "CSQ") %>%
+                dplyr::select(Description) %>%
+                dplyr::pull()
+
+        vep_field = strsplit(vep_field, split = "|", fixed = TRUE)[[1]]
+        print(vep_field)
+        vep_field = vep_field[2:length(vep_field)-1]
+
+    # Tranform the fix field by splittig the CSQ and select the columns needed
+        fix_field = tb[["fix"]] %>%
+            dplyr::rename(
+                chr = CHROM,
+                from = POS,
+                ref = REF,
+                alt = ALT) %>%
+            dplyr::rowwise() %>%
+            dplyr::mutate(
+                from = as.numeric(from),
+                to = from + nchar(alt)) %>%
+            dplyr::ungroup() %>%
+            dplyr::select(chr, from, to, ref, alt, CSQ, dplyr::everything(),  -ChromKey, -DP) %>%
+            tidyr::separate(CSQ, vep_field, sep = "|") %>%
+            dplyr::select(chr, from, to, ref, alt, IMPACT, SYMBOL, Gene, dplyr::everything())
+        print(fix_field)
+
+    } else {
+        fix_field = tb[["fix"]] %>%
+            dplyr::rename(
+            chr = CHROM,
+            from = POS,
+            ref = REF,
+            alt = ALT
+            ) %>%
+            dplyr::rowwise() %>%
+            dplyr::mutate(
+            from = as.numeric(from),
+            to = from + nchar(alt)) %>%
+            dplyr::ungroup() %>%
+            dplyr::select(chr, from, to, ref, alt, dplyr::everything(), -ChromKey, -DP)
+    }
+
+    # if have to filter mutations
+    if (filter_mutations){
+        filter = c('PASS')
+    } else {
+        filter = fix_field[["FILTER"]] %>% unique()
+    }
+
+    calls = lapply(
+        samples_list,
+        function(s){
+        gt_field_s = gt_field %>% dplyr::filter(sample == s)
+
+        if(nrow(fix_field) != nrow(gt_field_s))
+            stop("Mismatch between the VCF fixed fields and the genotypes, will not process this file.")
+
+        fits = list()
+        fits[["sample"]] = s
+        fits[["mutations"]] = dplyr::bind_cols(fix_field, gt_field_s) %>%
+            dplyr::select(chr, from, to, ref, alt, NV, DP, VAF, dplyr::everything()) %>%
+            dplyr::filter(FILTER %in% filter)
+        fits
+        })
+
+    names(calls) = samples_list
+    samples = c(tumour_id, normal_id)
+    calls = calls[samples]
+    return(calls)
+}
+
+
+parse_Mutect = function(vcf, tumour_id, normal_id, filter_mutations = FALSE){
+    # Transform vcf to tidy
+    tb = vcfR::vcfR2tidy(vcf)
+
+    # Extract gt field and obtain coverage (DP) and variant allele frequency (VAF) fields
+    gt_field = tb[["gt"]] %>%
+        tidyr::separate(gt_AD, sep = ",", into = c("NR", "NV")) %>%
+        dplyr::mutate(
+            NR = as.numeric(NR),
+            NV = as.numeric(NV),
+            DP = NV + NR,
+            VAF = NV/DP) %>%
+        dplyr::rename(sample = Indiv)
+
+    # Extract sample names
+    samples_list = gt_field[["sample"]] %>% unique
+
+    # check if VCF is annotated with VEP
+    if ("CSQ" %in% tb[["meta"]][["ID"]]){
+    # VEP specific field extraction
+    # Take CSQ field names and split by |
+
+        vep_field = tb[["meta"]] %>%
+                    dplyr::filter(ID == "CSQ") %>%
+                    dplyr::select(Description) %>%
+                    dplyr::pull()
+
+        vep_field = strsplit(vep_field, split = "|", fixed = TRUE)[[1]]
+        print(vep_field)
+        vep_field = vep_field[2:length(vep_field)-1]
+
+    # Tranform the fix field by splittig the CSQ and select the columns needed
+        fix_field = tb[["fix"]] %>%
+            dplyr::rename(
+                chr = CHROM,
+                from = POS,
+                ref = REF,
+                alt = ALT) %>%
+            dplyr::rowwise() %>%
+            dplyr::mutate(
+                from = as.numeric(from),
+                to = from + nchar(alt)) %>%
+            dplyr::ungroup() %>%
+            dplyr::select(chr, from, to, ref, alt, CSQ, dplyr::everything()) %>%
+            tidyr::separate(CSQ, vep_field, sep = "|") %>%
+            dplyr::select(chr, from, to, ref, alt, IMPACT, SYMBOL, Gene, dplyr::everything(), -DP) #can add other thing, CSQ, HGSP
+        print(fix_field)
+
+    } else {
+        # Take from fix field some columns
+        fix_field = tb[["fix"]] %>%
+            dplyr::rename(
+                chr = CHROM,
+                from = POS,
+                ref = REF,
+                alt = ALT) %>%
+            dplyr::rowwise() %>%
+            dplyr::mutate(
+                from = as.numeric(from),
+                to = from + nchar(alt)) %>%
+            dplyr::ungroup() %>%
+            dplyr::select(chr, from, to, ref, alt, dplyr::everything(), -ChromKey, -DP) #-DP
+    }
+
+    if (filter_mutations){
+        filter = c('PASS')
+    } else {
+        filter = fix_field[["FILTER"]] %>% unique()
+    }
+
+    # For each sample create the table of mutations
+    calls = lapply(
+            samples_list,
+            function(s){
+            gt_field_s = gt_field %>% dplyr::filter(sample == s)
+
+            if(nrow(fix_field) != nrow(gt_field_s))
+                stop("Mismatch between the VCF fixed fields and the genotypes, will not process this file.")
+
+            fits = list()
+            fits[["sample"]] = s
+            fits[["mutations"]] = dplyr::bind_cols(fix_field, gt_field_s) %>%
+                dplyr::select(chr, from, to, ref, alt, NV, DP, VAF, dplyr::everything()) %>%
+                dplyr::filter(FILTER %in% filter)
+            fits
+            })
+
+    names(calls) = samples_list
+    samples = c(tumour_id, normal_id)
+    calls = calls[samples]
+    return(calls)
+}
+
+retrieve_ref_alt = function(row){
+    ref = row[["ref"]]
+    alt = row[["alt"]]
+    if (ref == 'A'){NR=as.integer(strsplit(row[["gt_AU"]], split=',')[[1]][1])}
+    if (ref == 'T'){NR=as.integer(strsplit(row[["gt_TU"]], split=',')[[1]][1])}
+    if (ref == 'G'){NR=as.integer(strsplit(row[["gt_GU"]], split=',')[[1]][1])}
+    if (ref == 'C'){NR=as.integer(strsplit(row[["gt_CU"]], split=',')[[1]][1])}
+
+    if (alt == 'A'){NV=as.integer(strsplit(row[["gt_AU"]], split=',')[[1]][1])}
+    if (alt == 'T'){NV=as.integer(strsplit(row[["gt_TU"]], split=',')[[1]][1])}
+    if (alt == 'G'){NV=as.integer(strsplit(row[["gt_GU"]], split=',')[[1]][1])}
+    if (alt == 'C'){NV=as.integer(strsplit(row[["gt_GU"]], split=',')[[1]][1])}
+
+    ref_alt = paste0(NR, ',', NV)
+    ref_alt
+}
+
+parse_Strelka = function(vcf, tumour_id, normal_id, filter_mutations = FALSE){
+    tb = vcfR::vcfR2tidy(vcf)
+    gt_field = tb[["gt"]] %>% rename(sample = Indiv)
+    samples_list = gt_field[["sample"]] %>% unique
+
+    if ("CSQ" %in% tb[["meta"]][["ID"]]){
+    # VEP specific field extraction
+    # Take CSQ field names and split by |
+
+        vep_field = tb[["meta"]] %>%
+                dplyr::filter(ID == "CSQ") %>%
+                dplyr::select(Description) %>%
+                dplyr::pull()
+
+    vep_field = strsplit(vep_field, split = "|", fixed = TRUE)[[1]]
+    print(vep_field)
+    vep_field = vep_field[2:length(vep_field)-1]
+
+    # Tranform the fix field by splittig the CSQ and select the columns needed
+    fix_field = tb[["fix"]] %>%
+        dplyr::rename(
+            chr = CHROM,
+            from = POS,
+            ref = REF,
+            alt = ALT) %>%
+        dplyr::rowwise() %>%
+        dplyr::mutate(
+            from = as.numeric(from),
+            to = from + nchar(alt)) %>%
+        dplyr::ungroup() %>%
+        dplyr::select(chr, from, to, ref, alt, CSQ, dplyr::everything(), -ChromKey) %>%
+        tidyr::separate(CSQ, vep_field, sep = "|") %>%
+        dplyr::select(chr, from, to, ref, alt, IMPACT, SYMBOL, Gene, dplyr::everything()) #can add other thing, CSQ, HGSP
+    print(fix_field)
+
+    } else {
+        # Take from fix field some columns
+        fix_field = tb[["fix"]] %>%
+            dplyr::rename(
+                chr = CHROM,
+                from = POS,
+                ref = REF,
+                alt = ALT) %>%
+            dplyr::rowwise() %>%
+            dplyr::mutate(
+                from = as.numeric(from),
+                to = from + nchar(alt)) %>%
+            dplyr::ungroup() %>%
+            dplyr::select(chr, from, to, ref, alt, dplyr::everything(), -ChromKey) #-DP
+    }
+
+    if (filter_mutations){
+        filter = c('PASS')
+    } else {
+        filter = fix_field[["FILTER"]] %>% unique()
+    }
+
+    calls = lapply(
+        samples_list,
+        function(s){
+            gt_field_s = gt_field %>% dplyr::filter(sample == s)
+
+            fits = list()
+            fits[["sample"]] = s
+            mutations = dplyr::bind_cols(fix_field, gt_field_s)
+            ref_alt = lapply(1:nrow(mutations), function(r){
+                retrieve_ref_alt(mutations[r,])
+        })
+
+        ref_alt = ref_alt %>% unlist()
+        mutations[["ref_alt"]] = ref_alt
+        mutations = mutations %>%
+            tidyr::separate(ref_alt, into = c('NR', 'NV')) %>%
+            dplyr::mutate(NR = as.integer(NR),
+                        NV = as.integer(NV)) %>%
+            dplyr::mutate(DP = NR+NV) %>%
+            dplyr::mutate(VAF = NV/DP) %>%
+            dplyr::select(chr, from, to, ref, alt, NV, DP, VAF, everything()) %>%
+            dplyr::filter(FILTER %in% filter)
+
+        fits[["mutations"]] = mutations
+        fits
+        })
+
+    names(calls) = samples_list
+    samples = c(tumour_id, normal_id)
+    calls = calls[samples]
+    return(calls)
+}
+
+
+parse_Platypus = function(vcf, tumour_id, normal_id, filter_mutations = FALSE){
+    tb = vcfR::vcfR2tidy(vcf)
+
+    gt_field = tb[["gt"]] %>%
+        dplyr::mutate(
+            DP = as.numeric(gt_NR),
+            NV = as.numeric(gt_NV),
+            VAF = NV/DP) %>%
+        dplyr::rename(sample = Indiv)
+
+    # Extract each sample
+    samples_list = gt_field[["sample"]] %>% unique
+
+    # check if VCF is annotated with VEP
+    if ("CSQ" %in% tb[["meta"]][["ID"]]){
+    # VEP specific field extraction
+    # Take CSQ field names and split by |
+
+        vep_field = tb[["meta"]] %>%
+                dplyr::filter(ID == "CSQ") %>%
+                dplyr::select(Description) %>%
+                dplyr::pull()
+
+    vep_field = strsplit(vep_field, split = "|", fixed = TRUE)[[1]]
+    print(vep_field)
+    vep_field = vep_field[2:length(vep_field)-1]
+
+    # Tranform the fix field by splittig the CSQ and select the columns needed
+    fix_field = tb[["fix"]] %>%
+        dplyr::rename(
+            chr = CHROM,
+            from = POS,
+            ref = REF,
+            alt = ALT) %>%
+        dplyr::rowwise() %>%
+        dplyr::mutate(
+            from = as.numeric(from),
+            to = from + nchar(alt)) %>%
+        dplyr::ungroup() %>%
+        dplyr::select(chr, from, to, ref, alt, CSQ, dplyr::everything(),  -ChromKey) %>%
+        tidyr::separate(CSQ, vep_field, sep = "|") %>%
+        dplyr::select(chr, from, to, ref, alt, IMPACT, SYMBOL, Gene, dplyr::everything())
+    print(fix_field)
+
+    } else {
+        fix_field = tb[["fix"]] %>%
+            dplyr::rename(
+            chr = CHROM,
+            from = POS,
+            ref = REF,
+            alt = ALT) %>%
+            dplyr::rowwise() %>%
+            dplyr::mutate(
+            from = as.numeric(from),
+            to = from + nchar(alt)) %>%
+            dplyr::ungroup() %>%
+            dplyr::select(chr, from, to, ref, alt, dplyr::everything(), -ChromKey)
+    }
+
+    if (filter_mutations){
+        filter = c('PASS')
+    } else {
+        filter = fix_field[["FILTER"]] %>% unique()
+    }
+
+    calls = lapply(
+        samples_list,
+        function(s){
+        gt_field_s = gt_field %>%
+            dplyr::filter(sample == s)
+
+        if(nrow(fix_field) != nrow(gt_field_s))
+            stop("Mismatch between the VCF fixed fields and the genotypes, will not process this file.")
+
+        fits = list()
+        fits[["sample"]] = s
+        fits[["mutations"]] = dplyr::bind_cols(fix_field, gt_field_s) %>%
+            dplyr::select(chr, from, to, ref, alt, NV, DP, VAF, dplyr::everything()) %>%
+            dplyr::filter(FILTER %in% filter)
+        fits
+        })
+
+    names(calls) = samples_list
+    samples = c(tumour_id, normal_id)
+    calls = calls[samples]
+    return(calls)
+}
+
+
+library(dplyr)
+library(tidyr)
+library(vcfR)
+
+
+# Read vcf file
+vcf = vcfR::read.vcfR("$vcf")
+
+# Check from which caller the .vcf has been produced
+source = vcfR::queryMETA(vcf, element = 'source')[[1]]
+
+if (TRUE %in% grepl(pattern = 'Mutect', x = source)){
+    calls = parse_Mutect(vcf, tumour_id = "$meta.tumour_sample", normal_id = "$meta.normal_sample", filter_mutations = as.logical(opt[["filter_mutations"]]))
+
+} else if (TRUE %in% grepl(pattern = 'strelka', x = source)){
+    calls = parse_Strelka(vcf, tumour_id = "$meta.tumour_sample", normal_id = "$meta.normal_sample", filter_mutations = as.logical(opt[["filter_mutations"]]))
+
+} else if (TRUE %in% grepl(pattern = 'Platypus', x = source)){
+    calls = parse_Platypus(vcf, tumour_id = "$meta.tumour_sample", normal_id = "$meta.normal_sample", filter_mutations = as.logical(opt[["filter_mutations"]]))
+
+} else if (TRUE %in% grepl(pattern = 'freeBayes', x = source)){
+    calls = parse_FreeBayes(vcf, tumour_id = "$meta.tumour_sample", normal_id = "$meta.normal_sample", filter_mutations = as.logical(opt[["filter_mutations"]]))
+
+} else {
+    stop('Variant Caller not supported.')
+}
+
+saveRDS(object = calls, file = paste0(opt[["prefix"]], "_snv.rds"))
+
+# version export
+f <- file("versions.yml","w")
+dplyr_version <- sessionInfo()\$otherPkgs\$dplyr\$Version
+tidyr_version <- sessionInfo()\$otherPkgs\$tidyr\$Version
+vcfR_version <- sessionInfo()\$otherPkgs\$vcfR\$Version
+writeLines(paste0('"', "$task.process", '"', ":"), f)
+writeLines(paste("    dplyr:", dplyr_version), f)
+writeLines(paste("    tidyr:", tidyr_version), f)
+writeLines(paste("    vcfR:", vcfR_version), f)
+close(f)
diff --git a/modules/local/viber/templates/main_script.R b/modules/local/viber/templates/main_script.R
new file mode 100644
index 0000000..d0a43ea
--- /dev/null
+++ b/modules/local/viber/templates/main_script.R
@@ -0,0 +1,236 @@
+#!/usr/bin/env Rscript
+
+parse_args = function(x) {
+    x = gsub("\\\\[","",x)
+    x = gsub("\\\\]","",x)
+    # giving errors when we have lists like c(xxx, xxx) since it will separate it
+    # args_list = unlist(strsplit(x, ', ')[[1]])
+    args_list = unlist(strsplit(x, ", (?=[^)]*(?:\\\\(|\$))", perl=TRUE))
+    # args_vals = lapply(args_list, function(x) strsplit(x, split=":")[[1]])
+    args_vals = lapply(args_list, function(x) {
+        x_splt = strsplit(x, split=":")[[1]]
+        c(x_splt[1],  paste(x_splt[2:length(x_splt)], collapse=":"))
+    })
+
+    # Ensure the option vectors are length 2 (key/ value) to catch empty ones
+    args_vals = lapply(args_vals, function(z){ length(z) = 2; z})
+
+    parsed_args = structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1]))
+    parsed_args[! is.na(parsed_args)]
+}
+
+opt = list(
+    prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix')
+)
+args_opt = parse_args('$task.ext.args')
+for ( ao in names(args_opt)) opt[[ao]] = args_opt[[ao]]
+
+
+# Auxiliary functions #####
+
+get_sample = function(m_cnaqc_obj, sample, which_obj) {
+    if (class(m_cnaqc_obj) != "m_cnaqc") {
+        wrong_class_all = class(m_cnaqc_obj)
+        cli::cli_abort(
+            c("cnaqc_objs must be a {.field m_cnaqc} object",
+                "x" = "{.var m_cnaqc_obj} is a {.cls {class(m_cnaqc_obj)}}")
+        )
+    }
+
+    consented_obj = c("shared", "original")
+    if ((which_obj %in% consented_obj) == FALSE) {
+        cli::cli_abort("{.var which_obj} must be one of {.val shared} or {.val original}")
+    }
+
+    # define the element names
+    if (which_obj == "original") {
+        type = "original_cnaqc_objc"
+        # check if the original cnaqc obj exist
+        check_or = any(names(m_cnaqc_obj) == type)
+        if (check_or == FALSE) {
+            cli::cli_abort(c("mCNAqc object was build without keeping original CNAqc objects"),
+                        "x" = "It is not possible to retrieve the required samples")
+        } else {
+            cli::cli_h1("Retrieving original {.cls CNAqc} objects")
+            cnaqc_samples = m_cnaqc_obj[[type]][sample]
+        }
+
+    } else {
+        type = "cnaqc_obj_new_segmentation"
+        cli::cli_h1("Retrieving {.cls CNAqc} objects with the new segmentation")
+        cnaqc_samples = m_cnaqc_obj[[type]][sample]
+    }
+    return(cnaqc_samples)
+}
+
+get_sample_name = function(x) {
+    if (class(x) == "m_cnaqc") {
+        lapply(x[["cnaqc_obj_new_segmentation"]], function(y) {
+            y[["sample"]]
+        }) %>% unlist() %>% unname()
+    } else if (class(x) == "cnaqc") {
+        x[["sample"]]
+    } else {
+        wrong_class_all = class(x)
+        cli::cli_abort(
+            c("must provide a {.field m_cnaqc} object",
+            "x" = "{.var x} is a {.cls {class(x)}}")
+        )
+    }
+}
+
+get_mCNAqc_stats = function(m_cnaqc_obj){
+    stats = m_cnaqc_obj[["m_cnaqc_stats"]]
+    return(stats)
+}
+
+# Script #####
+
+library(VIBER)
+library(dplyr)
+library(tidyverse)
+library(ggplot2)
+library(CNAqc)
+
+patientID = "$meta.patient"
+samples = substr("$tumour_samples", 2, nchar("$tumour_samples")-1)
+samples = strsplit(samples, ", ")[[1]]
+print("$tumour_samples")
+print("$rds_join")
+print(samples)
+
+if ( grepl(".rds\$", tolower("$rds_join")) ) {
+    input_obj = readRDS("$rds_join")
+    if (class(input_obj) == "m_cnaqc") {
+        shared = input_obj %>% get_sample(sample=samples, which_obj="shared")
+        joint_table = lapply(names(shared),
+                        function(sample_name)
+                        shared[[sample_name]] %>%
+                            # keep only mutations on the diploid karyotype
+                            CNAqc::subset_by_segment_karyotype("1:1") %>%
+                            CNAqc::Mutations() %>%
+                            dplyr::mutate(sample_id=sample_name)
+                        ) %>% dplyr::bind_rows()
+} else {
+    cli::cli_alert_warning("Object of class {class(input_obj)} not supported.")
+    return()
+    }
+}
+
+print("Subset joint done")
+
+## TODO : add drivers to `input_tab`
+
+## Read input joint table
+input_tab = joint_table %>%
+dplyr::mutate(VAF=replace(VAF, VAF==0, 1e-7))
+
+## Convert the input table into longer format
+reads_data = input_tab %>%
+dplyr::select(chr, from, ref, alt, NV, DP, VAF, sample_id) %>%
+tidyr::pivot_wider(names_from="sample_id",
+                    values_from=c("NV","DP","VAF"), names_sep=".")
+
+## Extract DP (depth)
+dp = reads_data %>%
+# dplyr::filter(mutation_id %in% non_tail) %>% ## this step should be managed before by other module
+dplyr::select(dplyr::starts_with("DP")) %>%
+dplyr::mutate(dplyr::across(.cols=dplyr::everything(),
+                            .fns=function(x) replace(x, is.na(x), 0))) %>%
+dplyr::rename_all(function(x) stringr::str_remove_all(x,"DP."))
+
+## Extract NV (alt_counts)
+nv = reads_data %>%
+# dplyr::filter(mutation_id %in% non_tail) %>% ## this step should be managed before by other module
+dplyr::select(dplyr::starts_with("NV")) %>%
+dplyr::mutate(dplyr::across(.cols=dplyr::everything(),
+                            .fns=function(x) replace(x, is.na(x), 0))) %>%
+dplyr::rename_all(function(x) stringr::str_remove_all(x,"NV."))
+
+# Standard fit
+viber_K = eval(parse(text=opt[["K"]]))
+viber_K[which.min(viber_K)] = 2
+st_fit = VIBER::variational_fit(nv, dp,
+                                K=viber_K,
+                                data=reads_data,
+                                # %>% dplyr::filter(mutation_id %in% non_tail)
+                                alpha_0=as.numeric(opt[["alpha_0"]]),
+                                a_0=as.integer(opt[["a_0"]]),
+                                b_0=as.integer(opt[["b_0"]]),
+                                max_iter=as.integer(opt[["maxIter"]]),
+                                epsilon_conv=as.numeric(opt[["epsilon_conv"]]),
+                                samples=as.integer(opt[["samples"]]),
+                                q_init=opt[["q_init"]],
+                                trace=as.logical(opt[["trace"]]),
+                                description=""
+                                )
+
+best_fit = best_fit_heuristic = st_fit
+
+# If all clusters are removed -> keep the origianl best fit
+tryCatch(expr = {
+    # Apply the heuristic
+    best_fit_heuristic = VIBER::choose_clusters(st_fit,
+        binomial_cutoff=as.numeric(opt[["binomial_cutoff"]]),
+        dimensions_cutoff=as.integer(opt[["dimensions_cutoff"]]),
+        pi_cutoff=as.numeric(opt[["pi_cutoff"]]),
+        re_assign=as.logical(opt[["re_assign"]])
+        )
+    }, error = function(e) {
+        print(e)
+        best_fit_heuristic <<- st_fit
+    } )
+
+# Save fits
+saveRDS(best_fit, file=paste0(opt[["prefix"]], "_viber_best_st_fit.rds"))
+saveRDS(best_fit_heuristic, file = paste0(opt[["prefix"]], "_viber_best_st_heuristic_fit.rds"))
+
+# Save plots
+if ("$n_samples" >1) { #mutlisample mode on
+print("multisample mode on")
+plot_fit = plot(best_fit)
+plot_fit_heuristic = plot(best_fit_heuristic)
+
+saveRDS(plot_fit, file=paste0(opt[["prefix"]], "_viber_best_st_fit_plots.rds"))
+saveRDS(plot_fit_heuristic, file=paste0(opt[["prefix"]], "_viber_best_st_heuristic_fit_plots.rds"))
+} else {
+plot_fit_mixing = plot_mixing_proportions(best_fit)
+plot_fit_mixing_heuristic = plot_mixing_proportions(best_fit_heuristic)
+
+saveRDS(plot_fit_mixing, file=paste0(opt[["prefix"]], "_viber_best_st_mixing_plots.rds"))
+saveRDS(plot_fit_mixing_heuristic, file=paste0(opt[["prefix"]], "_viber_best_st_heuristic_mixing_plots.rds"))
+}
+
+# Save report plot
+n_samples = ncol(best_fit[["x"]]) - 1
+marginals = multivariate = ggplot()
+
+try(expr = {marginals <<- VIBER::plot_1D(best_fit)} )
+#try(expr = {multivariate = plot(best_fit) %>% patchwork::wrap_plots()} )
+#top_p = patchwork::wrap_plots(marginals, multivariate, design=ifelse(n_samples>2, "ABB", "AAB"))
+
+try(expr = {multivariate = plot(best_fit)})
+try(expr = {multivariate = ggpubr::ggarrange(plotlist = multivariate)})
+top_p = ggpubr::ggarrange(plotlist = list(marginals, multivariate), widths=ifelse(n_samples>2, c(1,2), c(2,1)))
+
+mix_p = VIBER::plot_mixing_proportions(best_fit)
+binom = VIBER::plot_peaks(best_fit)
+elbo = VIBER::plot_ELBO(best_fit)
+#bottom_p = patchwork::wrap_plots(mix_p, binom, elbo, design="ABBBC")
+bottom_p = ggpubr::ggarrange(plotlist = list(mix_p, binom, elbo), widths = c(1,2,1), nrow = 1)
+
+#report_fig = patchwork::wrap_plots(top_p, bottom_p, design=ifelse(n_samples>2, "AAAB", "AAB"))
+report_fig = ggpubr::ggarrange(top_p, bottom_p, nrow = 2, heights = ifelse(n_samples>2, c(3,1), c(2,1)))
+saveRDS(report_fig, file=paste0(opt[["prefix"]], "_REPORT_plots_viber.rds"))
+ggplot2::ggsave(plot=report_fig, filename=paste0(opt[["prefix"]], "_REPORT_plots_viber.pdf"), height=210, width=210, units="mm", dpi = 200)
+ggplot2::ggsave(plot=report_fig, filename=paste0(opt[["prefix"]], "_REPORT_plots_viber.png"), height=210, width=210, units="mm", dpi = 200)
+
+
+# version export
+f = file("versions.yml","w")
+cnaqc_version = sessionInfo()\$otherPkgs\$CNAqc\$Version
+viber_version = sessionInfo()\$otherPkgs\$VIBER\$Version
+writeLines(paste0('"', "$task.process", '"', ":"), f)
+writeLines(paste("    CNAqc:", cnaqc_version), f)
+writeLines(paste("    VIBER:", viber_version), f)
+close(f)

From c22238470c8a2f832b67c6500c03a486e94c6b7e Mon Sep 17 00:00:00 2001
From: Elena Buscaroli <ele.buscaroli@gmail.com>
Date: Wed, 29 Jan 2025 16:30:42 +0100
Subject: [PATCH 5/8] deleting auxiliary scripts included in templates

---
 modules/local/CNAqc2tsv/utils.R          |  84 -----
 modules/local/cna2CNAqc/parser_CNA.R     |  37 ---
 modules/local/mobsterh/getters.R         |  57 ----
 modules/local/pyclonevi/pyclone_ctree.py |  93 ------
 modules/local/pyclonevi/pyclone_utils.py |  72 -----
 modules/local/vcf2CNAqc/parser_vcf.R     | 374 -----------------------
 modules/local/viber/getters.R            |  82 -----
 7 files changed, 799 deletions(-)
 delete mode 100644 modules/local/CNAqc2tsv/utils.R
 delete mode 100644 modules/local/cna2CNAqc/parser_CNA.R
 delete mode 100644 modules/local/mobsterh/getters.R
 delete mode 100755 modules/local/pyclonevi/pyclone_ctree.py
 delete mode 100644 modules/local/pyclonevi/pyclone_utils.py
 delete mode 100644 modules/local/vcf2CNAqc/parser_vcf.R
 delete mode 100644 modules/local/viber/getters.R

diff --git a/modules/local/CNAqc2tsv/utils.R b/modules/local/CNAqc2tsv/utils.R
deleted file mode 100644
index 647a201..0000000
--- a/modules/local/CNAqc2tsv/utils.R
+++ /dev/null
@@ -1,84 +0,0 @@
-## This source file is temporary, only needed in
-## absence of the updated CNAqc package
-
-get_sample <- function(m_cnaqc_obj,
-                    sample,
-                    which_obj) {
-    if (class(m_cnaqc_obj) != "m_cnaqc") {
-        wrong_class_all = class(m_cnaqc_obj)
-
-        cli::cli_abort(
-            c("cnaqc_objs must be a {.field m_cnaqc} object",
-            "x" = "{.var m_cnaqc_obj} is a {.cls {class(m_cnaqc_obj)}}")
-        )
-    }
-
-    consented_obj <- c("shared", "original")
-
-    if ((which_obj %in% consented_obj) == FALSE) {
-
-        cli::cli_abort("{.var which_obj} must be one of {.val shared} or {.val original}")
-
-    }
-
-    # define the element names
-
-    if (which_obj == "original") {
-
-        type = "original_cnaqc_objc"
-
-        # check if the original cnaqc obj exist
-
-        check_or = any(names(m_cnaqc_obj) == type)
-
-        if (check_or == FALSE) {
-            cli::cli_abort(c("mCNAqc object was build without keeping original CNAqc objects"),
-                        "x" = "It is not possible to retrieve the required samples")
-        } else {
-
-            cli::cli_h1("Retrieving original {.cls CNAqc} objects")
-            cnaqc_samples = m_cnaqc_obj[[type]][sample]
-
-        }
-
-    } else {
-
-        type = "cnaqc_obj_new_segmentation"
-
-        cli::cli_h1("Retrieving {.cls CNAqc} objects with the new segmentation")
-
-        cnaqc_samples = m_cnaqc_obj[[type]][sample]
-    }
-
-    return(cnaqc_samples)
-}
-
-get_sample_name <- function(x) {
-
-    if (class(x) == "m_cnaqc") {
-
-        lapply(x$cnaqc_obj_new_segmentation, function(y) {
-
-            y$sample
-
-        }) %>% unlist() %>% unname()
-
-    } else if (class(x) == "cnaqc") {
-
-        x$sample
-
-    } else {
-
-        wrong_class_all = class(x)
-
-        cli::cli_abort(
-            c("must provide a {.field m_cnaqc} object",
-            "x" = "{.var x} is a {.cls {class(x)}}")
-        )
-    }
-}
-
-get_mCNAqc_stats <- function(m_cnaqc_obj){
-    stats = m_cnaqc_obj[["m_cnaqc_stats"]]
-    return(stats)
-}
diff --git a/modules/local/cna2CNAqc/parser_CNA.R b/modules/local/cna2CNAqc/parser_CNA.R
deleted file mode 100644
index ee46373..0000000
--- a/modules/local/cna2CNAqc/parser_CNA.R
+++ /dev/null
@@ -1,37 +0,0 @@
-library(dplyr)
-library(readr)
-
-parse_Sequenza = function(segments_file, extra_file){
-    # Extract the segments information
-    segments = readr::read_tsv(segments_file, col_types = readr::cols()) %>%
-        dplyr::rename(
-            chr = chromosome,
-            from = start.pos,
-            to = end.pos,
-            Major = A,
-            minor = B) %>%
-        dplyr::select(chr, from, to, Major, minor, dplyr::everything())
-
-    solutions = readr::read_tsv(extra_file, col_types = readr::cols())
-    purity = solutions$cellularity[2]
-    ploidy = solutions$ploidy.estimate[2]
-    return(list(segments = segments, purity = purity, ploidy = ploidy))
-}
-
-
-parse_ASCAT = function(segments_file, extra_file){
-    # Extract the segments information
-    segments = readr::read_tsv(segments_file, col_types = readr::cols()) %>%
-        dplyr::mutate(chr = paste0("chr",chr)) %>%
-        dplyr::rename(
-            from = startpos,
-            to = endpos,
-            Major = nMajor,
-            minor = nMinor) %>%
-        dplyr::select(chr, from, to, Major, minor)
-
-    solutions = readr::read_tsv(extra_file, col_types = readr::cols())
-    purity = solutions$AberrantCellFraction
-    ploidy = solutions$Ploidy
-    return(list(segments = segments, purity = purity, ploidy = ploidy))
-}
diff --git a/modules/local/mobsterh/getters.R b/modules/local/mobsterh/getters.R
deleted file mode 100644
index 673fbfe..0000000
--- a/modules/local/mobsterh/getters.R
+++ /dev/null
@@ -1,57 +0,0 @@
-get_sample = function(m_cnaqc_obj,
-                    sample,
-                    which_obj) {
-    if (class(m_cnaqc_obj) != "m_cnaqc") {
-        cli::cli_abort(c(
-        "mCNAqc object was build without keeping original CNAqc objects",
-        "x" = "It is not possible to retrieve the required samples"
-        ))
-    }
-
-    consented_obj = c("shared", "original")
-
-    if ((which_obj %in% consented_obj) == FALSE) {
-        cli::cli_abort("{.var which_obj} must be one of 'shared' or 'original'")
-    }
-    # define the element names
-    if (which_obj == "original") {
-        type = "original_cnaqc_objc"
-        # check if the original cnaqc obj exist
-        check_or = any(names(m_cnaqc_obj) == type)
-        if (check_or == FALSE) {
-        cli::cli_abort(c(
-            "mCNAqc object was build without keeping original CNAqc objects",
-            "x" = "It is not possible to retrieve the required samples"
-        ))
-        } else {
-        cli::cli_h1("Retrieving original {.cls CNAqc} objects")
-        cnaqc_samples = m_cnaqc_obj[[type]][sample]
-        }
-    } else {
-        type = "cnaqc_obj_new_segmentation"
-        cli::cli_h1("Retrieving {.cls CNAqc} objects with the new segmentation")
-        cnaqc_samples = m_cnaqc_obj[[type]][sample]
-    }
-    return(cnaqc_samples)
-}
-
-get_sample_name = function(x) {
-    if (class(x) == "m_cnaqc") {
-        lapply(x$cnaqc_obj_new_segmentation, function(y) {
-        y$sample
-        }) %>% unlist() %>% unname()
-
-    } else if (class(x) == "cnaqc") {
-        x$sample
-    } else {
-        cli::cli_abort(c(
-        "Must provide a {.field m_cnaqc} object",
-        "x" = "{.var x} is a {.cls {class(x)}}"
-        ))
-    }
-}
-
-get_mCNAqc_stats = function(m_cnaqc_obj) {
-    stats = m_cnaqc_obj[["m_cnaqc_stats"]]
-    return(stats)
-}
diff --git a/modules/local/pyclonevi/pyclone_ctree.py b/modules/local/pyclonevi/pyclone_ctree.py
deleted file mode 100755
index 077790e..0000000
--- a/modules/local/pyclonevi/pyclone_ctree.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import pandas as pd
-import argparse
-## Read command line args
-
-parser = argparse.ArgumentParser(description='Process input and output files.')
-parser.add_argument('--joint', help='Joint table path')
-parser.add_argument('--best_fit', help='Pyclone-vi best fit path')
-parser.add_argument('--ctree_input', help='Table for ctree path')
-#parser.add_argument('--pyclone_joint', help='Cluster-annotated joint table')
-args = parser.parse_args()
-
-
-## Read pyclone best fit table
-#best_fit_file=sys.argv[2]
-best_fit_file = args.best_fit
-df_output = pd.read_csv(best_fit_file, sep='\t')
-
-## Read pyclone input table
-joint_table = args.joint
-#df_input = pd.read_csv(sys.argv[1], sep = '\t')
-df_input = pd.read_csv(joint_table, sep = '\t')
-## Caluclate number of mutations per cluster and add to the subset orginal df_output dataframe
-
-df_output_small = df_output[['mutation_id','sample_id','cluster_id','cellular_prevalence']]
-df_output_small['nMuts'] = df_output_small.groupby('cluster_id')['mutation_id'].transform('nunique')
-
-## Find the clonal cluster and add the colum 'is.clonal' to identify it
-
-samples = df_output_small['sample_id'].unique()
-top_clusters = 0
-for s in samples:
-  ind = df_output_small[df_output_small['sample_id']==s]['cellular_prevalence'].idxmax()
-  top_cluster = df_output_small.loc[ind,'cluster_id']
-  if (top_clusters != top_cluster):
-    top_clusters = top_cluster
-
-i = df_output_small[df_output_small['cluster_id']==top_clusters].index
-
-df_output_small['is.clonal'] = "F"
-df_output_small.loc[i,'is.clonal'] = "T"
-
-## Merge the input information about driver genes to the clusters
-if 'driver_label' not in df_input.columns or 'is_driver' not in df_input.columns:
-    df_input['driver_label'] = "NA"
-    df_input['is_driver'] = False
-    df_input.loc[1,'driver_label'] = ""
-    df_input.loc[1,'is_driver'] = True
-
-df_merged = pd.merge(df_output_small, df_input[['mutation_id','sample_id','patient_id','driver_label','is_driver']], on = ["mutation_id","sample_id"], how="inner")
-df_final = df_merged.drop_duplicates(subset=['sample_id', 'driver_label','cluster_id','is.clonal','is_driver'])
-df_final = df_final.rename(columns={'cellular_prevalence':'CCF','cluster_id': 'cluster','driver_label':'variantID','patient_id':'patientID','is_driver':'is.driver'})
-df_final['is.driver'] = df_final['is.driver'].replace({False: 'F', True: 'T'})
-
-######### OLD SCRIPT #########
-## Find clusters with driver genes and find cluster wihtout driver genes
-## For the ones without drivers add the 'NA' to variantID and False to is.driver
-#driver_genes_df = df_final[df_final['is_driver'] == True]
-
-#non_driver_genes_df = df_final[df_final['is_driver'] == False].drop_duplicates(subset=['sample_id','cluster_id']).copy()
-#non_driver_genes_df['gene'] = 'NA'
-#non_driver_genes_df['is_driver'] = False
-#result_df = pd.concat([driver_genes_df, non_driver_genes_df], ignore_index=True)
-
-## Identify clusters for which is.driver is both True and False
-#conflicting_clusters = result_df.groupby(['sample_id', 'cluster_id'])['is_driver'].nunique() == 2
-#conflicting_clusters = conflicting_clusters[conflicting_clusters].index
-
-## Create a new DataFrame with only True values for conflicting clusters
-#true_values_df = result_df[(result_df['is_driver'] == True) & result_df.set_index(['sample_id', 'cluster_id']).index.isin(conflicting_clusters)]
-
-## Create a new DataFrame with all values for non-conflicting clusters
-#non_conflicting_df = result_df[~result_df.set_index(['sample_id', 'cluster_id']).index.isin(conflicting_clusters)]
-
-## Concatenate the DataFrames
-#result_df = pd.concat([true_values_df, non_conflicting_df], ignore_index=True)
-#result_df['is_driver'] = result_df['is_driver'].replace({False: 'F', True: 'T'})
-
-#result_df = result_df.rename(columns={'cellular_prevalence':'CCF','cluster_id': 'cluster'})
-#result_df['tool'] = pd.Series(["pyclonevi" for x in range(len(result_df.index))])
-#ctree_input = result_df[['patient_id','gene','is_driver','is.clonal','cluster','nMuts','sample_id','CCF','tool']]
-
-## Final ctree input dataframe
-final_jt_file = args.ctree_input
-df_final.to_csv(final_jt_file, sep="\t",index=False, header=True)
-
-## Annotate input joint table with pyclone-vi assignments
-#df2 = pd.merge(df_input, df_output[['mutation_id','sample_id','cluster_id','cellular_prevalence','cluster_assignment_prob']], on=['mutation_id','sample_id'])
-#df_joint_new=df2.rename(columns={"cluster_id": "pyclonevi_cluster_id", "cellular_prevalence": "pyclonevi_cellular_prevalence", "cluster_assignment_prob": "pyclone_vi_cluster_assignment_prob"})
-#annotated_joint=args.pyclone_joint
-#df_joint_new.to_csv(annotated_joint, sep="\t",index=False, header=True)
diff --git a/modules/local/pyclonevi/pyclone_utils.py b/modules/local/pyclonevi/pyclone_utils.py
deleted file mode 100644
index cfaf3a3..0000000
--- a/modules/local/pyclonevi/pyclone_utils.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import pandas as pd
-import argparse
-
-# Define your functions here
-# def add(a, b):
-#     """Add two numbers."""
-#     return a + b
-
-#input data preprocessing
-def create_pyclone_input(input_data, patient_id,output_data):
-    df = pd.read_csv(input_data, sep = '\t',header=0)
-    # df = df.query('karyotype=="1:1"')
-    df['normal_cn'] = 2
-    df['patient_id'] = patient_id
-    df['mutation_id'] = df.apply(lambda row: f"{patient_id}:{row['chr'][3:]}:{row['from']}:{row['alt']}", axis=1)
-    df[['major_cn', 'minor_cn']] = df['karyotype'].str.split(':', expand=True)
-    df = df.drop(columns=['karyotype'])
-    df = df.rename(columns={'Indiv': 'sample_id', 'NR': 'ref_counts', 'NV': 'alt_counts', 'purity': 'tumour_content'})
-
-    column_names = ['mutation_id', 'patient_id','sample_id', 'ref_counts', 'alt_counts', 'normal_cn', 'major_cn','minor_cn','tumour_content','driver_label','is_driver']
-    column_names_red = list(set(column_names) & set(df.columns))
-
-    df = df.loc[:, column_names_red]
-    df.to_csv(output_data, sep="\t",index=False, header=df.columns)
-    print(df)
-
-# def divide(a, b):
-#     """Divide two numbers."""
-#     if b == 0:
-#         raise ValueError("Cannot divide by zero!")
-#     return a / b
-
-def main():
-    # Create the top-level parser
-    parser = argparse.ArgumentParser(description='Calculator script')
-    subparsers = parser.add_subparsers(dest='command', help='Available commands')
-
-    # Create sub-parser for the "create_pyclone_input" command
-    parser_create_pyclone_input = subparsers.add_parser('create_pyclone_input', help='Add two numbers')
-    parser_create_pyclone_input.add_argument('input_data', help='Joint table path')
-    parser_create_pyclone_input.add_argument('patient_id', help='Patient identifier')
-    parser_create_pyclone_input.add_argument('output_data', help='Output path for new table')
-
-    # # Create sub-parser for the "divide" command
-    # parser_divide = subparsers.add_parser('divide', help='Divide two numbers')
-    # parser_divide.add_argument('a', type=float, help='First number')
-    # parser_divide.add_argument('b', type=float, help='Second number')
-
-    # Parse the arguments
-    args = parser.parse_args()
-
-    # Call the appropriate function based on the command
-    if args.command == 'create_pyclone_input':
-        result = create_pyclone_input(args.input_data, args.patient_id, args.output_data)
-    # elif args.command == 'subtract':
-    #     result = subtract(args.a, args.b)
-    # elif args.command == 'multiply':
-    #     result = multiply(args.a, args.b)
-    # elif args.command == 'divide':
-    #     result = divide(args.a, args.b)
-    else:
-        parser.print_help()
-        return
-
-    # Print the result
-    print(f"The result is: {result}")
-
-if __name__ == '__main__':
-    main()
diff --git a/modules/local/vcf2CNAqc/parser_vcf.R b/modules/local/vcf2CNAqc/parser_vcf.R
deleted file mode 100644
index 1f93248..0000000
--- a/modules/local/vcf2CNAqc/parser_vcf.R
+++ /dev/null
@@ -1,374 +0,0 @@
-library(tidyr)
-library(dplyr)
-library(vcfR)
-
-parse_FreeBayes = function(vcf, tumour_id, normal_id, filter_mutations = FALSE){
-    tb = vcfR::vcfR2tidy(vcf)
-
-    gt_field = tb$gt %>%
-        tidyr::separate(gt_AD, sep = ",", into = c("NR", "NV")) %>%
-        dplyr::mutate(
-            NR = as.numeric(NR),
-            NV = as.numeric(NV),
-            DP = NV + NR,
-            VAF = NV/DP) %>%
-        dplyr::rename(sample = Indiv)
-
-    samples_list = gt_field$sample %>% unique
-
-    if ("CSQ" %in% tb$meta$ID){
-    # VEP specific field extraction
-    # Take CSQ field names and split by |
-
-        vep_field = tb$meta %>%
-                dplyr::filter(ID == "CSQ") %>%
-                dplyr::select(Description) %>%
-                dplyr::pull()
-
-        vep_field = strsplit(vep_field, split = "|", fixed = TRUE)[[1]]
-        vep_field = vep_field[2:length(vep_field)-1]
-
-    # Tranform the fix field by splittig the CSQ and select the columns needed
-        fix_field = tb$fix %>%
-        dplyr::rename(
-            chr = CHROM,
-            from = POS,
-            ref = REF,
-            alt = ALT) %>%
-        dplyr::rowwise() %>%
-        dplyr::mutate(
-            from = as.numeric(from),
-            to = from + nchar(alt)) %>%
-        dplyr::ungroup() %>%
-        dplyr::select(chr, from, to, ref, alt, CSQ, dplyr::everything(),  -ChromKey, -DP) %>%
-        tidyr::separate(CSQ, vep_field, sep = "\\|") %>%
-        dplyr::select(chr, from, to, ref, alt, IMPACT, SYMBOL, Gene, dplyr::everything())
-
-    } else {
-        fix_field = tb$fix %>%
-            dplyr::rename(
-            chr = CHROM,
-            from = POS,
-            ref = REF,
-            alt = ALT
-            ) %>%
-            dplyr::rowwise() %>%
-            dplyr::mutate(
-            from = as.numeric(from),
-            to = from + nchar(alt)) %>%
-            dplyr::ungroup() %>%
-            dplyr::select(chr, from, to, ref, alt, dplyr::everything(), -ChromKey, -DP)
-    }
-
-    # if have to filter mutations
-    if (filter_mutations){
-        filter = c('PASS')
-    } else {
-        filter = fix_field$FILTER %>% unique()
-    }
-
-    calls = lapply(
-        samples_list,
-        function(s){
-        gt_field_s = gt_field %>% dplyr::filter(sample == s)
-
-        if(nrow(fix_field) != nrow(gt_field_s))
-            stop("Mismatch between the VCF fixed fields and the genotypes, will not process this file.")
-
-        fits = list()
-        fits$sample = s
-        fits$mutations = dplyr::bind_cols(fix_field, gt_field_s) %>%
-            dplyr::select(chr, from, to, ref, alt, NV, DP, VAF, dplyr::everything()) %>%
-            dplyr::filter(FILTER %in% filter)
-        fits
-        })
-
-    names(calls) = samples_list
-    samples = c(tumour_id, normal_id)
-    calls = calls[samples]
-    return(calls)
-}
-
-
-parse_Mutect = function(vcf, tumour_id, normal_id, filter_mutations = FALSE){
-    # Transform vcf to tidy
-    tb = vcfR::vcfR2tidy(vcf)
-
-    # Extract gt field and obtain coverage (DP) and variant allele frequency (VAF) fields
-    gt_field = tb$gt %>%
-        tidyr::separate(gt_AD, sep = ",", into = c("NR", "NV")) %>%
-        dplyr::mutate(
-            NR = as.numeric(NR),
-            NV = as.numeric(NV),
-            DP = NV + NR,
-            VAF = NV/DP) %>%
-        dplyr::rename(sample = Indiv)
-
-    # Extract sample names
-    samples_list = gt_field$sample %>% unique
-
-    # check if VCF is annotated with VEP
-    if ("CSQ" %in% tb$meta$ID){
-    # VEP specific field extraction
-    # Take CSQ field names and split by |
-
-        vep_field = tb$meta %>%
-                    dplyr::filter(ID == "CSQ") %>%
-                    dplyr::select(Description) %>%
-                    dplyr::pull()
-
-        vep_field = strsplit(vep_field, split = "|", fixed = TRUE)[[1]]
-        vep_field = vep_field[2:length(vep_field)-1]
-
-    # Tranform the fix field by splittig the CSQ and select the columns needed
-        fix_field = tb$fix %>%
-            dplyr::rename(
-                chr = CHROM,
-                from = POS,
-                ref = REF,
-                alt = ALT) %>%
-            dplyr::rowwise() %>%
-            dplyr::mutate(
-                from = as.numeric(from),
-                to = from + nchar(alt)) %>%
-            dplyr::ungroup() %>%
-            dplyr::select(chr, from, to, ref, alt, CSQ, dplyr::everything()) %>%
-            tidyr::separate(CSQ, vep_field, sep = "\\|") %>%
-            dplyr::select(chr, from, to, ref, alt, IMPACT, SYMBOL, Gene, dplyr::everything(), -DP) #can add other thing, CSQ, HGSP
-
-    } else {
-        # Take from fix field some columns
-        fix_field = tb$fix %>%
-            dplyr::rename(
-                chr = CHROM,
-                from = POS,
-                ref = REF,
-                alt = ALT) %>%
-            dplyr::rowwise() %>%
-            dplyr::mutate(
-                from = as.numeric(from),
-                to = from + nchar(alt)) %>%
-            dplyr::ungroup() %>%
-            dplyr::select(chr, from, to, ref, alt, dplyr::everything(), -ChromKey, -DP) #-DP
-    }
-
-    if (filter_mutations){
-        filter = c('PASS')
-    } else {
-        filter = fix_field$FILTER %>% unique()
-    }
-
-    # For each sample create the table of mutations
-    calls = lapply(
-            samples_list,
-            function(s){
-            gt_field_s = gt_field %>% dplyr::filter(sample == s)
-
-            if(nrow(fix_field) != nrow(gt_field_s))
-                stop("Mismatch between the VCF fixed fields and the genotypes, will not process this file.")
-
-            fits = list()
-            fits$sample = s
-            fits$mutations = dplyr::bind_cols(fix_field, gt_field_s) %>%
-                dplyr::select(chr, from, to, ref, alt, NV, DP, VAF, dplyr::everything()) %>%
-                dplyr::filter(FILTER %in% filter)
-            fits
-            })
-
-    names(calls) = samples_list
-    samples = c(tumour_id, normal_id)
-    calls = calls[samples]
-    return(calls)
-}
-
-retrieve_ref_alt = function(row){
-    ref = row$ref
-    alt = row$alt
-    if (ref == 'A'){NR=as.integer(strsplit(row$gt_AU, split=',')[[1]][1])}
-    if (ref == 'T'){NR=as.integer(strsplit(row$gt_TU, split=',')[[1]][1])}
-    if (ref == 'G'){NR=as.integer(strsplit(row$gt_GU, split=',')[[1]][1])}
-    if (ref == 'C'){NR=as.integer(strsplit(row$gt_CU, split=',')[[1]][1])}
-
-    if (alt == 'A'){NV=as.integer(strsplit(row$gt_AU, split=',')[[1]][1])}
-    if (alt == 'T'){NV=as.integer(strsplit(row$gt_TU, split=',')[[1]][1])}
-    if (alt == 'G'){NV=as.integer(strsplit(row$gt_GU, split=',')[[1]][1])}
-    if (alt == 'C'){NV=as.integer(strsplit(row$gt_GU, split=',')[[1]][1])}
-
-    ref_alt = paste0(NR, ',', NV)
-    ref_alt
-}
-
-parse_Strelka = function(vcf, tumour_id, normal_id, filter_mutations = FALSE){
-    tb = vcfR::vcfR2tidy(vcf)
-    gt_field = tb$gt %>% rename(sample = Indiv)
-    samples_list = gt_field$sample %>% unique
-
-    if ("CSQ" %in% tb$meta$ID){
-    # VEP specific field extraction
-    # Take CSQ field names and split by |
-
-        vep_field = tb$meta %>%
-                dplyr::filter(ID == "CSQ") %>%
-                dplyr::select(Description) %>%
-                dplyr::pull()
-
-    vep_field = strsplit(vep_field, split = "|", fixed = TRUE)[[1]]
-    vep_field = vep_field[2:length(vep_field)-1]
-
-    # Tranform the fix field by splittig the CSQ and select the columns needed
-    fix_field = tb$fix %>%
-        dplyr::rename(
-            chr = CHROM,
-            from = POS,
-            ref = REF,
-            alt = ALT) %>%
-        dplyr::rowwise() %>%
-        dplyr::mutate(
-            from = as.numeric(from),
-            to = from + nchar(alt)) %>%
-        dplyr::ungroup() %>%
-        dplyr::select(chr, from, to, ref, alt, CSQ, dplyr::everything(), -ChromKey) %>%
-        tidyr::separate(CSQ, vep_field, sep = "\\|") %>%
-        dplyr::select(chr, from, to, ref, alt, IMPACT, SYMBOL, Gene, dplyr::everything()) #can add other thing, CSQ, HGSP
-
-    } else {
-        # Take from fix field some columns
-        fix_field = tb$fix %>%
-            dplyr::rename(
-                chr = CHROM,
-                from = POS,
-                ref = REF,
-                alt = ALT) %>%
-            dplyr::rowwise() %>%
-            dplyr::mutate(
-                from = as.numeric(from),
-                to = from + nchar(alt)) %>%
-            dplyr::ungroup() %>%
-            dplyr::select(chr, from, to, ref, alt, dplyr::everything(), -ChromKey) #-DP
-    }
-
-    if (filter_mutations){
-        filter = c('PASS')
-    } else {
-        filter = fix_field$FILTER %>% unique()
-    }
-
-    calls = lapply(
-        samples_list,
-        function(s){
-            gt_field_s = gt_field %>% dplyr::filter(sample == s)
-
-            fits = list()
-            fits$sample = s
-            mutations = dplyr::bind_cols(fix_field, gt_field_s)
-            ref_alt = lapply(1:nrow(mutations), function(r){
-                retrieve_ref_alt(mutations[r,])
-        })
-
-        ref_alt = ref_alt %>% unlist()
-        mutations$ref_alt = ref_alt
-        mutations = mutations %>%
-            tidyr::separate(ref_alt, into = c('NR', 'NV')) %>%
-            dplyr::mutate(NR = as.integer(NR),
-                        NV = as.integer(NV)) %>%
-            dplyr::mutate(DP = NR+NV) %>%
-            dplyr::mutate(VAF = NV/DP) %>%
-            dplyr::select(chr, from, to, ref, alt, NV, DP, VAF, everything()) %>%
-            dplyr::filter(FILTER %in% filter)
-
-        fits$mutations = mutations
-        fits
-        })
-
-    names(calls) = samples_list
-    samples = c(tumour_id, normal_id)
-    calls = calls[samples]
-    return(calls)
-}
-
-
-parse_Platypus = function(vcf, tumour_id, normal_id, filter_mutations = FALSE){
-    tb = vcfR::vcfR2tidy(vcf)
-
-    gt_field = tb$gt %>%
-        dplyr::mutate(
-            DP = as.numeric(gt_NR),
-            NV = as.numeric(gt_NV),
-            VAF = NV/DP) %>%
-        dplyr::rename(sample = Indiv)
-
-    # Extract each sample
-    samples_list = gt_field$sample %>% unique
-
-    # check if VCF is annotated with VEP
-    if ("CSQ" %in% tb$meta$ID){
-    # VEP specific field extraction
-    # Take CSQ field names and split by |
-
-        vep_field = tb$meta %>%
-                dplyr::filter(ID == "CSQ") %>%
-                dplyr::select(Description) %>%
-                dplyr::pull()
-
-    vep_field = strsplit(vep_field, split = "|", fixed = TRUE)[[1]]
-    vep_field = vep_field[2:length(vep_field)-1]
-
-    # Tranform the fix field by splittig the CSQ and select the columns needed
-    fix_field = tb$fix %>%
-        dplyr::rename(
-            chr = CHROM,
-            from = POS,
-            ref = REF,
-            alt = ALT) %>%
-        dplyr::rowwise() %>%
-        dplyr::mutate(
-            from = as.numeric(from),
-            to = from + nchar(alt)) %>%
-        dplyr::ungroup() %>%
-        dplyr::select(chr, from, to, ref, alt, CSQ, dplyr::everything(),  -ChromKey) %>%
-        tidyr::separate(CSQ, vep_field, sep = "\\|") %>%
-        dplyr::select(chr, from, to, ref, alt, IMPACT, SYMBOL, Gene, dplyr::everything())
-
-    } else {
-        fix_field = tb$fix %>%
-            dplyr::rename(
-            chr = CHROM,
-            from = POS,
-            ref = REF,
-            alt = ALT) %>%
-            dplyr::rowwise() %>%
-            dplyr::mutate(
-            from = as.numeric(from),
-            to = from + nchar(alt)) %>%
-            dplyr::ungroup() %>%
-            dplyr::select(chr, from, to, ref, alt, dplyr::everything(), -ChromKey)
-    }
-
-    if (filter_mutations){
-        filter = c('PASS')
-    } else {
-        filter = fix_field$FILTER %>% unique()
-    }
-
-    calls = lapply(
-        samples_list,
-        function(s){
-        gt_field_s = gt_field %>%
-            dplyr::filter(sample == s)
-
-        if(nrow(fix_field) != nrow(gt_field_s))
-            stop("Mismatch between the VCF fixed fields and the genotypes, will not process this file.")
-
-        fits = list()
-        fits$sample = s
-        fits$mutations = dplyr::bind_cols(fix_field, gt_field_s) %>%
-            dplyr::select(chr, from, to, ref, alt, NV, DP, VAF, dplyr::everything()) %>%
-            dplyr::filter(FILTER %in% filter)
-        fits
-        })
-
-    names(calls) = samples_list
-    samples = c(tumour_id, normal_id)
-    calls = calls[samples]
-    return(calls)
-}
diff --git a/modules/local/viber/getters.R b/modules/local/viber/getters.R
deleted file mode 100644
index 30a926b..0000000
--- a/modules/local/viber/getters.R
+++ /dev/null
@@ -1,82 +0,0 @@
-get_sample <- function(m_cnaqc_obj,
-                        sample,
-                        which_obj) {
-    if (class(m_cnaqc_obj) != "m_cnaqc") {
-        wrong_class_all = class(m_cnaqc_obj)
-
-        cli::cli_abort(
-            c("cnaqc_objs must be a {.field m_cnaqc} object",
-                "x" = "{.var m_cnaqc_obj} is a {.cls {class(m_cnaqc_obj)}}")
-        )
-    }
-
-    consented_obj <- c("shared", "original")
-
-    if ((which_obj %in% consented_obj) == FALSE) {
-
-        cli::cli_abort("{.var which_obj} must be one of {.val shared} or {.val original}")
-
-    }
-
-    # define the element names
-
-    if (which_obj == "original") {
-
-        type = "original_cnaqc_objc"
-
-        # check if the original cnaqc obj exist
-
-        check_or = any(names(m_cnaqc_obj) == type)
-
-        if (check_or == FALSE) {
-            cli::cli_abort(c("mCNAqc object was build without keeping original CNAqc objects"),
-                        "x" = "It is not possible to retrieve the required samples")
-        } else {
-
-            cli::cli_h1("Retrieving original {.cls CNAqc} objects")
-            cnaqc_samples = m_cnaqc_obj[[type]][sample]
-
-        }
-
-    } else {
-
-        type = "cnaqc_obj_new_segmentation"
-
-        cli::cli_h1("Retrieving {.cls CNAqc} objects with the new segmentation")
-
-        cnaqc_samples = m_cnaqc_obj[[type]][sample]
-    }
-
-    return(cnaqc_samples)
-}
-
-get_sample_name <-function(x) {
-
-    if (class(x) == "m_cnaqc") {
-
-        lapply(x$cnaqc_obj_new_segmentation, function(y) {
-
-            y$sample
-
-        }) %>% unlist() %>% unname()
-
-    } else if (class(x) == "cnaqc") {
-
-        x$sample
-
-    } else {
-
-        wrong_class_all = class(x)
-
-        cli::cli_abort(
-            c("must provide a {.field m_cnaqc} object",
-            "x" = "{.var x} is a {.cls {class(x)}}")
-        )
-    }
-}
-
-get_mCNAqc_stats <- function(m_cnaqc_obj){
-    stats = m_cnaqc_obj[["m_cnaqc_stats"]]
-    return(stats)
-}
-

From dd8583ccb6d74091e631731c8b292969ffa67ac6 Mon Sep 17 00:00:00 2001
From: Elena Buscaroli <ele.buscaroli@gmail.com>
Date: Wed, 29 Jan 2025 16:31:58 +0100
Subject: [PATCH 6/8] deleting auxiliary scripts included in templates

---
 modules/local/SparseSignatures/getters.R | 81 ------------------------
 1 file changed, 81 deletions(-)
 delete mode 100644 modules/local/SparseSignatures/getters.R

diff --git a/modules/local/SparseSignatures/getters.R b/modules/local/SparseSignatures/getters.R
deleted file mode 100644
index 18a4313..0000000
--- a/modules/local/SparseSignatures/getters.R
+++ /dev/null
@@ -1,81 +0,0 @@
-get_sample <- function(m_cnaqc_obj,
-                    sample,
-                    which_obj) {
-    if (class(m_cnaqc_obj) != "m_cnaqc") {
-        wrong_class_all = class(m_cnaqc_obj)
-
-        cli::cli_abort(
-            c("cnaqc_objs must be a {.field m_cnaqc} object",
-            "x" = "{.var m_cnaqc_obj} is a {.cls {class(m_cnaqc_obj)}}")
-        )
-    }
-
-    consented_obj <- c("shared", "original")
-
-    if ((which_obj %in% consented_obj) == FALSE) {
-
-        cli::cli_abort("{.var which_obj} must be one of {.val shared} or {.val original}")
-
-    }
-
-    # define the element names
-
-    if (which_obj == "original") {
-
-        type = "original_cnaqc_objc"
-
-        # check if the original cnaqc obj exist
-
-        check_or = any(names(m_cnaqc_obj) == type)
-
-        if (check_or == FALSE) {
-            cli::cli_abort(c("mCNAqc object was build without keeping original CNAqc objects"),
-                        "x" = "It is not possible to retrieve the required samples")
-        } else {
-
-            cli::cli_h1("Retrieving original {.cls CNAqc} objects")
-            cnaqc_samples = m_cnaqc_obj[[type]][sample]
-
-        }
-
-    } else {
-
-        type = "cnaqc_obj_new_segmentation"
-
-        cli::cli_h1("Retrieving {.cls CNAqc} objects with the new segmentation")
-
-        cnaqc_samples = m_cnaqc_obj[[type]][sample]
-    }
-
-    return(cnaqc_samples)
-}
-
-get_sample_name <-function(x) {
-
-    if (class(x) == "m_cnaqc") {
-
-        lapply(x$cnaqc_obj_new_segmentation, function(y) {
-
-            y$sample
-
-        }) %>% unlist() %>% unname()
-
-    } else if (class(x) == "cnaqc") {
-
-        x$sample
-
-    } else {
-
-        wrong_class_all = class(x)
-
-        cli::cli_abort(
-            c("must provide a {.field m_cnaqc} object",
-            "x" = "{.var x} is a {.cls {class(x)}}")
-        )
-    }
-}
-
-get_mCNAqc_stats <- function(m_cnaqc_obj){
-    stats = m_cnaqc_obj[["m_cnaqc_stats"]]
-    return(stats)
-}

From 7a1f850c01fe3875bedae527ce24ad22abf679fb Mon Sep 17 00:00:00 2001
From: Elena Buscaroli <ele.buscaroli@gmail.com>
Date: Wed, 29 Jan 2025 16:32:44 +0100
Subject: [PATCH 7/8] updating docker containers and setting templates

---
 modules/local/CNAqc/main.nf                   |   5 +-
 modules/local/CNAqc2tsv/main.nf               |  45 +----
 modules/local/SigProfiler/SigProfiler/main.nf |   7 +-
 modules/local/SigProfiler/download/main.nf    |   5 +-
 modules/local/SparseSignatures/main.nf        | 188 +-----------------
 modules/local/annotate_driver/main.nf         |   6 +-
 modules/local/cna2CNAqc/main.nf               |  34 +---
 modules/local/ctree/main.nf                   |   4 +-
 modules/local/get_positions/main.nf           |   5 +-
 modules/local/get_positions/main_rel.nf       |   5 +-
 modules/local/join_CNAqc/main.nf              |   4 +-
 modules/local/join_positions/main.nf          |   4 +-
 modules/local/mobsterh/main.nf                | 119 +----------
 modules/local/pyclonevi/main.nf               |  48 +----
 modules/local/tinc/main.nf                    |   4 +-
 modules/local/vcf2CNAqc/main.nf               |  54 +----
 modules/local/viber/main.nf                   | 181 +----------------
 17 files changed, 58 insertions(+), 660 deletions(-)

diff --git a/modules/local/CNAqc/main.nf b/modules/local/CNAqc/main.nf
index 845f401..f624b33 100755
--- a/modules/local/CNAqc/main.nf
+++ b/modules/local/CNAqc/main.nf
@@ -1,7 +1,9 @@
 process CNAQC {
     tag "$meta.id"
     label "process_low"
-    container = 'docker://lvaleriani/cnaqc:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(cna_rds), path(snv_rds)
@@ -13,7 +15,6 @@ process CNAQC {
     tuple val(meta), path("*_qc.pdf"),                                  emit: plot_pdf_qc
     path "versions.yml",                                                emit: versions
 
-
     script:
     def args                                = task.ext.args
     def prefix                              = task.ext.prefix                                       ?: "${meta.id}"
diff --git a/modules/local/CNAqc2tsv/main.nf b/modules/local/CNAqc2tsv/main.nf
index bb1b2cb..6b72a85 100644
--- a/modules/local/CNAqc2tsv/main.nf
+++ b/modules/local/CNAqc2tsv/main.nf
@@ -1,11 +1,9 @@
-//
-// Mutations extraction from mCNAqc
-//
-
 process RDS_PROCESSING {
     tag "$meta.id"
     label "process_single"
-    container = 'docker://lvaleriani/cnaqc:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(rds_join), val(tumour_samples)
@@ -14,41 +12,6 @@ process RDS_PROCESSING {
     tuple val(meta), path("*_joint_table.tsv"), val(tumour_samples),    emit: tsv
     path "versions.yml",                                                emit: versions
 
-
     script:
-    def args    = task.ext.args     ?:  ''
-    def prefix  = task.ext.prefix   ?:  "${meta.id}"
-
-    """
-    #!/usr/bin/env Rscript
-    library(dplyr)
-    library(CNAqc)
-
-    source("$moduleDir/utils.R")
-
-    multi_cnaqc = readRDS(file = "$rds_join")
-    mutations_multisample <- get_sample(m_cnaqc_obj = multi_cnaqc,sample = get_sample_name(multi_cnaqc),
-                                    which_obj = "original")
-    multisample_jointTable <- list()
-
-    for (s in get_sample_name(multi_cnaqc)){
-        purity <- mutations_multisample[[s]][["purity"]]
-        multisample_jointTable[[s]] <- mutations_multisample[[s]][["mutations"]] %>%
-            dplyr::mutate(purity=purity)
-    }
-
-    joint_table <- bind_rows(multisample_jointTable)
-
-    write.table(joint_table, file = paste0("$prefix","_joint_table.tsv"), append = F, quote = F, sep = "\t", row.names = FALSE)
-
-        # version export
-    f <- file("versions.yml","w")
-    dplyr_version <- sessionInfo()\$otherPkgs\$dplyr\$Version
-    cnaqc_version <- sessionInfo()\$otherPkgs\$CNAqc\$Version
-    writeLines(paste0('"', "$task.process", '"', ":"), f)
-    writeLines(paste("    dplyr:", dplyr_version), f)
-    writeLines(paste("    CNAqc:", cnaqc_version), f)
-    close(f)
-
-    """
+    template "main_script.R"
 }
diff --git a/modules/local/SigProfiler/SigProfiler/main.nf b/modules/local/SigProfiler/SigProfiler/main.nf
index 16a949e..96fef8a 100644
--- a/modules/local/SigProfiler/SigProfiler/main.nf
+++ b/modules/local/SigProfiler/SigProfiler/main.nf
@@ -1,7 +1,9 @@
 process SIGPROFILER {
     tag "$meta.id"
     label "process_high"
-    container = 'docker://katiad/sigprofiler:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://katiad/sigprofiler:version1.0' :
+        'docker.io/katiad/sigprofiler:version1.0' }"
 
     input:
         tuple val(meta), path(tsv_list, stageAs: '*.tsv')
@@ -61,7 +63,6 @@ process SIGPROFILER {
     from SigProfilerExtractor import sigpro as sig
     from SigProfilerMatrixGenerator.scripts import SigProfilerMatrixGeneratorFunc as matGen
 
-
     if __name__ == '__main__':
 
         dataset_id = "$meta.dataset"
@@ -72,9 +73,7 @@ process SIGPROFILER {
 
         output_path = os.path.join("output", "SBS", f"{dataset_id}.SBS96.all")
 
-
         # input data preprocessing
-
         def process_tsv_join(tsv_list):
             patients_tsv = tsv_list.split()
             # Read each file into a pandas DataFrame and ensure all columns are of type 'string'
diff --git a/modules/local/SigProfiler/download/main.nf b/modules/local/SigProfiler/download/main.nf
index 7129917..42a7dd4 100644
--- a/modules/local/SigProfiler/download/main.nf
+++ b/modules/local/SigProfiler/download/main.nf
@@ -1,6 +1,8 @@
 process DOWNLOAD_GENOME_SIGPROFILER {
     label "process_single"
-    container = 'docker://katiad/sigprofiler:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://katiad/sigprofiler:version1.0' :
+        'docker.io/katiad/sigprofiler:version1.0' }"
 
     input:
         val(reference_genome) // reference_genome : genome -> for example: GRCh37
@@ -9,7 +11,6 @@ process DOWNLOAD_GENOME_SIGPROFILER {
         path("*"), emit: genome_sigprofiler
         path "versions.yml", emit: versions
 
-
     script:
     """
     SigProfilerMatrixGenerator install $reference_genome -v .
diff --git a/modules/local/SparseSignatures/main.nf b/modules/local/SparseSignatures/main.nf
index 755948c..21a33a2 100644
--- a/modules/local/SparseSignatures/main.nf
+++ b/modules/local/SparseSignatures/main.nf
@@ -1,10 +1,11 @@
 process SPARSE_SIGNATURES {
     tag "$meta.id"
     label "process_low_long"
-    container = 'docker://lvaleriani/sparsesignature:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/sparsesignature:version1.0' :
+        'docker.io/lvaleriani/sparsesignature:version1.0' }"
 
     input:
-
         tuple val(meta), path(tsv_join,  stageAs: '*.tsv')
 
     output:
@@ -16,186 +17,5 @@ process SPARSE_SIGNATURES {
         path "versions.yml", emit: versions
 
     script:
-    def args                         = task.ext.args
-    def prefix                       = task.ext.prefix                                    ?: "${meta.id}"
-    def K                            = args!='' && args.K                                 ? "$args.K" : ""
-    def background_signature         = args!='' && args.background_signature              ? "$args.background_signature" : ""
-    def beta                         = args!='' && args.beta                              ? "$args.beta" : ""
-    def normalize_counts             = args!='' && args.normalize_counts                  ? "$args.normalize_counts" : ""
-    def nmf_runs                     = args!='' && args.nmf_runs                          ? "$args.nmf_runs" : ""
-    def iterations                   = args!='' && args.iterations                        ? "$args.iterations" : ""
-    def max_iterations_lasso         = args!='' && args.max_iterations_lasso              ? "$args.max_iterations_lasso" : ""
-    def num_processes                = args!='' && args.num_processes                     ? "$args.num_processes" : ""
-    def cross_validation_entries     = args!='' && args.cross_validation_entries          ? "$args.cross_validation_entries" : ""
-    def cross_validation_repetitions = args!='' && args.cross_validation_repetitions      ? "$args.cross_validation_repetitions" : ""
-    def cross_validation_iterations  = args!='' && args.cross_validation_iterations       ? "$args.cross_validation_iterations" : ""
-    def lambda_values_alpha          = args!='' && args.lambda_values_alpha               ? "$args.lambda_values_alpha" : ""
-    def lambda_values_beta           = args!='' && args.lambda_values_beta                ? "$args.lambda_values_beta" : ""
-    def lambda_rate_alpha            = args!='' && args.lambda_rate_alpha                 ? "$args.lambda_rate_alpha" : ""
-    def verbose                      = args!='' && args.verbose                           ? "$args.verbose" : ""
-    def seed                         = args!='' && args.seed                              ? "$args.seed" : ""
-
-    """
-    #!/usr/bin/env Rscript
-
-    library(SparseSignatures)
-    library(ggplot2)
-    library(stringr)
-    library(patchwork)
-    library(dplyr)
-
-    source("$moduleDir/getters.R")
-
-    n_procs = parse(text="$num_processes")
-        if (n_procs == "all"){
-            n_procs = as.double("Inf")
-        } else {
-            n_procs = eval(n_procs)
-    }
-
-
-    patients_tsv = strsplit("$tsv_join", " ")[[1]]
-    tables = lapply(patients_tsv, FUN = function(p_table){
-        read.delim(p_table, sep = "\\t", header=T) %>%
-            mutate(across(everything(), as.character))
-    }
-    )
-    multisample_table = dplyr::bind_rows(tables)
-
-
-    #Extract input data information
-    input_data <- multisample_table[,c("Indiv","chr","from","to","ref","alt")]
-    input_data <- setNames(input_data, c("sample","chrom","start","end","ref","alt"))
-    input_data[["end"]] <- input_data[["start"]]
-    input_data <- input_data %>% mutate(start = as.integer(start), end = as.integer(end))
-
-    #Generate the patient vs mutation count matrix from mutation data
-    #Load reference human-genome specification.
-    #The user must select, among the available choices, the reference genome consistent with the mutation dataset.
-
-    load_genome <- function(genome, input_data) {
-        if (genome == "GRCh37") {
-            library(BSgenome.Hsapiens.1000genomes.hs37d5)
-            bsg <- BSgenome.Hsapiens.1000genomes.hs37d5::hs37d5
-            input_data[["chrom"]] <- substring(input_data[["chrom"]], 4, 5)
-
-        } else if (genome == "GRCh38") {
-            library(BSgenome.Hsapiens.UCSC.hg38)
-            bsg <- BSgenome.Hsapiens.UCSC.hg38
-
-            # Leave 'chrom' unchanged for GRCh38
-        }
-        return(list(bsg = bsg, input_data = input_data))
-    }
-
-    data_list = load_genome("$params.genome", input_data)
-    bsg <- data_list[["bsg"]]
-    input_data <- data_list[["input_data"]]
-
-
-    mut_counts = SparseSignatures::import.trinucleotides.counts(data=input_data, reference=bsg)
-
-    #Load a reference SBS5 background signature from COSMIC
-    data(background)
-
-    #Estimate the initial values of beta
-    starting_betas = SparseSignatures::startingBetaEstimation(x = mut_counts,
-                                                            K = eval(parse(text="$K")),
-                                                            background_signature  = background)
-
-
-    #Find the optimal number of signatures and sparsity level: rely on cross-validation
-    # higher number of CV repetitions corresponds to more accurate parameter estimates
-
-    cv_out = SparseSignatures::nmfLassoCV(
-        x = mut_counts,
-        K = eval(parse(text="$K")),
-        starting_beta = starting_betas,
-        background_signature = background,
-        normalize_counts = as.logical("$normalize_counts"),
-        nmf_runs = as.integer("$nmf_runs"),
-        lambda_values_alpha = eval(parse(text="$lambda_values_alpha")),
-        lambda_values_beta = eval(parse(text="$lambda_values_beta")),
-        cross_validation_entries = as.numeric("$cross_validation_entries"),
-        cross_validation_iterations = as.integer("$cross_validation_iterations"),
-        cross_validation_repetitions = as.integer("$cross_validation_repetitions"),
-        iterations = as.integer("$iterations"),
-        max_iterations_lasso = as.integer("$max_iterations_lasso"),
-        num_processes = n_procs,
-        verbose = as.logical("$verbose"),
-        seed = as.integer("$seed")
-    )
-
-
-    #Analyze the mean squared error results averaging over cross-validation repetitions
-    cv_mses <- cv_out[["grid_search_mse"]][1,,]
-    cv_means_mse <- matrix(sapply(cv_mses, FUN = mean),
-                        nrow = dim(cv_mses)[1]
-    )
-
-    dimnames(cv_means_mse) <- dimnames(cv_mses)
-
-    #Find the combination of parameters that yields the lowest MSE
-    min_ii <- which(cv_means_mse == min(cv_means_mse, na.rm = TRUE), arr.ind = TRUE)
-    min_Lambda_beta <- rownames(cv_means_mse)[min_ii[1]]
-    min_Lambda_beta <- as.numeric(gsub("_Lambda_Beta", "", min_Lambda_beta))
-    min_K <- colnames(cv_means_mse)[min_ii[2]]
-    min_K <- as.numeric(gsub("_Signatures", "", min_K))
-    best_params_config <- data.frame(min_K, min_Lambda_beta)
-
-    saveRDS(object = cv_means_mse, file = paste0("$prefix", "_cv_means_mse.rds"))
-    saveRDS(object = best_params_config, file = paste0("$prefix", "_best_params_config.rds"))
-
-    #Discovering the signatures within the dataset: NMF Lasso
-    #Compute the signatures for the best configuration.
-
-    nmf_Lasso_out = SparseSignatures::nmfLasso(
-        x = mut_counts,
-        K = min_K,
-        beta = eval(parse(text="$beta")),
-        background_signature = background,
-        normalize_counts = as.logical("$normalize_counts"),
-        lambda_rate_alpha = eval(parse(text="$lambda_rate_alpha")),
-        lambda_rate_beta = min_Lambda_beta,
-        iterations = as.integer("$iterations"),
-        max_iterations_lasso = as.integer("$max_iterations_lasso"),
-        verbose = as.logical("$verbose")
-    )
-
-    saveRDS(object = nmf_Lasso_out, file =  paste0("$prefix", "_nmf_Lasso_out.rds"))
-
-    #Signature visualization
-    signatures = nmf_Lasso_out\$beta
-    plot_signatures <- SparseSignatures::signatures.plot(beta=signatures, xlabels=FALSE)
-
-    plot_exposure = nmf_Lasso_out\$alpha %>%
-        as.data.frame() %>%
-        tibble::rownames_to_column(var="PatientID") %>%
-        tidyr::pivot_longer(cols=!"PatientID", names_to="Signatures", values_to="Exposures") %>%
-
-        ggplot() +
-        geom_bar(aes(x=PatientID, y=Exposures, fill=Signatures),
-                position="stack", stat="identity") +
-        theme(axis.text.x=element_text(angle=90,hjust=1),
-                panel.background=element_blank(),
-                axis.line=element_line(colour="black"))
-
-    plt_all = patchwork::wrap_plots(plot_exposure, plot_signatures, ncol=2) + patchwork::plot_annotation(title = "$meta.id")
-    ggplot2::ggsave(plot = plt_all, filename = paste0("$prefix", "_plot_all.pdf"), width = 210, height = 297, units="mm", dpi = 200)
-    saveRDS(object = plt_all, file = paste0("$prefix", "_plot_all.rds"))
-
-    # version export
-    f <- file("versions.yml","w")
-    SparseSignatures_version <- sessionInfo()\$otherPkgs\$SparseSignatures\$Version
-    dplyr_version <- sessionInfo()\$otherPkgs\$dplyr\$Version
-    ggplot2_version <- sessionInfo()\$otherPkgs\$ggplot2\$Version
-    patchwork_version <- sessionInfo()\$otherPkgs\$patchwork\$Version
-    writeLines(paste0('"', "$task.process", '"', ":"), f)
-    writeLines(paste("    SparseSignatures:", SparseSignatures_version), f)
-    writeLines(paste("    dplyr:", dplyr_version), f)
-    writeLines(paste("    ggplot2:", ggplot2_version), f)
-    writeLines(paste("    patchwork:", patchwork_version), f)
-    close(f)
-
-    """
+    template "main_script.R"
 }
diff --git a/modules/local/annotate_driver/main.nf b/modules/local/annotate_driver/main.nf
index e58f58f..9a3b804 100644
--- a/modules/local/annotate_driver/main.nf
+++ b/modules/local/annotate_driver/main.nf
@@ -1,8 +1,9 @@
-
 process ANNOTATE_DRIVER {
     tag "$meta.id"
     label "process_single"
-    container = 'docker://lvaleriani/cnaqc:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(rds), path(driver_list)
@@ -11,7 +12,6 @@ process ANNOTATE_DRIVER {
     tuple val(meta), path("*.rds"),     emit: rds
     path "versions.yml",                emit: versions
 
-
     script:
     def args    =   task.ext.args   ?: ''
     def prefix  =   task.ext.prefix ?: "${meta.id}"
diff --git a/modules/local/cna2CNAqc/main.nf b/modules/local/cna2CNAqc/main.nf
index e6f009f..2101249 100755
--- a/modules/local/cna2CNAqc/main.nf
+++ b/modules/local/cna2CNAqc/main.nf
@@ -1,7 +1,9 @@
 process CNA_PROCESSING {
     tag "$meta.id"
     label "process_single"
-    container "docker://lvaleriani/cnaqc:version1.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(cna_segs), path(cna_extra)
@@ -14,33 +16,5 @@ process CNA_PROCESSING {
     task.ext.when == null || task.ext.when
 
     script:
-    def args    = task.ext.args     ?:  ''
-    def prefix  = task.ext.prefix   ?:  "${meta.id}"
-
-    """
-    #!/usr/bin/env Rscript
-    source(paste0("$moduleDir", '/parser_CNA.R'))
-
-    if ("$meta.cna_caller" == 'sequenza'){
-    CNA = parse_Sequenza(segments = "$cna_segs", extra = "$cna_extra")
-
-    } else if ("$meta.cna_caller" == 'ASCAT'){
-    CNA = parse_ASCAT(segments = "$cna_segs", extra = "$cna_extra")
-
-    } else {
-    stop('Copy Number Caller not supported.')
-    }
-
-    saveRDS(object = CNA, file = paste0("$prefix", "_cna.rds"))
-
-    # version export
-    f <- file("versions.yml","w")
-    readr_version <- sessionInfo()\$otherPkgs\$readr\$Version
-    dplyr_version <- sessionInfo()\$otherPkgs\$dplyr\$Version
-    writeLines(paste0('"', "$task.process", '"', ":"), f)
-    writeLines(paste("    readr:", readr_version), f)
-    writeLines(paste("    dplyr:", dplyr_version), f)
-    close(f)
-
-    """
+    template "main_script.R"
 }
diff --git a/modules/local/ctree/main.nf b/modules/local/ctree/main.nf
index d9cfba5..080d8f1 100644
--- a/modules/local/ctree/main.nf
+++ b/modules/local/ctree/main.nf
@@ -1,7 +1,9 @@
 process CTREE {
     tag "$meta.id"
     label "process_low"
-    container = 'docker://elenabuscaroli/ctree:version1.1'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://elenabuscaroli/ctree:version1.1' :
+        'docker.io/elenabuscaroli/ctree:version1.1' }"
 
     input:
         tuple val(meta), path(ctree_input)
diff --git a/modules/local/get_positions/main.nf b/modules/local/get_positions/main.nf
index 097fe6e..d40e3ef 100644
--- a/modules/local/get_positions/main.nf
+++ b/modules/local/get_positions/main.nf
@@ -1,7 +1,9 @@
 process GET_POSITIONS_ALL {
     tag "$meta.id"
     label "process_single"
-    container = 'docker://lvaleriani/cnaqc:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(rds_list, stageAs: '*.rds')
@@ -10,7 +12,6 @@ process GET_POSITIONS_ALL {
     tuple val(meta), path("*_all_positions.rds"),   emit: all_pos
     path "versions.yml",                            emit: versions
 
-
     script:
     def args    = task.ext.args     ?:  ''
     def prefix  = task.ext.prefix   ?:  "${meta.id}"
diff --git a/modules/local/get_positions/main_rel.nf b/modules/local/get_positions/main_rel.nf
index 833c9df..6445e94 100644
--- a/modules/local/get_positions/main_rel.nf
+++ b/modules/local/get_positions/main_rel.nf
@@ -1,7 +1,9 @@
 process GET_POSITIONS_REL {
     tag "$meta.id"
     label "process_single"
-    container = 'docker://lvaleriani/cnaqc:dev1'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(rds), path(all_pos)
@@ -10,7 +12,6 @@ process GET_POSITIONS_REL {
     tuple val(meta), path("*_positions_missing"),   emit: bed
     path "versions.yml",                            emit: versions
 
-
     script:
     def args    = task.ext.args     ?:  ''
     def prefix  = task.ext.prefix   ?:  "${meta.id}"
diff --git a/modules/local/join_CNAqc/main.nf b/modules/local/join_CNAqc/main.nf
index 3b80482..4223f54 100755
--- a/modules/local/join_CNAqc/main.nf
+++ b/modules/local/join_CNAqc/main.nf
@@ -1,7 +1,9 @@
 process JOIN_CNAQC {
     tag "$meta.id"
     label "process_low"
-    container = 'docker://lvaleriani/cnaqc:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(rds_list, stageAs: '*.rds'), val(tumour_samples)
diff --git a/modules/local/join_positions/main.nf b/modules/local/join_positions/main.nf
index 60d23fd..5306e55 100644
--- a/modules/local/join_positions/main.nf
+++ b/modules/local/join_positions/main.nf
@@ -1,7 +1,9 @@
 process JOIN_POSITIONS {
     tag "$meta.id"
     label "process_single"
-    container = 'docker://lvaleriani/cnaqc:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(rds), path(vcf_pileup), path(positions)
diff --git a/modules/local/mobsterh/main.nf b/modules/local/mobsterh/main.nf
index 8352053..aca453a 100644
--- a/modules/local/mobsterh/main.nf
+++ b/modules/local/mobsterh/main.nf
@@ -1,7 +1,9 @@
 process MOBSTERh {
     tag "$meta.id"
     label "process_single"
-    container = 'docker://elenabuscaroli/mobster:version1.0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://elenabuscaroli/mobster:version1.0' :
+        'docker.io/elenabuscaroli/mobster:version1.0' }"
 
     input:
         tuple val(meta), path(rds_join) // rds from JOIN_CNAQC
@@ -16,118 +18,5 @@ process MOBSTERh {
         path "versions.yml", emit: versions
 
     script:
-    def args = task.ext.args ?: ""
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def K = args!="" && args.K ? "$args.K" : ""
-    def samples = args!="" && args.samples ? "$args.samples" : ""
-    def init = args!="" && args.init ? "$args.init" : ""
-    def tail = args!="" && args.tail ? "$args.tail" : ""
-    def epsilon = args!="" && args.epsilon ? "$args.epsilon" : ""
-    def maxIter = args!="" && args.maxIter ? "$args.maxIter" : ""
-    def fit_type = args!="" && args.fit_type ? "$args.fit_type" : ""
-    def seed = args!="" && args.seed ? "$args.seed" : ""
-    def model_selection = args!="" && args.model_selection ? "$args.model_selection" : ""
-    def trace = args!="" && args.trace ? "$args.trace" : ""
-    def parallel = args!="" && args.parallel ? "$args.parallel" : ""
-    def pi_cutoff = args!="" && args.pi_cutoff ? "$args.pi_cutoff" : ""
-    def n_cutoff = args!="" && args.n_cutoff ? "$args.n_cutoff" : ""
-    def auto_setup = args!="" && args.auto_setup ? "$args.auto_setup" : ""
-    def silent = args!="" && args.silent ? "$args.silent" : ""
-
-    """
-    #!/usr/bin/env Rscript
-
-    # Sys.setenv("VROOM_CONNECTION_SIZE"=99999999)
-    library(CNAqc)
-    library(mobster)
-    library(dplyr)
-    library(ggplot2)
-    source("$moduleDir/getters.R")
-    patientID = description = "$meta.patient"
-    samples = strsplit(x = "$meta.tumour_sample", ",") %>% unlist()  # list of samples
-
-    ## read mCNAqc object
-    if ( grepl(".rds\$", tolower("$rds_join")) ) {
-        obj = readRDS("$rds_join")
-        if (class(obj) == "m_cnaqc") {
-            original = obj %>% get_sample(sample=samples, which_obj="original")
-            input_table = lapply(names(original),
-                                    function(sample_name)
-                                    original[[sample_name]] %>%
-                                        # keep only mutations on the diploid karyotype
-                                        CNAqc::subset_by_segment_karyotype("1:1") %>%
-                                        CNAqc::Mutations() %>%
-                                        dplyr::mutate(sample_id=sample_name)
-                                    ) %>% dplyr::bind_rows()
-        } else {
-            cli::cli_abort("Object of class {class($rds_join)} not supported.")
-        }
-    } else {
-        input_table = read.csv("$rds_join")
-    }
-
-    ## Function to run a single mobster fit
-    run_mobster_fit = function(joint_table, descr) {
-        # get input table for the single patient
-        inp_tb = joint_table %>%
-            dplyr::filter(VAF < 1) %>%
-            # dplyr::mutate(VAF=replace(VAF, VAF==0, 1e-7)) %>%
-            dplyr::filter(VAF!=0) %>%
-            dplyr::filter(karyotype=="1:1")
-            # dplyr::rename(variantID=gene) %>%
-            # dplyr::rename(is.driver=is_driver) %>%
-            # dplyr::rename(tumour_content=purity) %>%
-            # dplyr::rename(is_driver=is.driver)
-            # dplyr::rename(is_driver=is.driver, driver_label=variantID)
-
-    mobster_fit(x = inp_tb,
-                K = eval(parse(text="$K")),
-                samples = as.integer("$samples"),
-                init = "$init",
-                tail = eval(parse(text="$tail")),
-                epsilon = as.numeric("$epsilon"),
-                maxIter = as.integer("$maxIter"),
-                fit.type = "$fit_type",
-                seed = as.integer("$seed"),
-                model.selection = "$model_selection",
-                trace = as.logical("$trace"),
-                parallel = as.logical("$parallel"),
-                pi_cutoff = as.numeric("$pi_cutoff"),
-                N_cutoff = as.integer("$n_cutoff"),
-                auto_setup = eval(parse(text="$auto_setup")),
-                silent = as.logical("$silent"),
-                description = descr)
-    }
-
-    lapply(samples, function(sample_name) {
-    #  dir.create(outDir_sample, recursive = TRUE)
-
-        fit = run_mobster_fit(joint_table=input_table %>% dplyr::filter(sample_id == !!sample_name),
-                            descr=description)
-
-        best_fit = fit[["best"]]
-        plot_fit = plot(best_fit)
-
-        saveRDS(object=fit, file=paste0("$prefix", "_mobsterh_st_fit.rds"))
-        saveRDS(object=best_fit, file=paste0("$prefix", "_mobsterh_st_best_fit.rds"))
-        saveRDS(object=plot_fit, file=paste0("$prefix", "_mobsterh_st_best_fit_plots.rds"))
-
-        # save report plots
-        report_fig = mobster::plot_model_selection(fit)
-        saveRDS(report_fig, file=paste0("$prefix", "_REPORT_plots_mobster.rds"))
-        ggplot2::ggsave(plot=report_fig, filename=paste0("$prefix", "_REPORT_plots_mobster.pdf"), height=210, width=210, units="mm", dpi = 200)
-        ggplot2::ggsave(plot=report_fig, filename=paste0("$prefix", "_REPORT_plots_mobster.png"), height=210, width=210, units="mm", dpi = 200)
-    })
-
-
-    # version export
-    f = file("versions.yml","w")
-    cnaqc_version = sessionInfo()\$otherPkgs\$CNAqc\$Version
-    mobster_version = sessionInfo()\$otherPkgs\$mobster\$Version
-    writeLines(paste0('"', "$task.process", '"', ":"), f)
-    writeLines(paste("    CNAqc:", cnaqc_version), f)
-    writeLines(paste("    mobster:", mobster_version), f)
-    close(f)
-    """
-
+    template "main_script.R"
 }
diff --git a/modules/local/pyclonevi/main.nf b/modules/local/pyclonevi/main.nf
index 1e77925..de95471 100755
--- a/modules/local/pyclonevi/main.nf
+++ b/modules/local/pyclonevi/main.nf
@@ -1,7 +1,9 @@
 process PYCLONEVI {
     tag "$meta.id"
     label "process_low"
-    container = 'https://depot.galaxyproject.org/singularity/pyclone-vi%3A0.1.3--pyhca03a8a_0'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pyclone-vi%3A0.1.3--pyhca03a8a_0' :
+        'docker.io/blcdsdockerregistry/pyclone-vi:0.1.2' }"
 
     input:
         tuple val(meta), path(rds_join), val(tumour_samples)
@@ -14,47 +16,5 @@ process PYCLONEVI {
         path "versions.yml", emit: versions
 
     script:
-    def args = task.ext.args ?: ''
-    // def prefix = task.ext.prefix ?:"${meta.id}_remove_tail_$args.remove_tail"
-    def prefix = task.ext.prefix ?:"${meta.id}"
-    def n_cluster_arg = args.n_cluster ? "$args.n_cluster" : ""
-    def density_arg = args.density ? "$args.density" : ""
-    def n_grid_point_arg = args.n_grid_point ? "$args.n_grid_point" : ""
-    def n_restarts_arg = args.n_restarts ? "$args.n_restarts" : ""
-    sampleID_string = tumour_samples.join(" ")
-
-    """
-    # format the input table in order to be pyclone compliant
-    python3 $moduleDir/pyclone_utils.py create_pyclone_input $rds_join $meta.patient ${prefix}_pyclone_input_all_samples.tsv
-
-    colnames=\$(head -n 1 ${prefix}_pyclone_input_all_samples.tsv)
-
-    column_number=\$(echo -e "\$colnames" | awk -v col_name="sample_id" 'BEGIN { FS = "\t" } {
-        for (i = 1; i <= NF; i++) {
-            if (\$i == col_name) {
-                print i
-                exit
-            }
-        }
-        exit 1  # Exit with error if column not found
-    }')
-
-    echo -e "\$colnames" > ${prefix}_pyclone_input.tsv
-    for i in $sampleID_string;
-    do awk -F "\t" '\$'\$column_number' == "'"\$i"'"' ${prefix}_pyclone_input_all_samples.tsv >> ${prefix}_pyclone_input.tsv;
-    done
-
-    pyclone-vi fit -i ${prefix}_pyclone_input.tsv -o ${prefix}_all_fits.h5 -c $n_cluster_arg -d $density_arg --num-grid-points $n_grid_point_arg --num-restarts $n_restarts_arg
-    pyclone-vi write-results-file -i ${prefix}_all_fits.h5 -o ${prefix}_best_fit.txt
-
-    python3 $moduleDir/pyclone_ctree.py --joint ${prefix}_pyclone_input.tsv --best_fit ${prefix}_best_fit.txt --ctree_input ${prefix}_cluster_table.csv
-
-
-    VERSION=\$(pip show pyclone-vi | grep Version | awk '{print \$NF}')
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        pyclone-vi: \$VERSION
-    END_VERSIONS
-    """
-
+    template "main_script.py"
 }
diff --git a/modules/local/tinc/main.nf b/modules/local/tinc/main.nf
index cf63ad5..bbbc6ba 100755
--- a/modules/local/tinc/main.nf
+++ b/modules/local/tinc/main.nf
@@ -1,7 +1,9 @@
 process TINC {
     tag "$meta.id"
     label "process_single"
-    container 'docker://vvvirgy/tinc:v2' // define the running container
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://vvvirgy/tinc:v2' :
+        'docker.io/vvvirgy/tinc:v2' }"
     // conda "${moduleDir}/environment.yml"
 
     input:
diff --git a/modules/local/vcf2CNAqc/main.nf b/modules/local/vcf2CNAqc/main.nf
index f9bec2f..8f533e8 100755
--- a/modules/local/vcf2CNAqc/main.nf
+++ b/modules/local/vcf2CNAqc/main.nf
@@ -1,7 +1,9 @@
 process VCF_PROCESSING {
     tag "$meta.id"
     label "process_single"
-    container "docker://lvaleriani/cnaqc:version1.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://lvaleriani/cnaqc:version1.0' :
+        'docker.io/lvaleriani/cnaqc:version1.0' }"
 
     input:
     tuple val(meta), path(vcf), path(tbi)
@@ -14,54 +16,6 @@ process VCF_PROCESSING {
     task.ext.when == null || task.ext.when
 
     script:
-    def args                = task.ext.args         ?: ''
-    def prefix                  = task.ext.prefix   ?: "${meta.id}"
-    def filter_mutations    = args.filter_mutations ? "$args.filter_mutations": "NULL"
-
-    """
-    #!/usr/bin/env Rscript
-
-    library(dplyr)
-    library(tidyr)
-    library(vcfR)
-
-    source(paste0("$moduleDir", '/parser_vcf.R'))
-
-    # Read vcf file
-    vcf = vcfR::read.vcfR("$vcf")
-
-    # Check from which caller the .vcf has been produced
-    source = vcfR::queryMETA(vcf, element = 'source')[[1]]
-
-    if (TRUE %in% grepl(pattern = 'Mutect', x = source)){
-        calls = parse_Mutect(vcf, tumour_id = "$meta.tumour_sample", normal_id = "$meta.normal_sample", filter_mutations = as.logical("$filter_mutations"))
-
-    } else if (TRUE %in% grepl(pattern = 'strelka', x = source)){
-        calls = parse_Strelka(vcf, tumour_id = "$meta.tumour_sample", normal_id = "$meta.normal_sample", filter_mutations = as.logical("$filter_mutations"))
-
-    } else if (TRUE %in% grepl(pattern = 'Platypus', x = source)){
-        calls = parse_Platypus(vcf, tumour_id = "$meta.tumour_sample", normal_id = "$meta.normal_sample", filter_mutations = as.logical("$filter_mutations"))
-
-    } else if (TRUE %in% grepl(pattern = 'freeBayes', x = source)){
-        calls = parse_FreeBayes(vcf, tumour_id = "$meta.tumour_sample", normal_id = "$meta.normal_sample", filter_mutations = as.logical("$filter_mutations"))
-
-    } else {
-        stop('Variant Caller not supported.')
-    }
-
-    saveRDS(object = calls, file = paste0("$prefix", "_snv.rds"))
-
-    # version export
-    f <- file("versions.yml","w")
-    dplyr_version <- sessionInfo()\$otherPkgs\$dplyr\$Version
-    tidyr_version <- sessionInfo()\$otherPkgs\$tidyr\$Version
-    vcfR_version <- sessionInfo()\$otherPkgs\$vcfR\$Version
-    writeLines(paste0('"', "$task.process", '"', ":"), f)
-    writeLines(paste("    dplyr:", dplyr_version), f)
-    writeLines(paste("    tidyr:", tidyr_version), f)
-    writeLines(paste("    vcfR:", vcfR_version), f)
-    close(f)
-
-    """
+    template "main_script.R"
 }
 
diff --git a/modules/local/viber/main.nf b/modules/local/viber/main.nf
index 66a07e5..6d1153f 100644
--- a/modules/local/viber/main.nf
+++ b/modules/local/viber/main.nf
@@ -1,7 +1,9 @@
 process VIBER {
     tag "$meta.id"
     label "process_single"
-    container = 'docker://elenabuscaroli/viber:version0.1'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://elenabuscaroli/viber:version0.1' :
+        'docker.io/elenabuscaroli/viber:version0.1' }"
 
     input:
         tuple val(meta), path(rds_join), val(tumour_samples) //rds from either JOIN_CNAQC or JOIN_FIT, should be always grouped
@@ -17,26 +19,7 @@ process VIBER {
         path "versions.yml", emit: versions
 
     script:
-    // viber fit params
-    def args = task.ext.args ?: ""
-    def prefix = task.ext.prefix ?:"${meta.id}"
-    def K = args!="" && args.K ? "$args.K" : ""
-    def alpha_0 = args!="" && args.alpha_0 ? "$args.alpha_0" : ""
-    def a_0 = args!="" && args.a_0 ? "$args.a_0" : ""
-    def b_0 = args!="" && args.b_0 ? "$args.b_0" : ""
-    def maxIter = args!="" && args.maxIter ? "$args.maxIter" : ""
-    def epsilon_conv = args!="" && args.epsilon_conv ? "$args.epsilon_conv" : ""
-    def samples = args!="" && args.samples ? "$args.samples" : ""
-    def q_init = args!="" && args.q_init ? "$args.q_init" : ""
-    def trace = args!="" && args.trace ? "$args.trace" : ""
-    // viber filter params
-    def binomial_cutoff = args!="" && args.binomial_cutoff ? "$args.binomial_cutoff" : ""
-    def dimensions_cutoff = args!="" && args.dimensions_cutoff ? "$args.dimensions_cutoff" : ""
-    def pi_cutoff = args!="" && args.pi_cutoff ? "$args.pi_cutoff" : ""
-    def re_assign = args!="" && args.re_assign ? "$args.re_assign" : ""
-    // def mode = args.mode ?  "$args.mode" : ""
     def n_samples = tumour_samples.size()
-
     if (n_samples==1) {
         plot1 = "viber_best_st_mixing_plots.rds"
         plot2 = "viber_best_st_heuristic_mixing_plots.rds"
@@ -45,161 +28,5 @@ process VIBER {
         plot2 = "viber_best_st_heuristic_fit_plots.rds"
     }
 
-    """
-    #!/usr/bin/env Rscript
-
-    # Sys.setenv("VROOM_CONNECTION_SIZE"=99999999)
-
-    library(VIBER)
-    library(dplyr)
-    library(tidyverse)
-    library(ggplot2)
-    library(CNAqc)
-    source("$moduleDir/getters.R")
-
-    patientID = "$meta.patient"
-    samples = substr("$tumour_samples", 2, nchar("$tumour_samples")-1)
-    samples = strsplit(samples, ", ")[[1]]
-    print("$tumour_samples")
-    print("$rds_join")
-    print(samples)
-
-    if ( grepl(".rds\$", tolower("$rds_join")) ) {
-        input_obj = readRDS("$rds_join")
-        if (class(input_obj) == "m_cnaqc") {
-            shared = input_obj %>% get_sample(sample=samples, which_obj="shared")
-            joint_table = lapply(names(shared),
-                            function(sample_name)
-                            shared[[sample_name]] %>%
-                                # keep only mutations on the diploid karyotype
-                                CNAqc::subset_by_segment_karyotype("1:1") %>%
-                                CNAqc::Mutations() %>%
-                                dplyr::mutate(sample_id=sample_name)
-                            ) %>% dplyr::bind_rows()
-    } else {
-        cli::cli_alert_warning("Object of class {class(input_obj)} not supported.")
-        return()
-        }
-    }
-
-    print("Subset joint done")
-
-    ## TODO : add drivers to `input_tab`
-
-    ## Read input joint table
-    input_tab = joint_table %>%
-    dplyr::mutate(VAF=replace(VAF, VAF==0, 1e-7))
-
-    ## Convert the input table into longer format
-    reads_data = input_tab %>%
-    dplyr::select(chr, from, ref, alt, NV, DP, VAF, sample_id) %>%
-    tidyr::pivot_wider(names_from="sample_id",
-                        values_from=c("NV","DP","VAF"), names_sep=".")
-
-    ## Extract DP (depth)
-    dp = reads_data %>%
-    # dplyr::filter(mutation_id %in% non_tail) %>% ## this step should be managed before by other module
-    dplyr::select(dplyr::starts_with("DP")) %>%
-    dplyr::mutate(dplyr::across(.cols=dplyr::everything(),
-                                .fns=function(x) replace(x, is.na(x), 0))) %>%
-    dplyr::rename_all(function(x) stringr::str_remove_all(x,"DP."))
-
-    ## Extract NV (alt_counts)
-    nv = reads_data %>%
-    # dplyr::filter(mutation_id %in% non_tail) %>% ## this step should be managed before by other module
-    dplyr::select(dplyr::starts_with("NV")) %>%
-    dplyr::mutate(dplyr::across(.cols=dplyr::everything(),
-                                .fns=function(x) replace(x, is.na(x), 0))) %>%
-    dplyr::rename_all(function(x) stringr::str_remove_all(x,"NV."))
-
-    # Standard fit
-    viber_K = eval(parse(text="$K"))
-    viber_K[which.min(viber_K)] = 2
-    st_fit = VIBER::variational_fit(nv, dp,
-                                    K=viber_K,
-                                    data=reads_data,
-                                    # %>% dplyr::filter(mutation_id %in% non_tail)
-                                    alpha_0=as.numeric("$alpha_0"),
-                                    a_0=as.integer("$a_0"),
-                                    b_0=as.integer("$b_0"),
-                                    max_iter=as.integer("$maxIter"),
-                                    epsilon_conv=as.numeric("$epsilon_conv"),
-                                    samples=as.integer("$samples"),
-                                    q_init="$q_init",
-                                    trace=as.logical("$trace"),
-                                    description=""
-                                    )
-
-    best_fit = best_fit_heuristic = st_fit
-
-    # If all clusters are removed -> keep the origianl best fit
-    tryCatch(expr = {
-    # Apply the heuristic
-    best_fit_heuristic = VIBER::choose_clusters(st_fit,
-        binomial_cutoff=as.numeric("$binomial_cutoff"),
-        dimensions_cutoff=as.integer("$dimensions_cutoff"),
-        pi_cutoff=as.numeric("$pi_cutoff"),
-        re_assign=as.logical("$re_assign")
-        )
-    }, error = function(e) {
-    print(e)
-    best_fit_heuristic <<- st_fit
-    } )
-
-    # Save fits
-    saveRDS(best_fit, file=paste0("$prefix", "_viber_best_st_fit.rds"))
-    saveRDS(best_fit_heuristic, file = paste0("$prefix", "_viber_best_st_heuristic_fit.rds"))
-
-    # Save plots
-    if ("$n_samples" >1) { #mutlisample mode on
-    print("multisample mode on")
-    plot_fit = plot(best_fit)
-    plot_fit_heuristic = plot(best_fit_heuristic)
-
-    saveRDS(plot_fit, file=paste0("$prefix", "_viber_best_st_fit_plots.rds"))
-    saveRDS(plot_fit_heuristic, file=paste0("$prefix", "_viber_best_st_heuristic_fit_plots.rds"))
-    } else {
-    plot_fit_mixing = plot_mixing_proportions(best_fit)
-    plot_fit_mixing_heuristic = plot_mixing_proportions(best_fit_heuristic)
-
-    saveRDS(plot_fit_mixing, file=paste0("$prefix", "_viber_best_st_mixing_plots.rds"))
-    saveRDS(plot_fit_mixing_heuristic, file=paste0("$prefix", "_viber_best_st_heuristic_mixing_plots.rds"))
-    }
-
-    # Save report plot
-    n_samples = ncol(best_fit[["x"]]) - 1
-    marginals = multivariate = ggplot()
-
-    try(expr = {marginals <<- VIBER::plot_1D(best_fit)} )
-    #try(expr = {multivariate = plot(best_fit) %>% patchwork::wrap_plots()} )
-    #top_p = patchwork::wrap_plots(marginals, multivariate, design=ifelse(n_samples>2, "ABB", "AAB"))
-
-    try(expr = {multivariate = plot(best_fit)})
-    try(expr = {multivariate = ggpubr::ggarrange(plotlist = multivariate)})
-    top_p = ggpubr::ggarrange(plotlist = list(marginals, multivariate), widths=ifelse(n_samples>2, c(1,2), c(2,1)))
-
-    mix_p = VIBER::plot_mixing_proportions(best_fit)
-    binom = VIBER::plot_peaks(best_fit)
-    elbo = VIBER::plot_ELBO(best_fit)
-    #bottom_p = patchwork::wrap_plots(mix_p, binom, elbo, design="ABBBC")
-    bottom_p = ggpubr::ggarrange(plotlist = list(mix_p, binom, elbo), widths = c(1,2,1), nrow = 1)
-
-    #report_fig = patchwork::wrap_plots(top_p, bottom_p, design=ifelse(n_samples>2, "AAAB", "AAB"))
-    report_fig = ggpubr::ggarrange(top_p, bottom_p, nrow = 2, heights = ifelse(n_samples>2, c(3,1), c(2,1)))
-    saveRDS(report_fig, file=paste0("$prefix", "_REPORT_plots_viber.rds"))
-    ggplot2::ggsave(plot=report_fig, filename=paste0("$prefix", "_REPORT_plots_viber.pdf"), height=210, width=210, units="mm", dpi = 200)
-    ggplot2::ggsave(plot=report_fig, filename=paste0("$prefix", "_REPORT_plots_viber.png"), height=210, width=210, units="mm", dpi = 200)
-
-
-    # version export
-    f = file("versions.yml","w")
-    cnaqc_version = sessionInfo()\$otherPkgs\$CNAqc\$Version
-    viber_version = sessionInfo()\$otherPkgs\$VIBER\$Version
-    writeLines(paste0('"', "$task.process", '"', ":"), f)
-    writeLines(paste("    CNAqc:", cnaqc_version), f)
-    writeLines(paste("    VIBER:", viber_version), f)
-    close(f)
-
-    """
-
+    template "main_script.R"
 }

From 21c673228fd9fe5f7481c34de49b1aab9c46b0d5 Mon Sep 17 00:00:00 2001
From: Elena Buscaroli <ele.buscaroli@gmail.com>
Date: Wed, 29 Jan 2025 16:32:57 +0100
Subject: [PATCH 8/8] update nextflow.config

---
 nextflow.config | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 4725494..9c71447 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -527,6 +527,3 @@ validation {
         afterText = validation.help.afterText
     }
 }
-
-// Load modules.config for DSL2 module specific options
-includeConfig 'conf/modules.config'