diff --git a/docs/ReferencesExtension.md b/docs/ReferencesExtension.md index 76017d2..920d8e3 100644 --- a/docs/ReferencesExtension.md +++ b/docs/ReferencesExtension.md @@ -14,8 +14,9 @@ The References Extension solves this by providing a unified interface that autom ### 1.1. Core Capabilities -The extension provides two essential functions: +The extension provides three essential functions: +- **Path Transformation**: `updateReferencesFile()` - Updates YAML reference files by replacing base paths - **File Resolution**: `getReferencesFile()` - Resolves file paths from parameters or reference metadata - **Value Resolution**: `getReferencesValue()` - Retrieves metadata values with parameter override support @@ -101,7 +102,223 @@ workflow { ## 3. Core Functions Reference -### 3.1. getReferencesFile - File Path Resolution +### 3.1. updateReferencesFile - Reference File Path Transformation + +This function updates a YAML reference file by replacing base paths, useful for adapting reference files to different storage locations or environments. It creates a staged copy of the YAML file with updated paths, leaving the original unchanged. + +#### Function Signature + +```groovy +// Named parameters version (recommended) +Path updateReferencesFile( + Map options, // Configuration map with basepathFinal and basepathToReplace + Object yamlReference // Path to YAML reference file +) + +// Positional parameters version +Path updateReferencesFile( + Object yamlReference, // Path to YAML reference file + Object basepathFinal, // Final base path to use as replacement + Object basepathToReplace // Base path(s) to be replaced (String or List) +) +``` + +#### Parameters + +| Parameter | Type | Required | Description | +| ------------------------------------------------------- | ---------------- | -------- | ------------------------------------------------------------------------------------- | +| `options.basepathFinal`
or `basepath_final` | String/null | No | The final base path to use as replacement. If null/false/empty, returns original file | +| `options.basepathToReplace`
or `basepath_to_replace` | String/List/null | No | Base path(s) to be replaced. Can be a single string or list of strings | +| `yamlReference` | Path/String | Yes | Path to the YAML reference file to update | + +#### Return Value + +Returns a `Path` object pointing to either: + +- A staged copy with updated paths (when `basepathFinal` is provided) +- The original file (when `basepathFinal` is null, false, or empty) + +#### Practical Examples + +**Example 1: Adapting iGenomes paths to local storage** + +```nextflow title="update_igenomes_paths.nf" +#!/usr/bin/env nextflow + +include { updateReferencesFile } from 'plugin/nf-core-utils' + +params.references_yaml = 'references/grch38.yml' +params.local_base = '/data/references' + +workflow { + // Original YAML contains: ${params.igenomes_base}/Homo_sapiens/... + // Update to local path: /data/references/Homo_sapiens/... + + updated_yaml = updateReferencesFile( + basepathFinal: params.local_base, + basepathToReplace: '${params.igenomes_base}', + params.references_yaml // positional argument: the YAML reference file (not a named option) + ) + + log.info "Updated reference file: ${updated_yaml}" +} +``` + +**Example 2: Replacing multiple base paths** + +```nextflow title="multi_path_replacement.nf" +#!/usr/bin/env nextflow + +include { updateReferencesFile } from 'plugin/nf-core-utils' + +params.references_yaml = 'references/genome_info.yml' +params.unified_base = '/mnt/shared/references' + +workflow { + // Replace multiple different base paths with a single unified path + // Useful when consolidating references from different sources + + updated_yaml = updateReferencesFile( + basepathFinal: params.unified_base, + basepathToReplace: [ + '${params.igenomes_base}', + '${params.references_base}', + '/old/storage/location', + 's3://old-bucket/references' + ], + params.references_yaml // positional argument: the YAML reference file (not a named option) + ) + + log.info "Consolidated reference file: ${updated_yaml}" +} +``` + +**Example 3: Using positional parameters** + +```nextflow title="positional_params.nf" +#!/usr/bin/env 
nextflow + +include { updateReferencesFile } from 'plugin/nf-core-utils' + +workflow { + // Simple syntax when you only need basic path replacement + def yamlFile = file('references/genome.yml') + + updated = updateReferencesFile( + yamlFile, + '/new/base/path', + '/old/base/path' + ) + + log.info "Updated: ${updated}" +} +``` + +**Example 4: Cloud to local migration** + +```nextflow title="cloud_to_local.nf" +#!/usr/bin/env nextflow + +include { updateReferencesFile } from 'plugin/nf-core-utils' + +params.references_yaml = 'config/aws_references.yml' +params.local_mirror = '/data/local-mirror' +params.s3_base = 's3://ngi-igenomes/igenomes' + +workflow { + // Migrate from S3 to local storage + local_references = updateReferencesFile( + basepath_final: params.local_mirror, // Using snake_case variant + basepath_to_replace: params.s3_base, + params.references_yaml // positional argument: the YAML reference file (not a named option) + ) + + // Use the updated reference file in downstream processes + PROCESS_WITH_LOCAL_REFS(local_references) +} + +process PROCESS_WITH_LOCAL_REFS { + input: + path yaml_file + + script: + """ + echo "Processing with updated references from: ${yaml_file}" + cat ${yaml_file} + """ +} +``` + +**Example 5: Conditional path updates** + +```nextflow title="conditional_update.nf" +#!/usr/bin/env nextflow + +include { updateReferencesFile } from 'plugin/nf-core-utils' + +params.references_yaml = 'references.yml' +params.use_local = false +params.local_base = '/data/references' + +workflow { + // Only update paths if using local storage + updated_yaml = updateReferencesFile( + basepathFinal: params.use_local ? 
params.local_base : null, + basepathToReplace: '${params.igenomes_base}', + params.references_yaml // positional argument: the YAML reference file (not a named option) + ) + + // If params.use_local is false, the original file is returned unchanged + log.info "Reference file: ${updated_yaml}" +} +``` + +#### Key Features + +> [!TIP] "Staged Copies" +> When `basepathFinal` is provided, the function creates a staged copy in a temporary location (under `workDir/tmp/`), ensuring your original reference files remain unchanged. This is ideal for adapting references without modifying source files. + +> [!NOTE] "Multiple Replacements" +> The `basepathToReplace` parameter accepts either a single string or a list of strings, allowing you to replace multiple different base paths with a single unified path in one operation. + +> [!IMPORTANT] "Parameter Name Variants" +> The function supports both camelCase (`basepathFinal`, `basepathToReplace`) and snake_case (`basepath_final`, `basepath_to_replace`) option names; when both variants are supplied, the camelCase value takes precedence. Only these base-path options are named: `yamlReference` is always passed as a positional argument. + +#### Common Use Cases + +1. **iGenomes Migration**: Update iGenomes reference paths when moving from cloud to local storage +2. **Path Consolidation**: Unify references from multiple sources into a single base path +3. **Environment Adaptation**: Adapt reference files for different compute environments (HPC, cloud, local) +4. **Testing**: Create test versions of reference files with modified paths for pipeline validation +5. 
**Multi-site Deployment**: Adapt reference configurations for different institutional storage systems + +#### Error Handling + +The function validates the input YAML file and throws an `IllegalArgumentException` if: + +- The YAML file doesn't exist +- The YAML file path is invalid or null + +```nextflow title="error_handling.nf" +#!/usr/bin/env nextflow + +include { updateReferencesFile } from 'plugin/nf-core-utils' + +workflow { + try { + updated = updateReferencesFile( + basepathFinal: '/new/path', + basepathToReplace: '/old/path', + 'non_existent.yml' // positional argument: the YAML reference file (not a named option) + ) + } catch (IllegalArgumentException e) { + log.error "Reference file error: ${e.message}" + exit 1 + } +} +``` + +### 3.2. getReferencesFile - File Path Resolution This function intelligently resolves file paths based on user parameters and reference metadata. @@ -162,7 +379,7 @@ workflow { } ``` -### 3.2. getReferencesValue - Metadata Value Resolution +### 3.3. getReferencesValue - Metadata Value Resolution This function extracts metadata values with user parameter override support. 
diff --git a/src/main/groovy/nfcore/plugin/NfUtilsExtension.groovy b/src/main/groovy/nfcore/plugin/NfUtilsExtension.groovy index f81fdb1..daaf7c9 100644 --- a/src/main/groovy/nfcore/plugin/NfUtilsExtension.groovy +++ b/src/main/groovy/nfcore/plugin/NfUtilsExtension.groovy @@ -156,6 +156,33 @@ class NfUtilsExtension extends PluginExtensionPoint { return nfcore.plugin.references.ReferencesUtils.getReferencesValue(referencesList, param, attribute) } + /** + * Rewrite the base paths of a YAML reference file (named-parameter form) + * @param options Map of named parameters: basepathFinal (or basepath_final) and basepathToReplace (or basepath_to_replace) + * @param yamlReference Location of the YAML reference file + * @return Whatever ReferencesUtils.updateReferencesFile yields for these arguments (staged copy or original file) + */ + @Function + def updateReferencesFile(Map options, def yamlReference) { + def utils = new nfcore.plugin.references.ReferencesUtils() + utils.init(this.session) + utils.updateReferencesFile(options, yamlReference) + } + + /** + * Rewrite the base paths of a YAML reference file (positional form) + * @param yamlReference Location of the YAML reference file + * @param basepathFinal Replacement base path (may be null, false, or empty) + * @param basepathToReplace Base path(s) to be replaced (may be null, false, or empty) + * @return Whatever ReferencesUtils.updateReferencesFile yields for these arguments (staged copy or original file) + */ + @Function + def updateReferencesFile(def yamlReference, def basepathFinal, def basepathToReplace) { + def utils = new nfcore.plugin.references.ReferencesUtils() + utils.init(this.session) + utils.updateReferencesFile(yamlReference, basepathFinal, basepathToReplace) + } + // --- Methods from NextflowPipelineExtension --- /** * Generate version string for a workflow diff --git a/src/main/groovy/nfcore/plugin/references/ReferencesUtils.groovy 
b/src/main/groovy/nfcore/plugin/references/ReferencesUtils.groovy index ef9d1c2..234bdbd 100644 --- a/src/main/groovy/nfcore/plugin/references/ReferencesUtils.groovy +++ b/src/main/groovy/nfcore/plugin/references/ReferencesUtils.groovy @@ -14,10 +14,11 @@ * limitations under the License. */ -package nfcore.plugin +package nfcore.plugin.references import groovy.transform.CompileStatic import nextflow.Session +import nextflow.file.FileHelper /** * Implements utility functions for handling reference files and values @@ -70,4 +71,73 @@ class ReferencesUtils { } } } + + /** + * Update references file by replacing base paths in the YAML file + * + * @param options Named parameters map with basepathFinal (or basepath_final) and basepathToReplace (or basepath_to_replace); may be null + * @param yamlReference The path to the YAML reference file + * @return The staged copy with replaced paths, or the original file when no final base path is given + * @throws IllegalArgumentException if yamlReference is null or does not point to an existing file + */ + def updateReferencesFile(Map options, def yamlReference) { + // Support named parameters: basepathFinal/basepath_final and basepathToReplace/basepath_to_replace (camelCase wins when both are set) + def basepathFinal = options?.basepathFinal ?: options?.basepath_final + def basepathToReplace = options?.basepathToReplace ?: options?.basepath_to_replace + + // Reject null explicitly: calling toString() on it would raise a NullPointerException instead + if (yamlReference == null) { + throw new IllegalArgumentException("YAML reference file must not be null") + } + + def correctYamlFile = FileHelper.asPath(yamlReference.toString()) + + if (!correctYamlFile || !correctYamlFile.exists()) { + throw new IllegalArgumentException("YAML reference file does not exist: ${yamlReference}") + } + + // Without a final base path there is nothing to substitute: return the original file + if (!basepathFinal) { + return correctYamlFile + } + + // Build the staged location from the work directory Path itself rather than from its string form, + // and only append a dot when the source file actually has an extension + def extension = correctYamlFile.getExtension() + def stagedName = extension ? "${UUID.randomUUID()}.${extension}".toString() : UUID.randomUUID().toString() + def stagedYamlFile = session.workDir.resolve('tmp').resolve(stagedName) + + // Ensure parent directory exists + stagedYamlFile.parent.mkdirs() + + // Copy the file so the original is never modified + correctYamlFile.copyTo(stagedYamlFile) + + // Use a local variable to accumulate changes + def updatedYamlContent = stagedYamlFile.text + + // Handle basepathToReplace as a list or convert to list + def pathsToReplace = basepathToReplace instanceof List ? basepathToReplace : [basepathToReplace] + pathsToReplace.each { basepathReplacement -> + // Skip null/empty entries (e.g. when basepathToReplace itself is null) + if (basepathReplacement) { + updatedYamlContent = updatedYamlContent.replace(basepathReplacement.toString(), basepathFinal.toString()) + } + } + stagedYamlFile.text = updatedYamlContent + + return stagedYamlFile + } + 
+ /** + * Update references file by replacing base paths in the YAML file (positional parameters version) + * + * @param yamlReference The path to the YAML reference file + * @param basepathFinal The final base path to use as replacement (can be null, false, or empty) + * @param basepathToReplace Base path, or list of base paths, to be replaced (can be null, false, or empty) + * @return The updated file object (either staged copy or original) + */ + def updateReferencesFile(def yamlReference, def basepathFinal, def basepathToReplace) { + return updateReferencesFile([basepathFinal: basepathFinal, basepathToReplace: basepathToReplace], yamlReference) + } } diff --git a/src/test/groovy/nfcore/plugin/references/ReferencesUtilsTest.groovy b/src/test/groovy/nfcore/plugin/references/ReferencesUtilsTest.groovy index 9c0bf83..e30ecd2 100644 --- a/src/test/groovy/nfcore/plugin/references/ReferencesUtilsTest.groovy +++ b/src/test/groovy/nfcore/plugin/references/ReferencesUtilsTest.groovy @@ -1,10 +1,18 @@ -package nfcore.plugin +package nfcore.plugin.references +import nextflow.Session +import nextflow.file.FileHelper import spock.lang.Specification +import spock.lang.TempDir + +import java.nio.file.Path class ReferencesUtilsTest extends Specification { def extension + @TempDir + Path tempDir + def setup() { extension = ReferencesUtils } @@ -55,4 +63,232 @@ class ReferencesUtilsTest extends Specification { result_file == [null] result_value == [null] } + + def "test updateReferencesFile with named parameters - single replacement"() { + given: + // Create a test YAML file + def yamlFile = tempDir.resolve("test_references.yml") + yamlFile.text = """ +id: test_genome 
+fasta: \${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/genome.fasta +gtf: \${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/genes.gtf +""".stripIndent() + + def referencesUtils = new ReferencesUtils() + def session = Mock(Session) { + getWorkDir() >> tempDir /* only workDir is stubbed: the staged copy is written under <workDir>/tmp */ + } + referencesUtils.init(session) + + when: + def result = referencesUtils.updateReferencesFile( + [basepathFinal: '/new/base/path', basepathToReplace: '${params.igenomes_base}'], + yamlFile + ) + + then: + result != null + result.exists() + def content = result.text + content.contains('/new/base/path/Homo_sapiens/NCBI/GRCh38/Sequence/genome.fasta') + content.contains('/new/base/path/Homo_sapiens/NCBI/GRCh38/Annotation/genes.gtf') + !content.contains('${params.igenomes_base}') /* single-quoted: the literal placeholder text, not an interpolated value */ + } + + def "test updateReferencesFile with positional parameters"() { + given: + def yamlFile = tempDir.resolve("test_references2.yml") + yamlFile.text = """ +id: test_genome +fasta: /old/path/genome.fasta +gtf: /old/path/genes.gtf +""".stripIndent() + + def referencesUtils = new ReferencesUtils() + def session = Mock(Session) { + getWorkDir() >> tempDir + } + referencesUtils.init(session) + + when: + def result = referencesUtils.updateReferencesFile(yamlFile, '/new/path', '/old/path') /* positional order: yamlReference, basepathFinal, basepathToReplace */ + + then: + result != null + result.exists() + def content = result.text + content.contains('/new/path/genome.fasta') + content.contains('/new/path/genes.gtf') + !content.contains('/old/path') + } + + def "test updateReferencesFile with multiple basepaths to replace"() { + given: + def yamlFile = tempDir.resolve("test_references3.yml") + yamlFile.text = """ +id: test_genome +fasta: \${params.igenomes_base}/genome.fasta +gtf: \${params.references_base}/genes.gtf +readme: /old/base/readme.txt +""".stripIndent() + + def referencesUtils = new ReferencesUtils() + def session = Mock(Session) { + getWorkDir() >> tempDir + } + referencesUtils.init(session) + + when: + def result = referencesUtils.updateReferencesFile( + [basepathFinal: 
'/new/unified/path', + basepathToReplace: ['${params.igenomes_base}', '${params.references_base}', '/old/base']], /* every listed base path is replaced by the same basepathFinal */ + yamlFile + ) + + then: + result != null + result.exists() + def content = result.text + content.contains('/new/unified/path/genome.fasta') + content.contains('/new/unified/path/genes.gtf') + content.contains('/new/unified/path/readme.txt') + !content.contains('${params.igenomes_base}') + !content.contains('${params.references_base}') + !content.contains('/old/base') + } + + def "test updateReferencesFile without replacement returns original file"() { + given: + def yamlFile = tempDir.resolve("test_references4.yml") + def originalContent = """ +id: test_genome +fasta: /some/path/genome.fasta +""".stripIndent() + yamlFile.text = originalContent + + def referencesUtils = new ReferencesUtils() + def session = Mock(Session) { + getWorkDir() >> tempDir + } + referencesUtils.init(session) + + when: + def result = referencesUtils.updateReferencesFile( + [basepathFinal: null, basepathToReplace: null], + yamlFile + ) + + then: + result != null + result.exists() + result == yamlFile /* no staged copy is made: the original path comes back */ + result.text == originalContent + } + + def "test updateReferencesFile with empty basepathFinal returns original file"() { + given: + def yamlFile = tempDir.resolve("test_references5.yml") + def originalContent = """ +id: test_genome +fasta: /some/path/genome.fasta +""".stripIndent() + yamlFile.text = originalContent + + def referencesUtils = new ReferencesUtils() + def session = Mock(Session) { + getWorkDir() >> tempDir + } + referencesUtils.init(session) + + when: + def result = referencesUtils.updateReferencesFile( + [basepathFinal: '', basepathToReplace: '/some/path'], /* an empty string is falsy, so no replacement is attempted */ + yamlFile + ) + + then: + result != null + result.exists() + result == yamlFile + result.text == originalContent + } + + def "test updateReferencesFile throws exception for non-existent file"() { + given: + def nonExistentFile = tempDir.resolve("non_existent.yml") + + def referencesUtils = new ReferencesUtils() + 
def session = Mock(Session) { + getWorkDir() >> tempDir + } + referencesUtils.init(session) + + when: + referencesUtils.updateReferencesFile( + [basepathFinal: '/new/path', basepathToReplace: '/old/path'], + nonExistentFile + ) + + then: + thrown(IllegalArgumentException) + } + + def "test updateReferencesFile creates staged copy in work directory"() { + given: + def yamlFile = tempDir.resolve("test_references6.yml") + yamlFile.text = """ +id: test_genome +fasta: /old/path/genome.fasta +""".stripIndent() + + def referencesUtils = new ReferencesUtils() + def session = Mock(Session) { + getWorkDir() >> tempDir + } + referencesUtils.init(session) + + when: + def result = referencesUtils.updateReferencesFile( + [basepathFinal: '/new/path', basepathToReplace: '/old/path'], + yamlFile + ) + + then: + result != null + result.exists() + // The result should be a different file (staged copy) located directly inside <workDir>/tmp + result != yamlFile + result.startsWith(tempDir) + result.parent.fileName.toString() == 'tmp' // checks the directory name itself; a substring match on the full path also passes when the temp dir lives under e.g. /tmp + // Original file should remain unchanged + yamlFile.text.contains('/old/path/genome.fasta') + } + + def "test updateReferencesFile with snake_case parameter names"() { + given: + def yamlFile = tempDir.resolve("test_references7.yml") + yamlFile.text = """ +id: test_genome +fasta: /old/path/genome.fasta +""".stripIndent() + + def referencesUtils = new ReferencesUtils() + def session = Mock(Session) { + getWorkDir() >> tempDir + } + referencesUtils.init(session) + + when: + def result = referencesUtils.updateReferencesFile( + [basepath_final: '/new/path', basepath_to_replace: '/old/path'], // snake_case aliases of basepathFinal / basepathToReplace + yamlFile + ) + + then: + result != null + result.exists() + def content = result.text + content.contains('/new/path/genome.fasta') + !content.contains('/old/path') + } }