diff --git a/docs/ReferencesExtension.md b/docs/ReferencesExtension.md
index 76017d2..920d8e3 100644
--- a/docs/ReferencesExtension.md
+++ b/docs/ReferencesExtension.md
@@ -14,8 +14,9 @@ The References Extension solves this by providing a unified interface that autom
### 1.1. Core Capabilities
-The extension provides two essential functions:
+The extension provides three essential functions:
+- **Path Transformation**: `updateReferencesFile()` - Updates YAML reference files by replacing base paths
- **File Resolution**: `getReferencesFile()` - Resolves file paths from parameters or reference metadata
- **Value Resolution**: `getReferencesValue()` - Retrieves metadata values with parameter override support
@@ -101,7 +102,223 @@ workflow {
## 3. Core Functions Reference
-### 3.1. getReferencesFile - File Path Resolution
+### 3.1. updateReferencesFile - Reference File Path Transformation
+
+This function updates a YAML reference file by replacing base paths, useful for adapting reference files to different storage locations or environments. It creates a staged copy of the YAML file with updated paths, leaving the original unchanged.
+
+#### Function Signature
+
+```groovy
+// Named parameters version (recommended)
+Path updateReferencesFile(
+ Map options, // Configuration map with basepathFinal and basepathToReplace
+ Object yamlReference // Path to YAML reference file
+)
+
+// Positional parameters version
+Path updateReferencesFile(
+ Object yamlReference, // Path to YAML reference file
+ Object basepathFinal, // Final base path to use as replacement
+ Object basepathToReplace // Base path(s) to be replaced (String or List)
+)
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Description |
+| ------------------------------------------------------- | ---------------- | -------- | ------------------------------------------------------------------------------------- |
+| `options.basepathFinal`<br>or `basepath_final` | String/null | No | The final base path to use as replacement. If null/false/empty, returns original file |
+| `options.basepathToReplace`<br>or `basepath_to_replace` | String/List/null | No | Base path(s) to be replaced. Can be a single string or list of strings |
+| `yamlReference` | Path/String | Yes | Path to the YAML reference file to update |
+
+#### Return Value
+
+Returns a `Path` object pointing to either:
+
+- A staged copy with updated paths (when `basepathFinal` is provided)
+- The original file (when `basepathFinal` is null, false, or empty)
+
+#### Practical Examples
+
+**Example 1: Adapting iGenomes paths to local storage**
+
+```nextflow title="update_igenomes_paths.nf"
+#!/usr/bin/env nextflow
+
+include { updateReferencesFile } from 'plugin/nf-core-utils'
+
+params.references_yaml = 'references/grch38.yml'
+params.local_base = '/data/references'
+
+workflow {
+ // Original YAML contains: ${params.igenomes_base}/Homo_sapiens/...
+ // Update to local path: /data/references/Homo_sapiens/...
+
+ updated_yaml = updateReferencesFile(
+ basepathFinal: params.local_base,
+ basepathToReplace: '${params.igenomes_base}',
+ params.references_yaml // positional: named arguments above are collected into `options`
+ )
+
+ updated_yaml.view { "Updated reference file: ${it}" }
+}
+```
+
+**Example 2: Replacing multiple base paths**
+
+```nextflow title="multi_path_replacement.nf"
+#!/usr/bin/env nextflow
+
+include { updateReferencesFile } from 'plugin/nf-core-utils'
+
+params.references_yaml = 'references/genome_info.yml'
+params.unified_base = '/mnt/shared/references'
+
+workflow {
+ // Replace multiple different base paths with a single unified path
+ // Useful when consolidating references from different sources
+
+ updated_yaml = updateReferencesFile(
+ basepathFinal: params.unified_base,
+ basepathToReplace: [
+ '${params.igenomes_base}',
+ '${params.references_base}',
+ '/old/storage/location',
+ 's3://old-bucket/references'
+ ],
+ params.references_yaml // positional: named arguments above are collected into `options`
+ )
+
+ updated_yaml.view { "Consolidated reference file: ${it}" }
+}
+```
+
+**Example 3: Using positional parameters**
+
+```nextflow title="positional_params.nf"
+#!/usr/bin/env nextflow
+
+include { updateReferencesFile } from 'plugin/nf-core-utils'
+
+workflow {
+ // Simple syntax when you only need basic path replacement
+ def yamlFile = file('references/genome.yml')
+
+ updated = updateReferencesFile(
+ yamlFile,
+ '/new/base/path',
+ '/old/base/path'
+ )
+
+ updated.view { "Updated: ${it}" }
+}
+```
+
+**Example 4: Cloud to local migration**
+
+```nextflow title="cloud_to_local.nf"
+#!/usr/bin/env nextflow
+
+include { updateReferencesFile } from 'plugin/nf-core-utils'
+
+params.references_yaml = 'config/aws_references.yml'
+params.local_mirror = '/data/local-mirror'
+params.s3_base = 's3://ngi-igenomes/igenomes'
+
+workflow {
+ // Migrate from S3 to local storage
+ local_references = updateReferencesFile(
+ basepath_final: params.local_mirror, // Using snake_case variant
+ basepath_to_replace: params.s3_base,
+ params.references_yaml // positional: named arguments above are collected into `options`
+ )
+
+ // Use the updated reference file in downstream processes
+ PROCESS_WITH_LOCAL_REFS(local_references)
+}
+
+process PROCESS_WITH_LOCAL_REFS {
+ input:
+ path yaml_file
+
+ script:
+ """
+ echo "Processing with updated references from: ${yaml_file}"
+ cat ${yaml_file}
+ """
+}
+```
+
+**Example 5: Conditional path updates**
+
+```nextflow title="conditional_update.nf"
+#!/usr/bin/env nextflow
+
+include { updateReferencesFile } from 'plugin/nf-core-utils'
+
+params.references_yaml = 'references.yml'
+params.use_local = false
+params.local_base = '/data/references'
+
+workflow {
+ // Only update paths if using local storage
+ updated_yaml = updateReferencesFile(
+ basepathFinal: params.use_local ? params.local_base : null,
+ basepathToReplace: '${params.igenomes_base}',
+ params.references_yaml // positional: named arguments above are collected into `options`
+ )
+
+ // If params.use_local is false, returns original file unchanged
+ updated_yaml.view { "Reference file: ${it}" }
+}
+```
+
+#### Key Features
+
+> [!TIP] "Staged Copies"
+> When `basepathFinal` is provided, the function creates a staged copy in a temporary location (under `workDir/tmp/`), ensuring your original reference files remain unchanged. This is ideal for adapting references without modifying source files.
+
+> [!NOTE] "Multiple Replacements"
+> The `basepathToReplace` parameter accepts either a single string or a list of strings, allowing you to replace multiple different base paths with a single unified path in one operation.
+
+> [!IMPORTANT] "Parameter Name Variants"
+> The function supports both camelCase (`basepathFinal`, `basepathToReplace`) and snake_case (`basepath_final`, `basepath_to_replace`) parameter names for flexibility.
+
+#### Common Use Cases
+
+1. **iGenomes Migration**: Update iGenomes reference paths when moving from cloud to local storage
+2. **Path Consolidation**: Unify references from multiple sources into a single base path
+3. **Environment Adaptation**: Adapt reference files for different compute environments (HPC, cloud, local)
+4. **Testing**: Create test versions of reference files with modified paths for pipeline validation
+5. **Multi-site Deployment**: Adapt reference configurations for different institutional storage systems
+
+#### Error Handling
+
+The function validates the input YAML file and throws an `IllegalArgumentException` if:
+
+- The YAML file doesn't exist
+- The YAML file path is invalid or null
+
+```nextflow title="error_handling.nf"
+#!/usr/bin/env nextflow
+
+include { updateReferencesFile } from 'plugin/nf-core-utils'
+
+workflow {
+ try {
+ updated = updateReferencesFile(
+ basepathFinal: '/new/path',
+ basepathToReplace: '/old/path',
+ 'non_existent.yml' // positional: named arguments above are collected into `options`
+ )
+ } catch (IllegalArgumentException e) {
+ log.error "Reference file error: ${e.message}"
+ exit 1
+ }
+}
+```
+
+### 3.2. getReferencesFile - File Path Resolution
This function intelligently resolves file paths based on user parameters and reference metadata.
@@ -162,7 +379,7 @@ workflow {
}
```
-### 3.2. getReferencesValue - Metadata Value Resolution
+### 3.3. getReferencesValue - Metadata Value Resolution
This function extracts metadata values with user parameter override support.
diff --git a/src/main/groovy/nfcore/plugin/NfUtilsExtension.groovy b/src/main/groovy/nfcore/plugin/NfUtilsExtension.groovy
index f81fdb1..daaf7c9 100644
--- a/src/main/groovy/nfcore/plugin/NfUtilsExtension.groovy
+++ b/src/main/groovy/nfcore/plugin/NfUtilsExtension.groovy
@@ -156,6 +156,33 @@ class NfUtilsExtension extends PluginExtensionPoint {
return nfcore.plugin.references.ReferencesUtils.getReferencesValue(referencesList, param, attribute)
}
+    /**
+     * Rewrites the base paths found in a YAML reference file (named-parameter form).
+     *
+     * The work is delegated to a new {@code ReferencesUtils} instance that is
+     * initialised with this extension's session before the call.
+     *
+     * @param options Named parameters: basepathFinal (or basepath_final) and basepathToReplace (or basepath_to_replace)
+     * @param yamlReference The path to the YAML reference file
+     * @return Whatever the delegate returns (either a staged copy or the original file)
+     */
+    @Function
+    def updateReferencesFile(Map options, def yamlReference) {
+        def helper = new nfcore.plugin.references.ReferencesUtils()
+        helper.init(this.session)
+        // Groovy returns the value of the last expression
+        helper.updateReferencesFile(options, yamlReference)
+    }
+
+    /**
+     * Rewrites the base paths found in a YAML reference file (positional-parameter form).
+     *
+     * The work is delegated to a new {@code ReferencesUtils} instance that is
+     * initialised with this extension's session before the call.
+     *
+     * @param yamlReference The path to the YAML reference file
+     * @param basepathFinal The final base path to use as replacement (can be null, false, or empty)
+     * @param basepathToReplace Base path, or list of base paths, to be replaced (can be null, false, or empty)
+     * @return Whatever the delegate returns (either a staged copy or the original file)
+     */
+    @Function
+    def updateReferencesFile(def yamlReference, def basepathFinal, def basepathToReplace) {
+        def helper = new nfcore.plugin.references.ReferencesUtils()
+        helper.init(this.session)
+        // Groovy returns the value of the last expression
+        helper.updateReferencesFile(yamlReference, basepathFinal, basepathToReplace)
+    }
+
// --- Methods from NextflowPipelineExtension ---
/**
* Generate version string for a workflow
diff --git a/src/main/groovy/nfcore/plugin/references/ReferencesUtils.groovy b/src/main/groovy/nfcore/plugin/references/ReferencesUtils.groovy
index ef9d1c2..234bdbd 100644
--- a/src/main/groovy/nfcore/plugin/references/ReferencesUtils.groovy
+++ b/src/main/groovy/nfcore/plugin/references/ReferencesUtils.groovy
@@ -14,10 +14,11 @@
* limitations under the License.
*/
-package nfcore.plugin
+package nfcore.plugin.references
import groovy.transform.CompileStatic
import nextflow.Session
+import nextflow.file.FileHelper
/**
* Implements utility functions for handling reference files and values
@@ -70,4 +71,61 @@ class ReferencesUtils {
}
}
}
+
+    /**
+     * Update references file by replacing base paths in the YAML file.
+     *
+     * When a final base path is supplied, the YAML text is read, every literal occurrence
+     * of each path to replace is substituted with the final base path, and the result is
+     * written to a new, uniquely named file under {@code <workDir>/tmp/}. The source file
+     * is never modified. When no final base path is supplied (null, false or empty), the
+     * source file is returned as-is.
+     *
+     * @param options Named parameters map (may be null). Recognised keys:
+     *                {@code basepathFinal} / {@code basepath_final} and
+     *                {@code basepathToReplace} / {@code basepath_to_replace}
+     *                (a single value or a List of values)
+     * @param yamlReference The YAML reference file: a {@code Path}, or any object whose
+     *                      {@code toString()} is a path understood by {@code FileHelper.asPath}
+     * @return The staged copy with rewritten paths, or the original file when no final base path is given
+     * @throws IllegalArgumentException if {@code yamlReference} is null or does not exist
+     */
+    def updateReferencesFile(Map options, def yamlReference) {
+        // Tolerate a missing options map instead of failing with a NullPointerException
+        Map opts = options ?: [:]
+
+        // Support named parameters: basepathFinal/basepath_final and basepathToReplace/basepath_to_replace
+        def basepathFinal = opts.basepathFinal ?: opts.basepath_final
+        def basepathToReplace = opts.basepathToReplace ?: opts.basepath_to_replace
+
+        if (yamlReference == null) {
+            throw new IllegalArgumentException("YAML reference file must not be null")
+        }
+
+        // Use an incoming Path as-is: converting it to a String and back can drop the
+        // file-system scheme for paths that do not live on the default file system
+        def sourceYamlFile = yamlReference instanceof java.nio.file.Path ? yamlReference : FileHelper.asPath(yamlReference.toString())
+
+        if (sourceYamlFile == null || !sourceYamlFile.exists()) {
+            throw new IllegalArgumentException("YAML reference file does not exist: ${yamlReference}")
+        }
+
+        // Nothing to rewrite: hand back the original file untouched
+        if (!basepathFinal) {
+            return sourceYamlFile
+        }
+
+        // Accumulate all replacements in memory before writing anything
+        String updatedYamlContent = sourceYamlFile.text
+        def pathsToReplace = basepathToReplace instanceof List ? basepathToReplace : [basepathToReplace]
+        for (basepathReplacement in pathsToReplace) {
+            // Skip null/empty entries so they cannot trigger a meaningless replacement
+            if (basepathReplacement) {
+                updatedYamlContent = updatedYamlContent.replace(basepathReplacement.toString(), basepathFinal.toString())
+            }
+        }
+
+        // Resolve against the work directory Path itself so the staged file stays on the
+        // same file system as the work directory
+        String stagedName = UUID.randomUUID().toString() + '.' + sourceYamlFile.getExtension()
+        def stagedYamlFile = session.workDir.resolve('tmp').resolve(stagedName)
+
+        // Ensure parent directory exists, then write the rewritten content in one step
+        stagedYamlFile.parent.mkdirs()
+        stagedYamlFile.text = updatedYamlContent
+
+        return stagedYamlFile
+    }
+
+    /**
+     * Positional-parameter convenience form of {@code updateReferencesFile(Map, def)}.
+     *
+     * The two base-path arguments are packed into an options map under the keys
+     * {@code basepathFinal} and {@code basepathToReplace}, and the call is forwarded.
+     *
+     * @param yamlReference The path to the YAML reference file
+     * @param basepathFinal The final base path to use as replacement (can be null, false, or empty)
+     * @param basepathToReplace Base path, or list of base paths, to be replaced (can be null, false, or empty)
+     * @return The updated file object (either staged copy or original)
+     */
+    def updateReferencesFile(def yamlReference, def basepathFinal, def basepathToReplace) {
+        Map namedOptions = [basepathFinal: basepathFinal, basepathToReplace: basepathToReplace]
+        return updateReferencesFile(namedOptions, yamlReference)
+    }
}
diff --git a/src/test/groovy/nfcore/plugin/references/ReferencesUtilsTest.groovy b/src/test/groovy/nfcore/plugin/references/ReferencesUtilsTest.groovy
index 9c0bf83..e30ecd2 100644
--- a/src/test/groovy/nfcore/plugin/references/ReferencesUtilsTest.groovy
+++ b/src/test/groovy/nfcore/plugin/references/ReferencesUtilsTest.groovy
@@ -1,10 +1,18 @@
-package nfcore.plugin
+package nfcore.plugin.references
+import nextflow.Session
+import nextflow.file.FileHelper
import spock.lang.Specification
+import spock.lang.TempDir
+
+import java.nio.file.Path
class ReferencesUtilsTest extends Specification {
def extension
+ @TempDir
+ Path tempDir
+
def setup() {
extension = ReferencesUtils
}
@@ -55,4 +63,232 @@ class ReferencesUtilsTest extends Specification {
result_file == [null]
result_value == [null]
}
+
+    def "test updateReferencesFile with named parameters - single replacement"() {
+        given: 'a YAML reference file whose paths start with the iGenomes placeholder'
+        def yamlBody = """
+id: test_genome
+fasta: \${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/genome.fasta
+gtf: \${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/genes.gtf
+""".stripIndent()
+        def refYaml = tempDir.resolve("test_references.yml")
+        refYaml.text = yamlBody
+
+        and: 'a ReferencesUtils bound to a session whose work directory is the temp dir'
+        Session stubSession = Mock {
+            getWorkDir() >> tempDir
+        }
+        def utils = new ReferencesUtils()
+        utils.init(stubSession)
+
+        when: 'the placeholder is replaced through the named-parameter form'
+        def namedOptions = [basepathFinal: '/new/base/path', basepathToReplace: '${params.igenomes_base}']
+        def result = utils.updateReferencesFile(namedOptions, refYaml)
+
+        then: 'the returned file holds the rewritten paths and no placeholder'
+        result != null
+        result.exists()
+        def rewritten = result.text
+        rewritten.contains('/new/base/path/Homo_sapiens/NCBI/GRCh38/Sequence/genome.fasta')
+        rewritten.contains('/new/base/path/Homo_sapiens/NCBI/GRCh38/Annotation/genes.gtf')
+        !rewritten.contains('${params.igenomes_base}')
+    }
+
+    def "test updateReferencesFile with positional parameters"() {
+        given: 'a YAML reference file that uses an old absolute base path'
+        def yamlBody = """
+id: test_genome
+fasta: /old/path/genome.fasta
+gtf: /old/path/genes.gtf
+""".stripIndent()
+        def refYaml = tempDir.resolve("test_references2.yml")
+        refYaml.text = yamlBody
+
+        and: 'a ReferencesUtils bound to a session whose work directory is the temp dir'
+        Session stubSession = Mock {
+            getWorkDir() >> tempDir
+        }
+        def utils = new ReferencesUtils()
+        utils.init(stubSession)
+
+        when: 'the positional form is called'
+        def result = utils.updateReferencesFile(refYaml, '/new/path', '/old/path')
+
+        then: 'every old base path has been swapped for the new one'
+        result != null
+        result.exists()
+        def rewritten = result.text
+        rewritten.contains('/new/path/genome.fasta')
+        rewritten.contains('/new/path/genes.gtf')
+        !rewritten.contains('/old/path')
+    }
+
+    def "test updateReferencesFile with multiple basepaths to replace"() {
+        given: 'a YAML reference file that mixes three different base paths'
+        def yamlBody = """
+id: test_genome
+fasta: \${params.igenomes_base}/genome.fasta
+gtf: \${params.references_base}/genes.gtf
+readme: /old/base/readme.txt
+""".stripIndent()
+        def refYaml = tempDir.resolve("test_references3.yml")
+        refYaml.text = yamlBody
+
+        and: 'a ReferencesUtils bound to a session whose work directory is the temp dir'
+        Session stubSession = Mock {
+            getWorkDir() >> tempDir
+        }
+        def utils = new ReferencesUtils()
+        utils.init(stubSession)
+
+        when: 'all three base paths are replaced in one call'
+        def legacyBases = ['${params.igenomes_base}', '${params.references_base}', '/old/base']
+        def result = utils.updateReferencesFile(
+            [basepathFinal: '/new/unified/path', basepathToReplace: legacyBases],
+            refYaml
+        )
+
+        then: 'only the unified base path remains'
+        result != null
+        result.exists()
+        def rewritten = result.text
+        rewritten.contains('/new/unified/path/genome.fasta')
+        rewritten.contains('/new/unified/path/genes.gtf')
+        rewritten.contains('/new/unified/path/readme.txt')
+        !rewritten.contains('${params.igenomes_base}')
+        !rewritten.contains('${params.references_base}')
+        !rewritten.contains('/old/base')
+    }
+
+    def "test updateReferencesFile without replacement returns original file"() {
+        given: 'a YAML reference file with known content'
+        def untouchedYaml = """
+id: test_genome
+fasta: /some/path/genome.fasta
+""".stripIndent()
+        def refYaml = tempDir.resolve("test_references4.yml")
+        refYaml.text = untouchedYaml
+
+        and: 'a ReferencesUtils bound to a session whose work directory is the temp dir'
+        Session stubSession = Mock {
+            getWorkDir() >> tempDir
+        }
+        def utils = new ReferencesUtils()
+        utils.init(stubSession)
+
+        when: 'neither a final base path nor a path to replace is given'
+        def noOpOptions = [basepathFinal: null, basepathToReplace: null]
+        def result = utils.updateReferencesFile(noOpOptions, refYaml)
+
+        then: 'the very same file comes back with unchanged content'
+        result != null
+        result.exists()
+        result == refYaml
+        result.text == untouchedYaml
+    }
+
+    def "test updateReferencesFile with empty basepathFinal returns original file"() {
+        given: 'a YAML reference file with known content'
+        def untouchedYaml = """
+id: test_genome
+fasta: /some/path/genome.fasta
+""".stripIndent()
+        def refYaml = tempDir.resolve("test_references5.yml")
+        refYaml.text = untouchedYaml
+
+        and: 'a ReferencesUtils bound to a session whose work directory is the temp dir'
+        Session stubSession = Mock {
+            getWorkDir() >> tempDir
+        }
+        def utils = new ReferencesUtils()
+        utils.init(stubSession)
+
+        when: 'the final base path is an empty string'
+        def emptyFinalOptions = [basepathFinal: '', basepathToReplace: '/some/path']
+        def result = utils.updateReferencesFile(emptyFinalOptions, refYaml)
+
+        then: 'the very same file comes back with unchanged content'
+        result != null
+        result.exists()
+        result == refYaml
+        result.text == untouchedYaml
+    }
+
+    def "test updateReferencesFile throws exception for non-existent file"() {
+        given: 'a path inside the temp dir that was never created'
+        def missingYaml = tempDir.resolve("non_existent.yml")
+
+        and: 'a ReferencesUtils bound to a session whose work directory is the temp dir'
+        Session stubSession = Mock {
+            getWorkDir() >> tempDir
+        }
+        def utils = new ReferencesUtils()
+        utils.init(stubSession)
+
+        when: 'an update is requested for the missing file'
+        def replaceOptions = [basepathFinal: '/new/path', basepathToReplace: '/old/path']
+        utils.updateReferencesFile(replaceOptions, missingYaml)
+
+        then: 'the call is rejected'
+        thrown(IllegalArgumentException)
+    }
+
+    def "test updateReferencesFile creates staged copy in work directory"() {
+        given: 'a YAML reference file that uses an old absolute base path'
+        def yamlFile = tempDir.resolve("test_references6.yml")
+        yamlFile.text = """
+id: test_genome
+fasta: /old/path/genome.fasta
+""".stripIndent()
+
+        and: 'a ReferencesUtils bound to a session whose work directory is the temp dir'
+        def referencesUtils = new ReferencesUtils()
+        def session = Mock(Session) {
+            getWorkDir() >> tempDir
+        }
+        referencesUtils.init(session)
+
+        when:
+        def result = referencesUtils.updateReferencesFile(
+            [basepathFinal: '/new/path', basepathToReplace: '/old/path'],
+            yamlFile
+        )
+
+        then:
+        result != null
+        result.exists()
+        // The result should be a different file (staged copy)
+        result != yamlFile
+        // Check the exact staging directory: a substring match on 'tmp' would also pass
+        // whenever the temp dir itself happens to live under a path containing 'tmp'
+        result.parent == tempDir.resolve('tmp')
+        // The staged copy carries the rewritten path
+        result.text.contains('/new/path/genome.fasta')
+        // Original file should remain unchanged
+        yamlFile.text.contains('/old/path/genome.fasta')
+        !yamlFile.text.contains('/new/path')
+    }
+
+    def "test updateReferencesFile with snake_case parameter names"() {
+        given: 'a YAML reference file that uses an old absolute base path'
+        def yamlBody = """
+id: test_genome
+fasta: /old/path/genome.fasta
+""".stripIndent()
+        def refYaml = tempDir.resolve("test_references7.yml")
+        refYaml.text = yamlBody
+
+        and: 'a ReferencesUtils bound to a session whose work directory is the temp dir'
+        Session stubSession = Mock {
+            getWorkDir() >> tempDir
+        }
+        def utils = new ReferencesUtils()
+        utils.init(stubSession)
+
+        when: 'the options are spelled in snake_case'
+        def snakeCaseOptions = [basepath_final: '/new/path', basepath_to_replace: '/old/path']
+        def result = utils.updateReferencesFile(snakeCaseOptions, refYaml)
+
+        then: 'the replacement is applied just like with camelCase keys'
+        result != null
+        result.exists()
+        def rewritten = result.text
+        rewritten.contains('/new/path/genome.fasta')
+        !rewritten.contains('/old/path')
+    }
}