diff --git a/.gitignore b/.gitignore
index 933109b..ccd8205 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,7 @@
 /env/src/recode-*
 /env/src/hunspell-*
 filtered-terms.txt
+*.swp
+models/??-??
+*.sif
+core
diff --git a/03.split-text b/03.split-text
index b0f3717..bce98c6 100755
--- a/03.split-text
+++ b/03.split-text
@@ -5,26 +5,20 @@
 ulimit -n 16384
 
 SLANG="$1"
 BATCH="$2"
 
-SPLIT="perl $KPU/moses/ems/support/split-sentences.perl"
-
 echo "Processing (${SLANG}) ${BATCH}"
 
-< ${BATCH}/plain_text.gz gzip -dc \
-| $SPLIT -k -q -n -d -l $SLANG -c 524288 \
+< ${BATCH}/text.gz gzip -dc \
+| py-segment -l $SLANG \
 | gzip -9c \
-> ${TMPDIR}/sentences.$$.gz
+> ${BATCH}/sentences.$$.gz
 echo "Testing output"
-docs_pt=$(gzip -cd ${BATCH}/plain_text.gz | wc -l)
-docs_st=$(gzip -cd ${TMPDIR}/sentences.$$.gz | wc -l)
+docs_pt=$(gzip -cd ${BATCH}/text.gz | wc -l)
+docs_st=$(gzip -cd ${BATCH}/sentences.$$.gz | wc -l)
 echo "Expecting $docs_pt documents, found $docs_st"
 test $docs_pt -eq $docs_st || exit 1
 
-# Move in two steps. First copies it to the shared fs which
-# might fail because it hits a quota. Second marks it as
-# the real thing.
-mv ${TMPDIR}/sentences.$$.gz ${BATCH}/sentences.$$.gz
 mv ${BATCH}/sentences.$$.gz ${BATCH}/sentences.gz
 
 echo "Copied result (${SLANG}) ${BATCH}"
diff --git a/05.tokenise b/05.tokenise
index 0c20a33..bf459bc 100755
--- a/05.tokenise
+++ b/05.tokenise
@@ -23,25 +23,22 @@ export -f tokenise
 
 echo "Processing (${SLANG}) ${BATCH}"
 
 < ${BATCH}/${INPUT}.gz gzip -dc \
-| b64filter cache bash -c tokenise \
+| b64filter bash -c tokenise \
 | gzip -9c \
-> ${TMPDIR}/${OUTPUT}.$TMPSFX.gz
+> ${BATCH}/${OUTPUT}.$TMPSFX.gz
 
 echo "Checking output"
 docs_st=$(gzip -cd ${BATCH}/${INPUT}.gz | wc -l)
-docs_tk=$(gzip -cd ${TMPDIR}/${OUTPUT}.$TMPSFX.gz | wc -l)
+docs_tk=$(gzip -cd ${BATCH}/${OUTPUT}.$TMPSFX.gz | wc -l)
 echo "Expecting $docs_st documents, found $docs_tk"
 test $docs_st -eq $docs_tk || exit 1
 
 lines_st=$(gzip -cd ${BATCH}/${INPUT}.gz | base64 -d | wc -l)
-lines_tk=$(gzip -cd ${TMPDIR}/${OUTPUT}.$TMPSFX.gz | base64 -d | wc -l)
+lines_tk=$(gzip -cd ${BATCH}/${OUTPUT}.$TMPSFX.gz | base64 -d | wc -l)
 echo "Expecting $lines_st lines, found $lines_tk"
 test $lines_st -eq $lines_tk || exit 1
 
-# Two-step move because the first one might fail and leave an
-# incomplete file behind, which is tricky to detect.
-mv ${TMPDIR}/${OUTPUT}.$TMPSFX.gz ${BATCH}/${OUTPUT}.$TMPSFX.gz
 mv ${BATCH}/${OUTPUT}.$TMPSFX.gz ${BATCH}/${OUTPUT}.gz
 
 echo "Moved result (${SLANG}) ${BATCH}/${OUTPUT}.gz"
diff --git a/06.align b/06.align
index a962dc8..e486e3d 100755
--- a/06.align
+++ b/06.align
@@ -18,7 +18,7 @@ TMPSFX=${JOB_ID:-$$}
 ${DOCALIGN} -j ${DOCALIGN_THREADS:-$THREADS} --threshold 0.1 \
 	${SRC_BATCH}/tokenised_${TARGET_LANG%~*}.gz \
 	${REF_BATCH}/tokenised_${TARGET_LANG%~*}.gz \
-| tee ${SRC_BATCH}/pairs-${TARGET_LANG%~*}-${REF_BATCH_ID}.txt \
+| cut -f2- \
 | ${DOCJOIN} \
 	-li\
 	-ri\
@@ -27,6 +27,7 @@ ${DOCALIGN} -j ${DOCALIGN_THREADS:-$THREADS} --threshold 0.1 \
 	-l ${SRC_BATCH}/sentences_${TARGET_LANG%~*}.gz\
 | /usr/bin/time -f '{"task":"bleualign", "pair":'"$PAIR_FORMAT"', "time":'"$TIME_FORMAT"'}' \
 parallel \
+	--will-cite \
 	--tmpdir=$TMPDIR \
 	-j${BLEUALIGN_THREADS:-$THREADS} \
 	--halt 2 \
@@ -36,5 +37,6 @@ parallel \
 	${BLEUALIGN} --print-sent-hash --bleu-threshold 0.2 \
 | gzip -c \
 > ${SRC_BATCH}/aligned-${REF_BATCH_ID}.gz.$TMPSFX
+
 mv ${SRC_BATCH}/aligned-${REF_BATCH_ID}.gz{.$TMPSFX,}
diff --git a/06.align.sh b/06.align.sh
index 504d7ca..841b01b 100755
--- a/06.align.sh
+++ b/06.align.sh
@@ -45,13 +45,6 @@ declare -a OPTIONS=(
 	-o ${SLURM_LOGS}/06.align-%A_%a.out
 )
 
-# Quick hack, should be a --option option, but functions.sh doesn't
-# allow for that at the moment. Someday...
-if [[ ! -z ${OOM_PROOF:-} ]]; then
-	OPTIONS+=(--mem-per-cpu 12G)
-	export BLEUALIGN_THREADS=4
-fi
-
 collection=$1
 shift
diff --git a/07.fix b/07.fix
index f5215a5..d405f9b 100755
--- a/07.fix
+++ b/07.fix
@@ -26,11 +26,14 @@ remove_empty_lines() {
 	awk -F"\t" '$3 != "" && $4 != "" { print }'
 }
 
+# Fix bicleaner model path for non-huggingface tools (aka bicleaner-hardrules)
+BICLEANER_MODEL_GIT_DIR=$HUGGINGFACE_HUB_CACHE/models--${BICLEANER_MODEL//\//--}
+BICLEANER_MODEL=${BICLEANER_MODEL_GIT_DIR}/snapshots/$(cat $BICLEANER_MODEL_GIT_DIR/refs/main)
+
 for match in $batch/aligned-+([0-9]).gz; do
 	echo $match 1>&2
 	matched_batch=$(echo $match | sed 's/.*-\([0-9]*\)\.gz/\1/')
 	paste <(gzip -cd ${match} \
-	        | awk -F '\t' '{ print 0.0 "\t" $1 "\t" $2}' `# bitextor's docjoin expects a score column, which it then ignores` \
 	        | docjoin \
 	          -r ${target_lang_data}/${shard}/${matched_batch}/url.gz \
 	          -l $(dirname ${match})/url.gz) `# 1,2: target & source url`\
@@ -53,7 +56,7 @@ done \
 	--target_lang $bicleaner_lang \
 	--scol 3 \
 	--tcol 4 \
-	--metadata $BICLEANER_MODEL \
+	--metadata $BICLEANER_MODEL/metadata.yaml \
 	/dev/stdin /dev/stdout \
 | pigz -9c \
 >$HARDRULED.$TMPSFX
diff --git a/08.score b/08.score
index bdc3bea..b29c306 100755
--- a/08.score
+++ b/08.score
@@ -2,6 +2,11 @@
 set -euo pipefail
 shopt -s extglob
 
+if [ "$IS_LUMI" = true ]; then
+	module load CrayEnv
+	module load rocm/5.2.3
+fi
+
 collection=$1
 lang=$2
 target_lang_data=$3
@@ -22,7 +27,6 @@ test -r $HARDRULED
 paste <(zcat $FIXED) <(zcat $HARDRULED) \
 | cache -k 3,4 ./score-wrap.py $BICLEANER $BICLEANER_PARAMS \
 	--score_only \
-	--processes $THREADS \
 	--tmp_dir $TMPDIR \
 	--disable_hardrules \
 	--disable_porn_removal \
diff --git a/08.score.sh b/08.score.sh
index fa86b96..2c6138a 100755
--- a/08.score.sh
+++ b/08.score.sh
@@ -10,10 +10,17 @@ set -euo pipefail
 collection=$1
 shift
 
-export SBATCH_ACCOUNT=t2-cs119-gpu
-export SBATCH_PARTITION=pascal
-export SLURM_TASKS_PER_NODE=1 # No parallelism in generic.slurm plz, they'll have to share the gpu otherwise.
-export SBATCH_GRES=gpu:1
+if [ "$IS_LUMI" = true ]; then
+	export SBATCH_PARTITION="small-g"
+	export SLURM_TASKS_PER_NODE=1 # No parallelism in generic.slurm plz, they'll have to share the gpu otherwise.
+	export SBATCH_GPUS_PER_TASK=1
+	unset SBATCH_MEM_PER_CPU # If we are setting this for the small partition, we don't need it for gpu jobs
+else
+	export SBATCH_ACCOUNT=t2-cs119-gpu
+	export SBATCH_PARTITION=pascal
+	export SLURM_TASKS_PER_NODE=1 # No parallelism in generic.slurm plz, they'll have to share the gpu otherwise.
+	export SBATCH_GRES=gpu:1
+fi
 
 for lang in $*; do
 	bicleaner_ai_model $lang
diff --git a/09.clean b/09.clean
index b9ec692..0f18d98 100755
--- a/09.clean
+++ b/09.clean
@@ -42,7 +42,7 @@ paste <(pigz -dc $FIXED) <(pigz -dc $SCORED) `# add bicleaner score as the 9th c
 	>(pigz -9c > $CLASSIFIED.$TMPSFX) \
 	>(wc -wl | sed 's/^ \+//' | tr -s ' ' '\t' > $STATS.$TMPSFX) \
 | awk -F"\t" "\$9 >= ${BICLEANER_THRESHOLD}" \
-| python3 $BITEXTOR/bitextor-elrc-filtering.py -c "url1,url2,seg1,seg2,checksum1,checksum2,bifixerhash,bifixerscore,bicleaner,collection" -s \
+| python3 bitextor-elrc-filtering.py -c "url1,url2,seg1,seg2,checksum1,checksum2,bifixerhash,bifixerscore,bicleaner,collection" -s \
 | LC_ALL=C sort -t$'\t' -k7,7 -k8,8nr \
 | pigz -9c \
 > $FILTERED.$TMPSFX \
diff --git a/10.reduce-classified.sh b/10.reduce-classified.sh
index e588fcd..f81f613 100755
--- a/10.reduce-classified.sh
+++ b/10.reduce-classified.sh
@@ -20,6 +20,7 @@ for collection in $collections; do
 	batch_lists+=( $batch_list )
 done
 
+mkdir -p $DATA_CLEANING
 output_file="${DATA_CLEANING}/${TARGET_LANG}-${lang}/${TARGET_LANG%~*}-${lang%~*}.${collection_hash}.classified.gz"
 
 if [ ! -f $output_file ] || ! $RETRY; then
diff --git a/11.reduce-filtered b/11.reduce-filtered
index 8caa69d..9086e3b 100755
--- a/11.reduce-filtered
+++ b/11.reduce-filtered
@@ -7,8 +7,8 @@ shift
 
 # Set up temp directory
 TMPSFX=${JOB_ID:-$$}
-#TMPDIR=$(mktemp -d --tmpdir=$(dirname $output_file) --suffix=_$TMPSFX)
-TMPDIR=$(mktemp -d --tmpdir=$SCRATCH --suffix=_$TMPSFX)
+TMPDIR=$(mktemp -d --tmpdir=$(dirname $output_file) --suffix=_$TMPSFX)
+#TMPDIR=$(mktemp -d --tmpdir=$SCRATCH --suffix=_$TMPSFX)
 test -d "$TMPDIR"
 trap "rm -rf $TMPDIR" EXIT
diff --git a/12.reduce-tmx b/12.reduce-tmx
index 659617f..cc07dcc 100755
--- a/12.reduce-tmx
+++ b/12.reduce-tmx
@@ -9,7 +9,7 @@ shift 3
 filtered_input=$@
 
 pigz -cd $filtered_input \
-| PYTHONPATH=$PREFIX/src/bitextor python3 ${SCRIPTS}/bitextor-buildTMX.py \
+| python3 bitextor-buildTMX.py \
 	--lang1 ${TARGET_LANG%~*} --lang2 ${lang} \
 	-c "url1,url2,seg1,seg2,checksum1,checksum2,bifixerhash,bifixerscore,bicleaner,collection,lengthratio,numTokensSL,numTokensTL" \
 	--no-delete-seg \
diff --git a/12.reduce-tmx-deferred b/12.reduce-tmx-deferred
index 284600f..d8baca5 100755
--- a/12.reduce-tmx-deferred
+++ b/12.reduce-tmx-deferred
@@ -8,7 +8,7 @@ shift 2
 filtered_input=$@
 
 pigz -cd $filtered_input \
-| PYTHONPATH=$PREFIX/src/bitextor python3 ${SCRIPTS}/bitextor-buildTMX.py \
+| python3 bitextor-buildTMX.py \
 	--lang1 ${TARGET_LANG%~*} --lang2 ${lang} \
 	-c "url1,url2,seg1,seg2,checksum1,checksum2,bifixerhash,bifixerscore,bicleaner,collection,lengthratio,numTokensSL,numTokensTL" \
 	--dedup "bifixerhash" \
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..1c27bb4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,42 @@
+FROM bitextor/bitextor:8.3
+
+COPY cirrus-scripts /cirrus-scripts
+WORKDIR /cirrus-scripts
+
+RUN git submodule update --init env/src/preprocess/
+RUN mkdir /cirrus-scripts/env/src/paracrawl/build && \
+    cd /cirrus-scripts/env/src/paracrawl/build && \
+    cmake .. && \
+    make -j8 merge_sort && \
+    cp bin/merge_sort /usr/local/bin/
+
+COPY GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB /mkl-key.pub
+RUN mkdir /etc/apt/keyrings
+RUN gpg --dearmor -o /etc/apt/keyrings/mkl.gpg /mkl-key.pub && rm /mkl-key.pub
+RUN echo "deb [signed-by=/etc/apt/keyrings/mkl.gpg] https://apt.repos.intel.com/mkl all main" > /etc/apt/sources.list.d/intel-mkl.list
+RUN apt-get update && apt-get install -yy intel-mkl-64bit-2020.0-088
+
+# Compile Marian CPU from Bergamot
+RUN git clone https://github.com/browsermt/marian-dev /opt/marian-bergamot
+WORKDIR /opt/marian-bergamot
+RUN git checkout 2be8344fcf2776fb43a7376284067164674cbfaf
+WORKDIR /opt/marian-bergamot/build
+RUN cmake .. -DUSE_SENTENCEPIECE=on -DCOMPILE_CUDA=off -DUSE_FBGEMM=on
+RUN make -j24
+
+RUN pip uninstall -y tensorflow keras
+RUN pip install tensorflow-rocm==2.12.1.600
+
+RUN apt-get remove -yy intel-mkl-64bit-2020.0-088 build-essential && apt-get -yy autoremove && \
+    rm -Rf /opt/marian-bergamot/build/src && \
+    rm -Rf /opt/marian-bergamot/src && \
+    rm -Rf /opt/marian-bergamot/build/local && \
+    rm -Rf /opt/marian-bergamot/build/libmarian.a && \
+    strip /opt/marian-bergamot/build/marian* && \
+    strip /opt/marian-bergamot/build/spm*
+
+RUN apt-get install -y locales
+RUN locale-gen en_US.UTF-8
+ENV LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8
+
+ENTRYPOINT ["/bin/bash"]
diff --git a/bitextor-buildTMX.py b/bitextor-buildTMX.py
index d00d080..b85f408 100644
--- a/bitextor-buildTMX.py
+++ b/bitextor-buildTMX.py
@@ -39,8 +39,7 @@ import unicodedata
 
 from xml.sax.saxutils import escape
 
-sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/utils")
-from utils.common import open_xz_or_gzip_or_plain, dummy_open
+from bitextor.utils.common import open_xz_or_gzip_or_plain, dummy_open
 
 def remove_control_characters(text):
     return "".join(ch for ch in text if unicodedata.category(ch)[0]!="C")
diff --git a/bitextor-elrc-filtering.py b/bitextor-elrc-filtering.py
new file mode 100644
index 0000000..38de231
--- /dev/null
+++ b/bitextor-elrc-filtering.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+# This file is part of Bitextor.
+#
+# Bitextor is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bitextor is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bitextor. If not, see <https://www.gnu.org/licenses/>.
+
+import sys
+import argparse
+
+oparser = argparse.ArgumentParser(
+    description="Script that takes a list of aligned segments, such as that produced by the bitextor-alignsegments "
+                "script, and computes the basic ELRC quality metrics: number of tokens in lang1/lang2 and length "
+                "ratio.")
+oparser.add_argument('aligned_seg', metavar='FILE', nargs='?',
+                     help='File containing the set of aligned segments (if undefined, the script reads from the '
+                          'standard input)',
+                     default=None)
+oparser.add_argument("-s", "--stats", help="Print stats or just output the input", action="store_true",
+                     dest="isPrintingStats", default=False)
+oparser.add_argument("-f", "--filtering", help="Filter lines according to ELRC rules (printing stats required)",
+                     action="store_true", dest="isFiltering", default=False)
+oparser.add_argument("-c", "--columns",
+                     help="Name of columns of the input tab separated file split by comma. Default: url1,url2,seg1,"
+                          "seg2,hunalign,bicleaner",
+                     default="url1,url2,seg1,seg2,hunalign,bicleaner")
+
+options = oparser.parse_args()
+
+if options.aligned_seg is not None:
+    reader = open(options.aligned_seg, "r")
+else:
+    reader = sys.stdin
+
+columns = options.columns.split(',')
+
+for i in reader:
+    fields = i.split("\t")
+    fields[-1] = fields[-1].strip()
+    fieldsdict = dict()
+    extracolumns = []
+
+    for field, column in zip(fields, columns):
+        fieldsdict[column] = field
+    if options.isPrintingStats:
+        extracolumns = ["lengthratio", "numTokensSL", "numTokensTL"]
+        if len(fieldsdict["seg2"]) == 0:
+            lengthRatio = 0
+        else:
+            lengthRatio = len(fieldsdict["seg1"]) * 1.0 / len(fieldsdict["seg2"])
+        numTokensSL = len(fieldsdict["seg1"].split(
+            ' '))  # This is not the way this should be counted, we need to tokenize better first
+        numTokensTL = len(fieldsdict["seg2"].split(
+            ' '))  # This is not the way this should be counted, we need to tokenize better first
+        fieldsdict["lengthratio"] = str(lengthRatio)
+        fieldsdict["numTokensSL"] = str(numTokensSL)
+        fieldsdict["numTokensTL"] = str(numTokensTL)
+        if options.isFiltering:
+            if "bicleaner" in fieldsdict and fieldsdict["bicleaner"].strip() != '':
+                fieldsdict["bicleaner"] = str(round(float(fieldsdict["bicleaner"]), 4))
+            if int(fieldsdict["numTokensSL"]) >= 200 or int(fieldsdict["numTokensTL"]) >= 200 or fieldsdict[
+                    "seg1"].strip() == '' or fieldsdict["seg2"].strip() == '' or float(
+                    fieldsdict["lengthratio"]) >= 6 or float(fieldsdict["lengthratio"]) <= 0.1666:
+                continue
+    fieldstoprint = []
+    for column in columns + extracolumns:
+        fieldstoprint.append(fieldsdict[column])
+    print("\t".join(fieldstoprint))
diff --git a/cirrus-scripts.def b/cirrus-scripts.def
new file mode 100644
index 0000000..761f8c8
--- /dev/null
+++ b/cirrus-scripts.def
@@ -0,0 +1,2 @@
+bootstrap: docker-daemon
+from: cirrus-scripts:latest
diff --git a/config.d/10.lumi.sh b/config.d/10.lumi.sh
new file mode 100644
index 0000000..84c34f7
--- /dev/null
+++ b/config.d/10.lumi.sh
@@ -0,0 +1,58 @@
+if [[ $(hostname -A) =~ "uan"[0-9][0-9] ]]; then
+	PROJ_DIR=/projappl/project_465000498/zaragoza
+	SCRATCH_DIR=/scratch/project_465000498/zaragoza/cirrus-scripts-data
+
+	# Override the binaries called via env variables;
+	# they should be available in PATH on LUMI.
+	export DOCALIGN=docalign
+	export DOCJOIN=docjoin
+	export BLEUALIGN=bleualign_cpp
+	export TOKENISER="/home/docker/bitextor/third_party/preprocess/moses/tokenizer/tokenizer.perl"
+
+	function bicleaner_model {
+		local lang=$1
+
+		export BIFIXER_PARAMS="--aggressive_dedup -q"
+		export BICLEANER=bicleaner-classify-lite
+		export BICLEANER_THRESHOLD="0.5"
+		export BICLEANER_PARAMS="-q" # --score_only is always supplied
+
+		# Default path: here instead of in config.csd3 because the path depends on $lang, and the exceptions
+		# above don't follow this pattern very well, which is why it's not in the 09.clean code itself.
+		export BICLEANER_MODEL=$PROJ_DIR/bicleaner-models/${TARGET_LANG%~*}-${lang%~*}/${TARGET_LANG%~*}-${lang%~*}.yaml
+	}
+
+	function bicleaner_ai_model {
+		export HUGGINGFACE_HUB_CACHE="/projappl/project_465000498/.cache/huggingface/hub"
+		export BIFIXER_PARAMS="--aggressive_dedup -q"
+		export BICLEANER=bicleaner-ai-classify
+		export BICLEANER_THRESHOLD="0.5"
+		export BICLEANER_PARAMS="-q --batch_size 64 --block_size 100000"
+		export BICLEANER_MODEL=bitextor/bicleaner-ai-full-${TARGET_LANG%~*}-${lang%~*}
+	}
+
+	export DATA_CLEANING=$SCRATCH_DIR/data/clean
+	export COLLECTION_ROOT="$SCRATCH_DIR/data"
+	declare -A COLLECTIONS=(
+		["sample3"]="$COLLECTION_ROOT/output_wide15_filtered_sample3"
+		["output_wide15_filtered_sample12"]="$COLLECTION_ROOT/output_wide15_filtered_sample12"
+		["output_CommonCrawl40_filtered_sample"]="$COLLECTION_ROOT/output_CommonCrawl40_filtered_sample"
+		["wide16"]="/scratch/project_465000498/hplt/data/wide00016"
+	)
+
+	# Where jobs should be executed. Values used in functions.sh/schedule.
+	export SBATCH_ACCOUNT=project_465000498
+	# TODO: should investigate whether this variable has to be set depending on the step.
+	# The small partition is allocatable by resources;
+	# the standard partition is allocatable by node.
+	export SBATCH_PARTITION=debug
+	export SBATCH_MEM_PER_CPU=1750 # Maximum recommended size for LUMI
+	export SLURM_LOGS=$SCRATCH_DIR/logs
+	export TASKS_PER_BATCH=${TPB:-1}
+
+	# How many resources should be allocated per slurm job. Defaults
+	# to as many as necessary to process all tasks in parallel. Individual
+	# .slurm job definitions define how many cpus should be allocated per
+	# task.
+	export SLURM_TASKS_PER_NODE=${TPN:-1}
+fi
diff --git a/env/init.d/lumi.sh b/env/init.d/lumi.sh
new file mode 100644
index 0000000..bd37dc7
--- /dev/null
+++ b/env/init.d/lumi.sh
@@ -0,0 +1,24 @@
+if [[ $(hostname -A) =~ "uan"[0-9][0-9] ]]; then
+	module purge
+	module load LUMI/23.09
+	#module load Boost/1.81.0-cpeCray-23.03 # This must be changed if the boost version changes in env/setup.d/paracrawl
+	#module purge && module load \
+	#	PrgEnv-cray/8.3.3 \
+	#	craype-x86-milan \
+	#	cray-python/3.9.12.1 \
+	#	perftools-base/22.12.0
+
+	# Recommended options for the Cray compiler per the LUMI docs:
+	# https://docs.lumi-supercomputer.eu/development/compiling/prgenv/#wrapper-and-compiler-options
+	# (The GNU compiler on LUMI throws a warning when loaded, saying it is not recommended.)
+	# According to https://docs.lumi-supercomputer.eu/development/compiling/prgenv/#choosing-the-target-architecture
+	# the craype-x86-milan module has to be loaded for LUMI-C partitions instead of specifying -march.
+	export CFLAGS="-O2 -funroll-loops -ffast-math"
+	export CXXFLAGS="-O2 -funroll-loops -ffast-math"
+	export IS_LUMI=true
+
+	#export PATH="/pfs/lustrep1/projappl/project_462000252/zaragoza/test_env/conda_env/bin:$PATH"
+	#export PATH="/pfs/lustrep1/projappl/project_462000252/zaragoza/bitextor-8.1/bin:$PATH"
+
+	export SQUEUE_FORMAT="%.18i %.9P %.20j %.8u %.2t %.10M %.6D %R"
+fi
diff --git a/env/setup.d/bifixer b/env/setup.d/bifixer
index 2ce9488..2e5d52f 100644
--- a/env/setup.d/bifixer
+++ b/env/setup.d/bifixer
@@ -9,8 +9,5 @@ depends() {
 }
 
 install() {
-	pip3 install -r bifixer/requirements.txt
-	echo "python3 $PREFIX/src/bifixer/bifixer/bifixer.py \"\$@\"" \
-		> $PREFIX/bin/bifixer
-	chmod +x $PREFIX/bin/bifixer
+	pip install "bifixer==0.8.8"
 }
diff --git a/env/setup.d/giashard b/env/setup.d/giashard
index 5a3fb5d..1003ffd 100644
--- a/env/setup.d/giashard
+++ b/env/setup.d/giashard
@@ -4,8 +4,12 @@ is-installed() {
 	test -x $GOPATH/bin/giashard
 }
 
+depends() {
+	echo go
+}
+
 install() {
-	go get -u github.com/paracrawl/giashard/cmd/giashard
+	go install github.com/paracrawl/giashard/cmd/giashard@latest
 }
diff --git a/env/setup.d/go b/env/setup.d/go
new file mode 100644
index 0000000..bdfced6
--- /dev/null
+++ b/env/setup.d/go
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+is-installed() {
+	test -x $PREFIX/go/bin/go
+}
+
+install() {
+	pushd ..
+	wget -O go.linux-amd64.tgz https://go.dev/dl/go1.20.3.linux-amd64.tar.gz
+	tar xvf go.linux-amd64.tgz
+	rm go.linux-amd64.tgz
+	popd
+}
diff --git a/env/setup.d/kenlm b/env/setup.d/kenlm
index a791d86..c5c356d 100644
--- a/env/setup.d/kenlm
+++ b/env/setup.d/kenlm
@@ -17,6 +17,6 @@ install() {
 	cmake .. -DKENLM_MAX_ORDER=7 -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX
 	make -j8 install
 
-	pip3 install .. --install-option="--max_order 7"
+	pip3 install .. --config-settings="--build-option=--max_order=7"
 	popd
 }
diff --git a/env/setup.d/paracrawl b/env/setup.d/paracrawl
new file mode 100644
index 0000000..1d609fe
--- /dev/null
+++ b/env/setup.d/paracrawl
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+is-installed() {
+	test -x $PREFIX/bin/batch_dedupe
+}
+
+depends() {
+	if [ "$IS_LUMI" == "true" ]; then
+		echo env
+	else
+		echo boost preprocess
+	fi
+}
+
+install() {
+	pushd paracrawl
+
+	mkdir -p build && cd build
+
+	if [ "$IS_LUMI" == "true" ]; then
+		module load Boost/1.81.0-cpeCray-23.03 ICU
+	fi
+
+	cmake .. \
+		-DCMAKE_BUILD_TYPE=Release
+	make -j8
+	cp lib/libparacrawl_util.a $PREFIX/lib/
+	cp bin/* $PREFIX/bin/
+	cd .. && rm -r build
+
+	popd
+}
diff --git a/env/src/bifixer b/env/src/bifixer
index 1ca2f1b..7367178 160000
--- a/env/src/bifixer
+++ b/env/src/bifixer
@@ -1 +1 @@
-Subproject commit 1ca2f1bd7f844a13110471a6d03b5264627a1626
+Subproject commit 7367178785a75d1d92d23b43b6a70b1fe5dc3a58
diff --git a/env/src/bitextor b/env/src/bitextor
index eaa85a6..845f81e 160000
--- a/env/src/bitextor
+++ b/env/src/bitextor
@@ -1 +1 @@
-Subproject commit eaa85a64b344b1673d8a19440fcea41c56f638dc
+Subproject commit 845f81e94b86d6864d87144042ea1c5bab920dd6
diff --git a/generic.slurm b/generic.slurm
index 3c591b6..a2b7632 100755
--- a/generic.slurm
+++ b/generic.slurm
@@ -60,10 +60,16 @@ fi
 GROUP_END=$(( $TASKS_PER_BATCH * $SLURM_ARRAY_TASK_ID ))
 GROUP_START=$(( $GROUP_END - $TASKS_PER_BATCH ))
 
-echo "Processing $GROUP_START to $GROUP_END with $THREADS threads"
+echo "Processing $GROUP_START to $GROUP_END in ${SLURM_TASKS_PER_NODE} parallel jobs each with $THREADS threads"
 
 awk "NR > $GROUP_START && NR <= $GROUP_END" $BATCHES \
-| parallel \
+| singularity exec \
+	-B $(pwd -P) \
+	-B $COLLECTION_ROOT \
+	--pwd $(pwd -P) \
+	cirrus-scripts.sif \
+parallel \
+	--will-cite \
 	-j${SLURM_TASKS_PER_NODE} \
 	--line-buffer \
 	--colsep $'\t' \
diff --git a/models/translate-bergamot.sh b/models/translate-bergamot.sh
new file mode 100755
index 0000000..aec8778
--- /dev/null
+++ b/models/translate-bergamot.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -euo pipefail
+
+MARIAN=/opt/marian-bergamot/build
+MODEL=$(dirname $(realpath -es ${BASH_SOURCE[0]}))/model
+
+foldfilter -s -w 500 \
+$MARIAN/marian-decoder \
+	-c $MODEL/config.yml \
+	--cpu-threads $THREADS \
+	--quiet-translation \
+	--max-length-crop
diff --git a/pipeline.sh b/pipeline.sh
index b83b1d3..e022fc5 100755
--- a/pipeline.sh
+++ b/pipeline.sh
@@ -78,14 +78,14 @@ main() {
 	local collections=($(./collections.sh -gwb))
 
-	case $lang in
-		ca|eu|gl|oc)
-			export TARGET_LANG=es
-			;;
-		en|ga|hr|is|nn|no|nb)
-			collections=(${collections[@]} gwb)
-			;;
-	esac
+	#case $lang in
+	#	ca|eu|gl|oc)
+	#		export TARGET_LANG=es
+	#		;;
+	#	en|ga|hr|is|nn|no|nb)
+	#		collections=(${collections[@]} gwb)
+	#		;;
+	#esac
 
 	for collection in ${collections[@]}; do
 		job_id=$(schedule_batch_jobs $collection $lang)
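
A note on the two-step moves dropped above: 03.split-text and 05.tokenise no longer stage output in $TMPDIR, but every step still writes to a job-suffixed temp file and only then renames it, as 06.align does with brace expansion. A minimal bash sketch of that write-then-rename idiom (the file name and pipeline are hypothetical):

    # Write to a suffixed temp file on the same filesystem first; a crashed
    # job leaves only output.gz.$TMPSFX behind, never a truncated output.gz.
    TMPSFX=${JOB_ID:-$$}
    some_pipeline | gzip -9c > output.gz.$TMPSFX
    # Publish: mv within one filesystem is a rename, so readers see either
    # the old file or the complete new one. The brace expansion rewrites
    # "output.gz.$TMPSFX" to "output.gz".
    mv output.gz{.$TMPSFX,}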
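The snapshot-path rewrite added to 07.fix assumes the standard huggingface_hub cache layout, in which a Hub model id like org/name maps to a models--org--name directory whose refs/main file names the current snapshot. A sketch with a hypothetical model id and cache path:

    # Assumed huggingface_hub cache layout (model id and hash hypothetical):
    #   $HUGGINGFACE_HUB_CACHE/models--bitextor--bicleaner-ai-full-en-is/
    #     refs/main                <- text file holding the snapshot commit hash
    #     snapshots/<commit-hash>/ <- checkout containing metadata.yaml
    BICLEANER_MODEL=bitextor/bicleaner-ai-full-en-is
    dir=$HUGGINGFACE_HUB_CACHE/models--${BICLEANER_MODEL//\//--}  # '/' -> '--'
    echo "$dir/snapshots/$(cat "$dir/refs/main")/metadata.yaml"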
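The generic.slurm change keeps the existing batching arithmetic: each Slurm array task claims a contiguous slice of $BATCHES and hands those lines to parallel inside the container. Traced with hypothetical values:

    # With TASKS_PER_BATCH=8, array task 3 gets lines 17..24 of $BATCHES:
    SLURM_ARRAY_TASK_ID=3
    TASKS_PER_BATCH=8
    GROUP_END=$(( TASKS_PER_BATCH * SLURM_ARRAY_TASK_ID ))  # 24
    GROUP_START=$(( GROUP_END - TASKS_PER_BATCH ))          # 16
    awk "NR > $GROUP_START && NR <= $GROUP_END" "$BATCHES"  # NR is 1-based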