diff --git a/tools/semibin/bin.xml b/tools/semibin/bin.xml
index 3f7886b8c94..65ca5d06aa4 100644
--- a/tools/semibin/bin.xml
+++ b/tools/semibin/bin.xml
@@ -11,11 +11,15 @@
@@ -44,7 +85,7 @@ SemiBin2 bin
-
+
@@ -78,7 +119,7 @@ SemiBin2 bin
-
+
@@ -141,7 +182,7 @@ SemiBin2 bin
-
+
@@ -172,4 +213,4 @@ Outputs
]]>
-
+
\ No newline at end of file
diff --git a/tools/semibin/concatenate_fasta.xml b/tools/semibin/concatenate_fasta.xml
index a4624f4b60a..d09f70081d5 100644
--- a/tools/semibin/concatenate_fasta.xml
+++ b/tools/semibin/concatenate_fasta.xml
@@ -63,4 +63,4 @@ Outputs
]]>
-
+
\ No newline at end of file
diff --git a/tools/semibin/convert.py b/tools/semibin/convert.py
new file mode 100644
index 00000000000..071783a888f
--- /dev/null
+++ b/tools/semibin/convert.py
@@ -0,0 +1,94 @@
+import os
+import pickle
+import sys
+
+import torch
+from safetensors.torch import load_file, save_file
+
+# -------------------------------
+# Metadata encoding/decoding
+# -------------------------------
+
+
def encode_metadata(obj):
    """
    Recursively encode a Python object tree into a tensor-only structure
    that SafeTensors can serialize.

    - torch.Tensor  -> returned unchanged
    - dict          -> values encoded recursively (keys kept as-is)
    - list / tuple  -> dict {"0": v0, "1": v1, ...} with encoded values
    - anything else -> pickled and wrapped in a uint8 tensor
    """
    if isinstance(obj, torch.Tensor):
        return obj
    if isinstance(obj, dict):
        return {key: encode_metadata(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return {str(index): encode_metadata(item) for index, item in enumerate(obj)}
    # Fallback: serialize arbitrary metadata as raw pickle bytes.
    raw = pickle.dumps(obj)
    return torch.tensor(list(raw), dtype=torch.uint8)
+
+
def decode_metadata(obj):
    """
    Recursively decode tensors back into Python objects (inverse of
    ``encode_metadata``).

    - uint8 tensor  -> unpickled Python object
    - other tensor  -> returned unchanged
    - non-empty dict whose keys are all digit strings -> list (restores
      encoded lists/tuples; tuples come back as lists)
    - other dict    -> decoded recursively
    - anything else -> returned unchanged

    NOTE(review): pickle.loads executes arbitrary code — only decode
    checkpoints from trusted sources.
    """
    if isinstance(obj, torch.Tensor):
        if obj.dtype == torch.uint8:
            data = bytes(obj.tolist())
            return pickle.loads(data)
        return obj
    elif isinstance(obj, dict):
        # Bug fix: all() is True for an empty iterable, so {} used to be
        # misclassified as an encoded list and decoded to []. Require at
        # least one key before applying the digit-key list convention.
        if obj and all(k.isdigit() for k in obj.keys()):
            return [decode_metadata(obj[k]) for k in sorted(obj.keys(), key=int)]
        else:
            return {k: decode_metadata(v) for k, v in obj.items()}
    else:
        return obj
+
+# -------------------------------
+# Flatten/unflatten for SafeTensors
+# -------------------------------
+
+
def flatten_dict(d, parent_key='', sep='/'):
    """
    Collapse a nested dict into a single-level dict whose keys join the
    nesting path with *sep* (e.g. {"a": {"b": 1}} -> {"a/b": 1}).
    Non-dict values are kept as leaves; empty nested dicts vanish.
    """
    flat = {}
    pending = [(parent_key, d)]
    while pending:
        prefix, mapping = pending.pop()
        for key, value in mapping.items():
            path = f"{prefix}{sep}{key}" if prefix else key
            if isinstance(value, dict):
                pending.append((path, value))
            else:
                flat[path] = value
    return flat
+
+
def unflatten_dict(d, sep='/'):
    """
    Rebuild a nested dict from *sep*-joined path keys — the inverse of
    ``flatten_dict`` (e.g. {"a/b": 1} -> {"a": {"b": 1}}).
    """
    nested = {}
    for path, value in d.items():
        *parents, leaf = path.split(sep)
        node = nested
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value
    return nested
+
+
# ----------------------------------------------------------
# CLI: convert a .pt checkpoint to SafeTensors, or restore it
# ----------------------------------------------------------

if __name__ == "__main__":
    # Usage: python convert.py <path/to/model.pt | path/to/model.safetensors>
    # Output is written next to the input file.
    FILE_PATH = sys.argv[1]
    out_dir = os.path.dirname(FILE_PATH)
    if FILE_PATH.endswith('.pt'):
        # .pt -> .safetensors: encode non-tensor checkpoint metadata into
        # tensors so the tensor-only SafeTensors format can hold everything.
        # Bug fix: load the file given on the command line, not a
        # hard-coded "model.pt" relative to the working directory.
        checkpoint = torch.load(FILE_PATH, map_location="cpu")
        encoded = encode_metadata(checkpoint)
        flat = flatten_dict(encoded)
        save_file(flat, os.path.join(out_dir, "model.safetensors"))
        print("Saved restorable SafeTensors file!")
    else:
        # .safetensors -> .pt: invert the flatten/encode pipeline.
        # Bug fix: read the file given on the command line, not a
        # hard-coded "model_restorable.safetensors".
        loaded_flat = load_file(FILE_PATH)
        loaded_nested = unflatten_dict(loaded_flat)
        restored_checkpoint = decode_metadata(loaded_nested)
        torch.save(restored_checkpoint, os.path.join(out_dir, "model.pt"))
        # Message corrected to match the actual output filename.
        print("Saved restored checkpoint as model.pt!")
diff --git a/tools/semibin/generate_cannot_links.xml b/tools/semibin/generate_cannot_links.xml
index 209ed24013f..3d1ff31d9f5 100644
--- a/tools/semibin/generate_cannot_links.xml
+++ b/tools/semibin/generate_cannot_links.xml
@@ -30,7 +30,6 @@ SemiBin2
#if $ml_threshold:
--ml-threshold $ml_threshold
#end if
- --cannot-name 'cannot'
--threads \${GALAXY_SLOTS:-1}
--processes \${GALAXY_SLOTS:-1}
]]>
@@ -136,4 +135,4 @@ Outputs
]]>
-
+
\ No newline at end of file
diff --git a/tools/semibin/generate_sequence_features.xml b/tools/semibin/generate_sequence_features.xml
index 2600ddf0f91..b5c4005c875 100644
--- a/tools/semibin/generate_sequence_features.xml
+++ b/tools/semibin/generate_sequence_features.xml
@@ -433,4 +433,4 @@ Outputs
]]>
-
+
\ No newline at end of file
diff --git a/tools/semibin/macros.xml b/tools/semibin/macros.xml
index 17f22410405..657fa95340a 100644
--- a/tools/semibin/macros.xml
+++ b/tools/semibin/macros.xml
@@ -1,8 +1,8 @@
- 2.1.0
- 1
- 21.01
+ 2.2.0
+ 0
+ 25.0
semibin
@@ -11,6 +11,7 @@
semibin
+ safetensors
@@ -462,7 +463,7 @@ ln -s '$e' '${identifier}.bam' &&
-
+
diff --git a/tools/semibin/semibin.xml b/tools/semibin/semibin.xml
index 1c36ea5ff71..9cbb559a8cd 100644
--- a/tools/semibin/semibin.xml
+++ b/tools/semibin/semibin.xml
@@ -75,8 +75,27 @@ SemiBin2
--compression none
--threads \${GALAXY_SLOTS:-1}
--processes \${GALAXY_SLOTS:-1}
+ 2> 'stderr.txt' || true
&&
-echo "output" &&
+
+if grep -q "is empty or misformatted" 'stderr.txt'; then
+ mkdir 'output/output_bins' 'output/output_recluster_bins' 'output/output_prerecluster_bins';
+ touch 'output/output_bins/empty_bin.fa' ;
+ touch 'output/output_recluster_bins/empty_bin.fa' ;
+ touch 'output/output_prerecluster_bins/empty_bin.fa' ;
+fi
+&&
+
+if grep -q "Edge weights must not be NaN values" 'stderr.txt'; then
+ mkdir 'output/output_bins' 'output/output_recluster_bins' 'output/output_prerecluster_bins';
+ touch 'output/output_bins/empty_bin.fa' ;
+ touch 'output/output_recluster_bins/empty_bin.fa' ;
+ touch 'output/output_prerecluster_bins/empty_bin.fa' ;
+fi
+
+&& cat 'stderr.txt' >&2
+&& rm 'stderr.txt'
+&& echo "output" &&
ls output
]]>
diff --git a/tools/semibin/test-data/model.h5 b/tools/semibin/test-data/model.h5
deleted file mode 100644
index e2b0c4d7da2..00000000000
Binary files a/tools/semibin/test-data/model.h5 and /dev/null differ
diff --git a/tools/semibin/train.xml b/tools/semibin/train.xml
index c392442322f..86b5628f76a 100644
--- a/tools/semibin/train.xml
+++ b/tools/semibin/train.xml
@@ -55,6 +55,9 @@ SemiBin2 train_semi
--ratio $min_len.ratio
#end if
--orf-finder '$orf_finder'
+
+&& python '$__tool_directory__/convert.py' 'output/model.pt'
+&& rm 'output/model.pt'
]]>
@@ -101,9 +104,53 @@ SemiBin2 train_semi
-