diff --git a/src/boltz/data/feature/featurizerv2.py b/src/boltz/data/feature/featurizerv2.py index 2fcb30713..454b28daf 100644 --- a/src/boltz/data/feature/featurizerv2.py +++ b/src/boltz/data/feature/featurizerv2.py @@ -1800,6 +1800,7 @@ def process_template_features( for template in templates: offset = template.template_st - template.query_st + query_templated_segments = set(range(template.query_st, template.query_en)) # Get query and template tokens to map residues query_tokens = data.tokens @@ -1810,7 +1811,7 @@ def process_template_features( # Get the template tokens at the query residues chain_id = tmpl_chain_name_to_asym_id[template.template_chain] toks = template_tokens[template_tokens["asym_id"] == chain_id] - toks = [t for t in toks if t["res_idx"] - offset in q_indices] + toks = [t for t in toks if t["res_idx"] - offset in query_templated_segments] for t in toks: q_idx = q_indices[t["res_idx"] - offset] row_tokens.append( diff --git a/src/boltz/data/parse/schema.py b/src/boltz/data/parse/schema.py index ecff892c3..cd39f534a 100644 --- a/src/boltz/data/parse/schema.py +++ b/src/boltz/data/parse/schema.py @@ -523,17 +523,25 @@ def get_local_alignments(query: str, template: str) -> list[Alignment]: """ aligner = Align.PairwiseAligner(scoring="blastp") aligner.mode = "local" - aligner.open_gap_score = -1000 - aligner.extend_gap_score = -1000 + # Biopython blastp default gap score + aligner.open_gap_score = -12.0 + aligner.extend_gap_score = -1.0 alignments = [] - for result in aligner.align(query, template): - coordinates = result.coordinates + # Take first alignment as the "best" alignment + result = aligner.align(query, template)[0] + coordinates = result.coordinates + for ipos in range(0, coordinates.shape[1] - 1): + query_st, query_en = coordinates[0, ipos:ipos+2] + template_st, template_en = coordinates[1, ipos:ipos+2] + if query_st == query_en or template_st == template_en: + # insertion or deletion, skip the chunk + continue alignment = Alignment( - 
query_st=int(coordinates[0][0]), - query_en=int(coordinates[0][1]), - template_st=int(coordinates[1][0]), - template_en=int(coordinates[1][1]), + query_st=int(query_st), + query_en=int(query_en), + template_st=int(template_st), + template_en=int(template_en) ) alignments.append(alignment)