Skip to content

Add sequence reconstruction #31

@giulioisac

Description

@giulioisac

Add something along these lines to sonia:

from sonnia import Sonia
from sonnia.utils import nt2aa,gene_to_num_str
import numpy as np
def reconstruct_sequence(sonia_model:Sonia,
                         ntjunction:str,
                         V:str,
                         J:str)->str:
    """Return the full nucleotide sequence rebuilt around a junction.

    The result is ``V-gene prefix + ntjunction + J-gene suffix``, where the
    prefix is the full V gene (``genV[i][2]``) without its last
    ``len(genV[i][1])`` nucleotides and the suffix is the full J gene
    (``genJ[i][2]``) without its first ``len(genJ[i][1])`` nucleotides.
    Presumably entry ``[1]`` is the germline part of each gene that already
    lies inside the junction -- TODO confirm against the genomic data layout.

    Args:
        sonia_model: model exposing ``pgen_model.V_mask_mapping``,
            ``pgen_model.J_mask_mapping``, ``genomic_data.genV`` and
            ``genomic_data.genJ``.
        ntjunction: nucleotide junction inserted between the two gene parts.
        V: V gene name; normalised with ``gene_to_num_str(V, "V")`` before
            the lookup.
        J: J gene name; normalised with ``gene_to_num_str(J, "J")`` before
            the lookup.

    Returns:
        The concatenated nucleotide string. If either gene cannot be resolved
        through the model's mask mappings, ``np.nan`` is returned instead
        (note that this is not a ``str``, despite the annotation).
    """
    try:
        # Only the first index registered for each normalised gene name is used.
        v_index = sonia_model.pgen_model.V_mask_mapping[gene_to_num_str(V, "V")][0]
        j_index = sonia_model.pgen_model.J_mask_mapping[gene_to_num_str(J, "J")][0]
    except Exception:
        # Best-effort lookup: any failure to resolve a gene yields NaN, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return np.nan

    v_entry = sonia_model.genomic_data.genV[v_index]
    j_entry = sonia_model.genomic_data.genJ[j_index]
    full_v_gene = v_entry[2]
    full_j_gene = j_entry[2]

    # Use an explicit, clamped end index: slicing with ``-len(...)`` would
    # give ``[:0]`` (an empty prefix) whenever the trimmed segment is empty,
    # dropping the whole V gene instead of keeping it.
    v_keep = max(len(full_v_gene) - len(v_entry[1]), 0)
    v_prefix = full_v_gene[:v_keep]
    j_suffix = full_j_gene[len(j_entry[1]):]

    return v_prefix + ntjunction + j_suffix

# Draw 10 nucleotide sequences from the 'human_B_kappa' post-selection model.
model = Sonia(ppost_model="human_B_kappa")
seqs = model.generate_sequences_post(10, nucleotide=True)

# Rebuild each full sequence: element 3 of a generated row is passed as the
# junction, element 1 as the V gene and element 2 as the J gene.
reconstructed_seqs = [
    reconstruct_sequence(model, row[3], row[1], row[2])
    for row in seqs
]
# Convert every reconstructed nucleotide sequence with nt2aa.
reconstructed_seqs_aa = list(map(nt2aa, reconstructed_seqs))

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions