36
36
37
37
import os
38
38
39
+ import numpy as np
40
+ from Bio .PDB import (
41
+ PDBParser ,
42
+ Selection ,
43
+ Superimposer ,
44
+ PDBIO ,
45
+ Atom ,
46
+ Residue ,
47
+ Structure ,
48
+ )
49
+
50
+ from dlpacker .utils import (
51
+ DLPModel ,
52
+ InputBoxReader ,
53
+ THE20 ,
54
+ SCH_ATOMS ,
55
+ BB_ATOMS ,
56
+ SIDE_CHAINS ,
57
+ BOX_SIZE ,
58
+ )
59
+
39
60
dir_path = os .path .dirname (os .path .realpath (__file__ ))
40
61
DEFAULT_REFERENCE_PDB = os .path .join (dir_path , 'data' , 'reference.pdb' )
41
62
DEFAULT_LIBRARY_NPZ = os .path .join (dir_path , 'data' , 'library.npz' )
42
63
DEFAULT_CHARGES_RTP = os .path .join (dir_path , 'data' , 'charges.rtp' )
43
64
44
- CUSTOMIZED_WEIGHTS_DIR = os .getenv ('DLPACKER_PRETRAINED_WEIGHT' )
65
+
66
+ CUSTOMIZED_WEIGHTS_DIR = os .getenv ('DLPACKER_PRETRAINED_WEIGHT' )
45
67
if CUSTOMIZED_WEIGHTS_DIR :
46
68
if not os .path .exists (CUSTOMIZED_WEIGHTS_DIR ):
47
69
os .makedirs (CUSTOMIZED_WEIGHTS_DIR )
48
70
DEFAULT_WEIGHTS = os .path .join (CUSTOMIZED_WEIGHTS_DIR , 'DLPacker_weights' )
49
71
else :
50
72
DEFAULT_WEIGHTS = os .path .join (dir_path , 'data' , 'DLPacker_weights' )
51
73
74
+
52
75
if not os .path .exists (f'{ DEFAULT_WEIGHTS } .h5' ):
53
76
from dlpacker .utils import unzip_weights
54
77
58
81
output_dir = os .path .dirname (DEFAULT_WEIGHTS ),
59
82
)
60
83
61
- import numpy as np
62
- from Bio .PDB import (
63
- PDBParser ,
64
- Selection ,
65
- Superimposer ,
66
- PDBIO ,
67
- Atom ,
68
- Residue ,
69
- Structure ,
70
- )
71
- from dlpacker .utils import (
72
- DLPModel ,
73
- InputBoxReader ,
74
- DataGenerator ,
75
- THE20 ,
76
- SCH_ATOMS ,
77
- BB_ATOMS ,
78
- SIDE_CHAINS ,
79
- BOX_SIZE ,
80
- )
81
-
82
84
83
85
class DLPacker :
84
86
# This is the meat of our code.
@@ -126,9 +128,7 @@ def __init__(
126
128
127
129
self .input_reader = input_reader
128
130
if not self .input_reader :
129
- self .input_reader = InputBoxReader (
130
- charges_filename = charges_filename
131
- )
131
+ self .input_reader = InputBoxReader (charges_filename = charges_filename )
132
132
133
133
def _load_library (self ):
134
134
# Loads library of rotamers.
@@ -137,9 +137,7 @@ def _load_library(self):
137
137
self .library = np .load (self .lib_name , allow_pickle = True )
138
138
self .library = self .library ['arr_0' ].item ()
139
139
for k in self .library ['grids' ]:
140
- self .library ['grids' ][k ] = self .library ['grids' ][k ].astype (
141
- np .float32
142
- )
140
+ self .library ['grids' ][k ] = self .library ['grids' ][k ].astype (np .float32 )
143
141
144
142
def _read_structures (self ):
145
143
# Reads in main PDB structure and reference structure.
@@ -218,13 +216,9 @@ def _remove_altloc(self, structure: Structure):
218
216
disordered_list .append (atom )
219
217
# sometimes one of the altlocs just does not exist!
220
218
try :
221
- selected_list .append (
222
- atom .disordered_get (self .altloc [0 ])
223
- )
219
+ selected_list .append (atom .disordered_get (self .altloc [0 ]))
224
220
except :
225
- selected_list .append (
226
- atom .disordered_get (self .altloc [1 ])
227
- )
221
+ selected_list .append (atom .disordered_get (self .altloc [1 ]))
228
222
selected_list [- 1 ].set_altloc (' ' )
229
223
selected_list [- 1 ].disordered_flag = 0
230
224
@@ -249,11 +243,7 @@ def _align_residue(self, residue: Residue):
249
243
# In order to generate input box properly
250
244
# we first need to align selected residue
251
245
# to reference atoms from reference.pdb
252
- if (
253
- not residue .has_id ('N' )
254
- or not residue .has_id ('C' )
255
- or not residue .has_id ('CA' )
256
- ):
246
+ if not residue .has_id ('N' ) or not residue .has_id ('C' ) or not residue .has_id ('CA' ):
257
247
print (
258
248
'Missing backbone atoms: residue' ,
259
249
self ._get_residue_tuple (residue ),
@@ -265,9 +255,7 @@ def _align_residue(self, residue: Residue):
265
255
self .sup .apply (self ._get_parent_structure (residue ))
266
256
return True
267
257
268
- def _align_structures (
269
- self , structure_a : Structure , structure_b : Structure
270
- ):
258
+ def _align_structures (self , structure_a : Structure , structure_b : Structure ):
271
259
# Aligns two structures using backbone atoms
272
260
bb_a , bb_b = [], []
273
261
residues_a = Selection .unfold_entities (structure_a , 'R' )
@@ -297,20 +285,11 @@ def _get_box_atoms(self, residue: Residue):
297
285
b = self .box_size + 1 # one angstrom offset to include more atoms
298
286
for a in self ._get_parent_structure (residue ).get_atoms ():
299
287
xyz = a .coord
300
- if (
301
- xyz [0 ] < b
302
- and xyz [0 ] > - b
303
- and xyz [1 ] < b
304
- and xyz [1 ] > - b
305
- and xyz [2 ] < b
306
- and xyz [2 ] > - b
307
- ):
288
+ if xyz [0 ] < b and xyz [0 ] > - b and xyz [1 ] < b and xyz [1 ] > - b and xyz [2 ] < b and xyz [2 ] > - b :
308
289
atoms .append (a )
309
290
return atoms
310
291
311
- def _genetare_input_box (
312
- self , residue : Residue , allow_missing_atoms : bool = False
313
- ):
292
+ def _genetare_input_box (self , residue : Residue , allow_missing_atoms : bool = False ):
314
293
# Takes a residue and generates a special
315
294
# dictionary that is then given to InputReader,
316
295
# which uses this dictionary to generate the actual input
@@ -400,42 +379,29 @@ def _get_sorted_residues(
400
379
for residue in Selection .unfold_entities (structure , 'R' ):
401
380
if not targets or self ._get_residue_tuple (residue ) in targets :
402
381
if residue .get_resname () in THE20 :
403
- if (
404
- residue .has_id ('CA' )
405
- and residue .has_id ('C' )
406
- and residue .has_id ('N' )
407
- ):
382
+ if residue .has_id ('CA' ) and residue .has_id ('C' ) and residue .has_id ('N' ):
408
383
atoms = self ._get_box_atoms (residue )
409
384
tuples .append ((residue , len (atoms )))
410
385
tuples .sort (key = lambda x : - x [1 ])
411
386
412
387
elif method == 'score' :
413
388
tuples = []
414
- for i , residue in enumerate (
415
- Selection .unfold_entities (structure , 'R' )
416
- ):
389
+ for i , residue in enumerate (Selection .unfold_entities (structure , 'R' )):
417
390
if not targets or self ._get_residue_tuple (residue ) in targets :
418
- if (
419
- residue .get_resname () in THE20
420
- and residue .get_resname () != 'GLY'
421
- ):
391
+ if residue .get_resname () in THE20 and residue .get_resname () != 'GLY' :
422
392
name = self ._get_residue_tuple (residue )
423
393
print ("Scoring residue:" , i , name , end = '\r ' )
424
394
425
395
r , s , n = self ._get_residue_tuple (residue )
426
396
box = self ._genetare_input_box (residue , True )
427
397
428
398
if not box :
429
- print (
430
- "\n Skipping residue:" , i , residue .get_resname ()
431
- )
399
+ print ("\n Skipping residue:" , i , residue .get_resname ())
432
400
continue
433
401
434
402
pred = self ._get_prediction (box , n )
435
403
scores = np .abs (self .library ['grids' ][n ] - pred )
436
- scores = np .mean (
437
- scores , axis = tuple (range (1 , pred .ndim + 1 ))
438
- )
404
+ scores = np .mean (scores , axis = tuple (range (1 , pred .ndim + 1 )))
439
405
best_ind = np .argmin (scores )
440
406
best_score = np .min (scores )
441
407
tuples .append ((residue , best_score / SCH_ATOMS [n ]))
@@ -512,9 +478,7 @@ def mutate_sequence(self, target: tuple, new_label: str):
512
478
# and mutates it in the sequence to new one given by new_label argument
513
479
# IMPORTANT: this function just renames a residue without
514
480
# doing anything else at all
515
- assert (
516
- new_label in THE20
517
- ), 'Only mutations to canonical 20 amino acids are supported!'
481
+ assert new_label in THE20 , 'Only mutations to canonical 20 amino acids are supported!'
518
482
for residue in Selection .unfold_entities (self .structure , 'R' ):
519
483
if target == self ._get_residue_tuple (residue ):
520
484
residue .resname = new_label
@@ -558,9 +522,7 @@ def reconstruct_residue(self, residue: Residue, refine_only: bool = False):
558
522
residue [name ].coord = best_match [i ]
559
523
else :
560
524
# most values are dummy here
561
- new_atom = Atom .Atom (
562
- name , best_match [i ], 0 , 1 , ' ' , name , 2 , element = name [:1 ]
563
- )
525
+ new_atom = Atom .Atom (name , best_match [i ], 0 , 1 , ' ' , name , 2 , element = name [:1 ])
564
526
residue .add (new_atom )
565
527
566
528
def reconstruct_protein (
@@ -587,21 +549,14 @@ def reconstruct_protein(
587
549
if not self .reconstructed :
588
550
self .reconstructed = self .structure .copy ()
589
551
else :
590
- print (
591
- 'Reconstructed structure already exists, something might be wrong!'
592
- )
552
+ print ('Reconstructed structure already exists, something might be wrong!' )
593
553
if not refine_only :
594
554
self ._remove_sidechains (self .reconstructed )
595
555
596
556
# run reconstruction for all residues in selected order
597
- sorted_residues = self ._get_sorted_residues (
598
- self .reconstructed , method = order
599
- )
557
+ sorted_residues = self ._get_sorted_residues (self .reconstructed , method = order )
600
558
for i , residue in enumerate (sorted_residues ):
601
- if (
602
- residue .get_resname () in THE20
603
- and residue .get_resname () != 'GLY'
604
- ):
559
+ if residue .get_resname () in THE20 and residue .get_resname () != 'GLY' :
605
560
name = self ._get_residue_tuple (residue )
606
561
print ("Working on residue:" , i , name , end = '\r ' )
607
562
self .reconstruct_residue (residue , refine_only )
@@ -636,9 +591,7 @@ def reconstruct_region(
636
591
if not self .reconstructed :
637
592
self .reconstructed = self .structure .copy ()
638
593
else :
639
- print (
640
- 'Reconstructed structure already exists, something might be wrong!'
641
- )
594
+ print ('Reconstructed structure already exists, something might be wrong!' )
642
595
643
596
# remove side chains for target amino acids if refine_only is False
644
597
if not refine_only :
@@ -647,15 +600,10 @@ def reconstruct_region(
647
600
self ._remove_sidechain (residue )
648
601
649
602
# run reconstruction for specified list of residues
650
- sorted_residues = self ._get_sorted_residues (
651
- self .reconstructed , targets , method = order
652
- )
603
+ sorted_residues = self ._get_sorted_residues (self .reconstructed , targets , method = order )
653
604
for i , residue in enumerate (sorted_residues ):
654
605
if self ._get_residue_tuple (residue ) in targets :
655
- if (
656
- residue .get_resname () in THE20
657
- and residue .get_resname () != 'GLY'
658
- ):
606
+ if residue .get_resname () in THE20 and residue .get_resname () != 'GLY' :
659
607
name = self ._get_residue_tuple (residue )
660
608
print ("Working on residue:" , i , name , end = '\r ' )
661
609
self .reconstruct_residue (residue , refine_only )
0 commit comments