Provides utility functions for assigning atom types to each atom in a molecule.
Atom types are defined in the `SmartsPatternRegistry`.
Warning
As of the current version of the library, these functions are not used in the main code.
They are kept for comparison and testing with earlier versions of the library.
assign_atom_types
assign_atom_types(mol, atomgroup)
Assign atom types to each atom in the molecule. Atom types are defined in `SmartsPatternRegistry`.
Parameters:
Name |
Type |
Description |
Default |
mol |
MolType
|
The molecule for which to assign atom types.
|
required
|
atomgroup |
AtomGroupType
|
The atomgroup for which to assign atom types.
|
required
|
Returns:
Type |
Description |
NDArray[int8]
|
NDArray[np.int8]: An array of shape (n_atoms, n_atom_types) where each element is either 0 or 1.
|
Source code in lahuta/utils/atom_types.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def assign_atom_types(mol: MolType, atomgroup: AtomGroupType) -> NDArray[np.int8]:
    """Build a binary atom-type membership matrix for a molecule.

    The matrix is filled in three passes, applied in this order:

    1. Every SMARTS pattern of every `SmartsPatternRegistry` entry is matched
       against ``mol``; the first atom of each match is flagged with the
       entry's type.
    2. Non-hydrogen atoms of water residues are flagged as both hydrogen bond
       acceptors and donors.
    3. For atoms in standard amino acid residues, every type listed in
       ``PROT_ATOM_TYPES`` is cleared and then re-assigned from that dictionary,
       keyed by residue name + atom name.

    Args:
        mol (MolType): The molecule for which to assign atom types.
        atomgroup (AtomGroupType): The atomgroup for which to assign atom types.

    Returns:
        NDArray[np.int8]: An array of shape (n_atoms, n_atom_types) where each
        element is either 0 or 1.
    """
    type_columns = AVAILABLE_ATOM_TYPES
    type_matrix = np.zeros((mol.NumAtoms(), len(type_columns)), dtype=np.int8)

    # Pass 1: SMARTS-based typing over the whole molecule.
    for registry_entry in SmartsPatternRegistry:
        patterns = SmartsPatternRegistry[registry_entry.name].value
        for pattern in patterns.values():
            matcher: ObSmartPatternType = OBSmartsPatternWrapper(ob.OBSmartsPattern())
            matcher.Init(str(pattern))
            matcher.Match(mol)
            # Only the first atom of each match carries the type.
            first_atoms = [hit[0] for hit in matcher.GetMapList()]
            for atom_number in first_atoms:
                ob_atom = mol.GetAtom(atom_number)
                type_matrix[ob_atom.GetId(), type_columns[registry_entry.name]] = 1

    # Pass 2: all water molecules are hydrogen bond donors and acceptors.
    water_heavy_atoms = atomgroup.select_atoms("resname SOL HOH TIP3 TIP4 WAT W and not name H*")
    for water_atom in water_heavy_atoms:
        row = water_atom.index
        type_matrix[row, type_columns["hbond_acceptor"]] = 1
        type_matrix[row, type_columns["hbond_donor"]] = 1

    # Pass 3: protein atom typing from the dictionary overrides the SMARTS result.
    protein_selection = "resname " + " ".join(STANDARD_AMINO_ACIDS)
    for residue in atomgroup.select_atoms(protein_selection).residues:
        for atom in residue.atoms:
            row = atom.index

            # Drop whatever the SMARTS pass assigned for dictionary-managed types.
            for prot_type in PROT_ATOM_TYPES:
                type_matrix[row, type_columns[prot_type]] = 0

            # Dictionary entries are keyed by "<resname><atom name>".
            atom_key = residue.resname.strip() + atom.name.strip()
            for prot_type, known_keys in PROT_ATOM_TYPES.items():
                if atom_key in known_keys:
                    type_matrix[row, type_columns[prot_type]] = 1

    return type_matrix
|
vec_assign_atom_types
vec_assign_atom_types(mol, atomgroup, ta)
Assign atom types to each atom in the molecule. Atom types are defined in `SmartsPatternRegistry`.
Parameters:
Name |
Type |
Description |
Default |
mol |
MolType
|
The molecule for which to assign atom types.
|
required
|
atomgroup |
AtomGroupType
|
The atomgroup for which to assign atom types.
|
required
|
ta |
dict[str, NDArray[str_]]
|
A dictionary containing the atom names and residue names.
|
required
|
Returns:
Type |
Description |
NDArray[int8]
|
NDArray[np.int8]: An array of shape (n_atoms, n_atom_types) where each element is either 0 or 1.
|
Source code in lahuta/utils/atom_types.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def vec_assign_atom_types(
    mol: MolType,
    atomgroup: AtomGroupType,
    ta: dict[str, NDArray[np.str_]],
) -> NDArray[np.int8]:
    """Assign atom types to each atom in the molecule. Atom types are defined in `SmartsPatternRegistry`.

    SMARTS patterns are used for every atom that is not part of a standard
    amino acid; water heavy atoms are flagged as hydrogen bond donors and
    acceptors; atoms of standard amino acids are typed by looking up their
    "<resname><atom name>" key in ``ID_TO_TYPES``.

    Args:
        mol (MolType): The molecule for which to assign atom types.
        atomgroup (AtomGroupType): The atomgroup for which to assign atom types.
        ta (dict[str, NDArray[np.str_]]): A dictionary containing the atom names and residue names.
            ``ta["resname"]`` is indexed by residue index and ``ta["name"]`` by atom index.

    Returns:
        NDArray[np.int8]: An array of shape (n_atoms, n_atom_types) where each element is either 0 or 1.
    """
    atypes = {x: i for i, x in enumerate(PROT_ATOM_TYPES)}
    atypes_array = np.zeros((mol.NumAtoms(), len(atypes)), dtype=np.int8)

    for atom_type in SmartsPatternRegistry:
        smartsdict = SmartsPatternRegistry[atom_type.name].value
        for smarts in smartsdict.values():
            ob_smart: ObSmartPatternType = OBSmartsPatternWrapper(ob.OBSmartsPattern())
            ob_smart.Init(str(smarts))
            ob_smart.Match(mol)
            matches = [x[0] for x in ob_smart.GetMapList()]
            for match in matches:
                atom = mol.GetAtom(match)
                # Standard amino acids are typed from the dictionary below, not from SMARTS.
                if atom.GetResidue().GetName() not in STANDARD_AMINO_ACIDS:
                    atypes_array[atom.GetId(), atypes[atom_type.name]] = 1

    # ALL WATER MOLECULES ARE HYDROGEN BOND DONORS AND ACCEPTORS
    for atom in atomgroup.select_atoms("resname SOL HOH TIP3 TIP4 WAT W and not name H*"):
        atypes_array[atom.index, atypes["hbond_acceptor"]] = 1
        atypes_array[atom.index, atypes["hbond_donor"]] = 1

    # OVERRIDE PROTEIN ATOM TYPING FROM DICTIONARY
    resname, atom_name = ta["resname"], ta["name"]

    ag = atomgroup.select_atoms("resname " + " ".join(STANDARD_AMINO_ACIDS)).atoms

    # The index arrays must come from the protein selection itself (not from the
    # full atomgroup) so that atom_ids[i] describes ag[i]; otherwise any
    # non-protein atom in the group shifts the keys onto the wrong atoms.
    # The explicit integer dtype keeps an empty selection usable as an index.
    resindices = np.array([atom.resindex for atom in ag], dtype=np.intp)
    indices = np.array([atom.index for atom in ag], dtype=np.intp)

    # Convert arrays to string type
    resname_str = resname[resindices].astype(str)
    atom_name_str = atom_name[indices].astype(str)

    # Generate atom_id array by concatenating resname and atom_name arrays.
    # np.char is the public home of these routines (np.core is a private path).
    atom_ids: NDArray[np.str_] = np.char.add(
        np.char.strip(resname_str),
        np.char.strip(atom_name_str),
    )

    for atom_index, atom_id in zip(indices, atom_ids):
        atom_types = ID_TO_TYPES.get(atom_id, None)
        if atom_types is None:
            continue
        for atom_type_x in atom_types:
            atypes_array[atom_index, atypes[atom_type_x]] = 1

    return atypes_array
|