Skip to content

OpenBabel OBMol

lahuta.core.obmol

Generate an Open Babel molecule from an ARC object.

Generating an Open Babel molecule from an ARC object is done by creating a new OBMol object and calling the create_mol method with the ARC object as an argument. This is necessary because of the quite involved process of creating a molecule using the provided Open Babel API. It, unfortunately, does not support vectorized operations, so we have to create the molecule atom by atom. There is a performance penalty for this.

Classes:

Name Description
OBMol

A class for generating an Open Babel molecule from an ARC object.

Example
obmol = OBMol()
obmol.create_mol(arc)
obmol.mol

OBMol

A class for generating an Open Babel molecule from an ARC object.

Source code in lahuta/core/obmol.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
class OBMol:
    """A class for generating an Open Babel molecule from an ARC object."""

    def __init__(self) -> None:
        self.mol: Optional[MolType] = None

    def create_residue_obmol(
        self,
        resid: NDArray[np.int32],
        resname: NDArray[np.str_],
        chain_id: NDArray[np.int32],
    ) -> MolResType:
        """Create a new residue in the molecule.

        Args:
            resid (NDArray[np.int32]): The residue ID.
            resname (NDArray[np.str_]): The residue name.
            chain_id (NDArray[np.int32]): The chain ID.

        Returns:
            MolResType: A new residue instance.
        """
        assert self.mol is not None, "Molecule is not initialized"
        ob_res = self.mol.NewResidue()
        ob_res.SetChainNum(int(chain_id))
        ob_res.SetNum(str(resid))
        ob_res.SetName(str(resname))
        return ob_res

    def create_atom_obmol(
        self,
        idx: int,
        atom_name: str,
        atom_element: str,
        atom_pos: NDArray[np.float_],
        ob_residue: MolResType,
    ) -> None:
        """Create a new atom in the molecule.

        Args:
            idx (int): The index of the atom.
            atom_name (str): The name of the atom.
            atom_element (str): The type of atom to add.
            atom_pos (NDArray[np.float_]): The position of the atom.
            ob_residue (MolResType): The residue the atom belongs to.
        """
        assert self.mol is not None, "Molecule is not initialized"
        ob_atom: MolAtomType = self.mol.NewAtom(idx)
        ob_atom.SetAtomicNum(gemmi.Element(atom_element).atomic_number)
        ob_atom.SetType(atom_name)
        ob_atom.SetVector(float(atom_pos[0]), float(atom_pos[1]), float(atom_pos[2]))
        ob_atom.SetFormalCharge(0)  # atom.charge)

        self.add_atoms_to_residue(ob_atom, ob_residue)

    def create_bond_obmol(self, atom1_id: int, atom2_id: int) -> None:
        """Create a bond between two atoms in the molecule.

        Args:
            atom1_id (int): The ID of the first atom.
            atom2_id (int): The ID of the second atom.
        """
        assert self.mol is not None, "Molecule is not initialized"
        ob_atom1 = self.mol.GetAtomById(atom1_id)
        ob_atom2 = self.mol.GetAtomById(atom2_id)

        for neighbor in ob.OBAtomAtomIter(ob_atom1):
            if neighbor.GetId() == ob_atom2.GetId():
                continue

        ob_bond = self.mol.NewBond()
        ob_bond.SetBegin(ob_atom1)
        ob_bond.SetEnd(ob_atom2)

    def add_atoms_to_residue(self, ob_atom: MolAtomType, ob_res: MolResType) -> None:
        """Add an atom to a residue.

        Args:
            ob_atom (MolAtomType): The atom to add.
            ob_res (MolResType): The residue to add the atom to.
        """
        ob_res.AddAtom(ob_atom)
        ob_res.SetHetAtom(ob_atom, not gemmi.find_tabulated_residue(ob_res.GetName()).is_standard())  # type: ignore
        ob_res.SetSerialNum(ob_atom, ob_res.GetSerialNum(ob_atom))

    def perceive_bonds(self) -> None:
        """Identify all the bonds in the molecule."""
        if self.mol:
            self.mol.ConnectTheDots()
            log_level = ob.cvar.obErrorLog.GetOutputLevel()
            ob.cvar.obErrorLog.SetOutputLevel(0)

            self.mol.PerceiveBondOrders()
            ob.cvar.obErrorLog.SetOutputLevel(log_level)

    def perceive_properties(self) -> MolType | None:
        """Identify properties of the molecule.

        Returns:
            MolType | None: The molecule with its properties perceived.
        """
        if self.mol:
            self.mol.SetAromaticPerceived()
            self.mol.SetAtomTypesPerceived()
            self.mol.SetChiralityPerceived()
            self.mol.SetRingTypesPerceived()
            self.mol.SetPartialChargesPerceived()

        return self.mol

    def end_modify(self, nuke_perceived_data: bool = True) -> None:
        """End the modification of the molecule.

        Args:
            nuke_perceived_data (bool, optional): If True, perceived data is deleted. Default is True.
        """
        if self.mol:
            self.mol.EndModify(nuke_perceived_data)

    def create_mol(self, arc: ARC, connections: Optional[Any] = None) -> None:  # noqa: ANN401
        """Create a new molecule from an ARC (Atomic Record Collection) object.

        Args:
            arc (ARC): An object representing atomic data.
            connections (Optional[Any], optional): A list of connections between atoms. Default is None.
        """
        chains, residues, atoms = arc.chains, arc.residues, arc.atoms
        coords = atoms.coordinates

        atoms_df = pd.DataFrame(
            {
                "atom_name": arc.atoms.names,
                "chain_name": arc.chains.auths,
                "res_id": arc.residues.resids,
                "res_name": arc.residues.resnames,
            }
        )

        # use MolTypeWrapper to create a new molecule
        self.mol = MolTypeWrapper(ob.OBMol()).mol
        self.mol.BeginModify()

        ob_res = None
        added_residues: set[tuple[NDArray[Any], NDArray[Any], NDArray[Any]]] = set()
        for idx, (chain, residue, atom) in enumerate(zip(chains, residues, atoms, strict=True)):
            atom_id = int(atom["id"])
            atom_name, element = atom["name"], atom["element"]
            resname: NDArray[np.str_] = residue["resname"]
            resid: NDArray[np.int32] = residue["resid"]
            chain_id: NDArray[np.int32] = chain["id"]

            _cra_ = (chain_id, resid, resname)
            if ob_res is None or _cra_ not in added_residues:
                ob_res = self.create_residue_obmol(resid, resname, chain_id)
                added_residues.add(_cra_)

            self.create_atom_obmol(atom_id, str(atom_name), str(element), coords[idx], ob_res)

        self.perceive_bonds()

        if connections is None:
            connections = []
        assert connections is not None
        for connection in connections:
            prt1, prt2 = connection.partner1, connection.partner2
            atom1: int = self._get_atom_index(
                atoms_df,
                prt1.atom_name,
                prt1.chain_name,
                prt1.res_id.seqid.num,
                prt1.res_id.name,
            )
            atom2: int = self._get_atom_index(
                atoms_df,
                prt2.atom_name,
                prt2.chain_name,
                prt2.res_id.seqid.num,
                prt2.res_id.name,
            )

            self.create_bond_obmol(atom1, atom2)

        self.perceive_properties()

        self.end_modify(True)
        self.mol.SetChainsPerceived()

    def _get_atom_index(
        self,
        atoms_df: pd.DataFrame,
        atom_name: str,
        chain_name: str,
        res_id: int,
        res_name: str,
    ) -> int:
        """Get the index of an atom in a DataFrame.

        Args:
            atoms_df (pd.DataFrame): A DataFrame with atomic data.
            atom_name (str): The name of the atom.
            chain_name (str): The name of the chain.
            res_id (int): The residue ID.
            res_name (str): The name of the residue.

        Returns:
            int: The index of the atom in the DataFrame.
        """
        index = atoms_df[
            (atoms_df["atom_name"] == atom_name)
            & (atoms_df["chain_name"] == chain_name)
            & (atoms_df["res_id"] == res_id)
            & (atoms_df["res_name"] == res_name)
        ].index

        if not len(index):
            raise ValueError("Atom is not unique or does not exist")

        return int(index[0])

create_residue_obmol

create_residue_obmol(resid, resname, chain_id)

Create a new residue in the molecule.

Parameters:

Name Type Description Default
resid NDArray[int32]

The residue ID.

required
resname NDArray[str_]

The residue name.

required
chain_id NDArray[int32]

The chain ID.

required

Returns:

Name Type Description
MolResType MolResType

A new residue instance.

Source code in lahuta/core/obmol.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def create_residue_obmol(
    self,
    resid: NDArray[np.int32],
    resname: NDArray[np.str_],
    chain_id: NDArray[np.int32],
) -> MolResType:
    """Create a new residue in the molecule.

    Args:
        resid (NDArray[np.int32]): The residue ID.
        resname (NDArray[np.str_]): The residue name.
        chain_id (NDArray[np.int32]): The chain ID.

    Returns:
        MolResType: A new residue instance.
    """
    assert self.mol is not None, "Molecule is not initialized"
    ob_res = self.mol.NewResidue()
    ob_res.SetChainNum(int(chain_id))
    ob_res.SetNum(str(resid))
    ob_res.SetName(str(resname))
    return ob_res

create_atom_obmol

create_atom_obmol(idx, atom_name, atom_element, atom_pos, ob_residue)

Create a new atom in the molecule.

Parameters:

Name Type Description Default
idx int

The index of the atom.

required
atom_name str

The name of the atom.

required
atom_element str

The type of atom to add.

required
atom_pos NDArray[float_]

The position of the atom.

required
ob_residue MolResType

The residue the atom belongs to.

required
Source code in lahuta/core/obmol.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def create_atom_obmol(
    self,
    idx: int,
    atom_name: str,
    atom_element: str,
    atom_pos: NDArray[np.float_],
    ob_residue: MolResType,
) -> None:
    """Create a new atom in the molecule.

    Args:
        idx (int): The index of the atom.
        atom_name (str): The name of the atom.
        atom_element (str): The type of atom to add.
        atom_pos (NDArray[np.float_]): The position of the atom.
        ob_residue (MolResType): The residue the atom belongs to.
    """
    assert self.mol is not None, "Molecule is not initialized"
    ob_atom: MolAtomType = self.mol.NewAtom(idx)
    ob_atom.SetAtomicNum(gemmi.Element(atom_element).atomic_number)
    ob_atom.SetType(atom_name)
    ob_atom.SetVector(float(atom_pos[0]), float(atom_pos[1]), float(atom_pos[2]))
    ob_atom.SetFormalCharge(0)  # atom.charge)

    self.add_atoms_to_residue(ob_atom, ob_residue)

create_bond_obmol

create_bond_obmol(atom1_id, atom2_id)

Create a bond between two atoms in the molecule.

Parameters:

Name Type Description Default
atom1_id int

The ID of the first atom.

required
atom2_id int

The ID of the second atom.

required
Source code in lahuta/core/obmol.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def create_bond_obmol(self, atom1_id: int, atom2_id: int) -> None:
    """Create a bond between two atoms in the molecule.

    Args:
        atom1_id (int): The ID of the first atom.
        atom2_id (int): The ID of the second atom.
    """
    assert self.mol is not None, "Molecule is not initialized"
    ob_atom1 = self.mol.GetAtomById(atom1_id)
    ob_atom2 = self.mol.GetAtomById(atom2_id)

    for neighbor in ob.OBAtomAtomIter(ob_atom1):
        if neighbor.GetId() == ob_atom2.GetId():
            continue

    ob_bond = self.mol.NewBond()
    ob_bond.SetBegin(ob_atom1)
    ob_bond.SetEnd(ob_atom2)

add_atoms_to_residue

add_atoms_to_residue(ob_atom, ob_res)

Add an atom to a residue.

Parameters:

Name Type Description Default
ob_atom MolAtomType

The atom to add.

required
ob_res MolResType

The residue to add the atom to.

required
Source code in lahuta/core/obmol.py
108
109
110
111
112
113
114
115
116
117
def add_atoms_to_residue(self, ob_atom: MolAtomType, ob_res: MolResType) -> None:
    """Add an atom to a residue.

    Args:
        ob_atom (MolAtomType): The atom to add.
        ob_res (MolResType): The residue to add the atom to.
    """
    ob_res.AddAtom(ob_atom)
    ob_res.SetHetAtom(ob_atom, not gemmi.find_tabulated_residue(ob_res.GetName()).is_standard())  # type: ignore
    ob_res.SetSerialNum(ob_atom, ob_res.GetSerialNum(ob_atom))

perceive_bonds

perceive_bonds()

Identify all the bonds in the molecule.

Source code in lahuta/core/obmol.py
119
120
121
122
123
124
125
126
127
def perceive_bonds(self) -> None:
    """Identify all the bonds in the molecule."""
    if self.mol:
        self.mol.ConnectTheDots()
        log_level = ob.cvar.obErrorLog.GetOutputLevel()
        ob.cvar.obErrorLog.SetOutputLevel(0)

        self.mol.PerceiveBondOrders()
        ob.cvar.obErrorLog.SetOutputLevel(log_level)

perceive_properties

perceive_properties()

Identify properties of the molecule.

Returns:

Type Description
MolType | None

MolType | None: The molecule with its properties perceived.

Source code in lahuta/core/obmol.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def perceive_properties(self) -> MolType | None:
    """Identify properties of the molecule.

    Returns:
        MolType | None: The molecule with its properties perceived.
    """
    if self.mol:
        self.mol.SetAromaticPerceived()
        self.mol.SetAtomTypesPerceived()
        self.mol.SetChiralityPerceived()
        self.mol.SetRingTypesPerceived()
        self.mol.SetPartialChargesPerceived()

    return self.mol

end_modify

end_modify(nuke_perceived_data=True)

End the modification of the molecule.

Parameters:

Name Type Description Default
nuke_perceived_data bool

If True, perceived data is deleted. Default is True.

True
Source code in lahuta/core/obmol.py
144
145
146
147
148
149
150
151
def end_modify(self, nuke_perceived_data: bool = True) -> None:
    """End the modification of the molecule.

    Args:
        nuke_perceived_data (bool, optional): If True, perceived data is deleted. Default is True.
    """
    if self.mol:
        self.mol.EndModify(nuke_perceived_data)

create_mol

create_mol(arc, connections=None)

Create a new molecule from an ARC (Atomic Record Collection) object.

Parameters:

Name Type Description Default
arc ARC

An object representing atomic data.

required
connections Optional[Any]

A list of connections between atoms. Default is None.

None
Source code in lahuta/core/obmol.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
def create_mol(self, arc: ARC, connections: Optional[Any] = None) -> None:  # noqa: ANN401
    """Create a new molecule from an ARC (Atomic Record Collection) object.

    Args:
        arc (ARC): An object representing atomic data.
        connections (Optional[Any], optional): A list of connections between atoms. Default is None.
    """
    chains, residues, atoms = arc.chains, arc.residues, arc.atoms
    coords = atoms.coordinates

    atoms_df = pd.DataFrame(
        {
            "atom_name": arc.atoms.names,
            "chain_name": arc.chains.auths,
            "res_id": arc.residues.resids,
            "res_name": arc.residues.resnames,
        }
    )

    # use MolTypeWrapper to create a new molecule
    self.mol = MolTypeWrapper(ob.OBMol()).mol
    self.mol.BeginModify()

    ob_res = None
    added_residues: set[tuple[NDArray[Any], NDArray[Any], NDArray[Any]]] = set()
    for idx, (chain, residue, atom) in enumerate(zip(chains, residues, atoms, strict=True)):
        atom_id = int(atom["id"])
        atom_name, element = atom["name"], atom["element"]
        resname: NDArray[np.str_] = residue["resname"]
        resid: NDArray[np.int32] = residue["resid"]
        chain_id: NDArray[np.int32] = chain["id"]

        _cra_ = (chain_id, resid, resname)
        if ob_res is None or _cra_ not in added_residues:
            ob_res = self.create_residue_obmol(resid, resname, chain_id)
            added_residues.add(_cra_)

        self.create_atom_obmol(atom_id, str(atom_name), str(element), coords[idx], ob_res)

    self.perceive_bonds()

    if connections is None:
        connections = []
    assert connections is not None
    for connection in connections:
        prt1, prt2 = connection.partner1, connection.partner2
        atom1: int = self._get_atom_index(
            atoms_df,
            prt1.atom_name,
            prt1.chain_name,
            prt1.res_id.seqid.num,
            prt1.res_id.name,
        )
        atom2: int = self._get_atom_index(
            atoms_df,
            prt2.atom_name,
            prt2.chain_name,
            prt2.res_id.seqid.num,
            prt2.res_id.name,
        )

        self.create_bond_obmol(atom1, atom2)

    self.perceive_properties()

    self.end_modify(True)
    self.mol.SetChainsPerceived()