Source code for chemprop.featurizers.bond
from typing import Sequence
import numpy as np
from rdkit.Chem.rdchem import Bond, BondType
from chemprop.featurizers.base import VectorFeaturizer
[docs]
class MultiHotBondFeaturizer(VectorFeaturizer[Bond]):
    """A :class:`MultiHotBondFeaturizer` featurizes bonds based on the following attributes:

    * ``null``-ity (i.e., is the bond ``None``?)
    * bond type
    * conjugated?
    * in ring?
    * stereochemistry

    The feature vectors produced by this featurizer have the following (general) signature:

    +---------------------+-----------------+--------------+
    | slice [start, stop) | subfeature      | unknown pad? |
    +=====================+=================+==============+
    | 0-1                 | null?           | N            |
    +---------------------+-----------------+--------------+
    | 1-5                 | bond type       | N            |
    +---------------------+-----------------+--------------+
    | 5-6                 | conjugated?     | N            |
    +---------------------+-----------------+--------------+
    | 6-7                 | in ring?        | N            |
    +---------------------+-----------------+--------------+
    | 7-14                | stereochemistry | Y            |
    +---------------------+-----------------+--------------+

    **NOTE**: the above signature only applies for the default arguments, as the bond type and
    stereochemistry slices can increase in size depending on the input arguments.

    A bond whose type is not in ``bond_types`` leaves the whole bond type slice at zero (there is
    no padding bit for it), whereas a stereochemistry value not in ``stereos`` sets the trailing
    padding bit of the stereochemistry slice.

    Parameters
    ----------
    bond_types : Sequence[BondType] | None, default=[SINGLE, DOUBLE, TRIPLE, AROMATIC]
        the known bond types
    stereos : Sequence[int] | None, default=[0, 1, 2, 3, 4, 5]
        the known bond stereochemistries. See [1]_ for more details

    References
    ----------
    .. [1] https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.BondStereo.values
    """

    def __init__(
        self, bond_types: Sequence[BondType] | None = None, stereos: Sequence[int] | None = None
    ):
        # ``or`` is kept on purpose: an empty sequence falls back to the defaults, as before.
        self.bond_types = bond_types or [
            BondType.SINGLE,
            BondType.DOUBLE,
            BondType.TRIPLE,
            BondType.AROMATIC,
        ]
        self.stereo = stereos or range(6)

    def __len__(self):
        """Return the length of the feature vector.

        One null bit, one bit per known bond type, the conjugated and in-ring bits, and one bit
        per known stereochemistry plus a single padding bit for unknown stereochemistry.
        """
        return 1 + len(self.bond_types) + 2 + (len(self.stereo) + 1)

    def __call__(self, b: Bond) -> np.ndarray:
        """Featurize a single bond.

        Parameters
        ----------
        b : Bond
            the bond to featurize, or ``None`` for a null bond

        Returns
        -------
        np.ndarray
            an integer vector of length ``len(self)``. For a ``None`` bond only index 0 is set.
        """
        x = np.zeros(len(self), int)

        if b is None:
            x[0] = 1
            return x

        i = 1
        bt_bit, size = self.one_hot_index(b.GetBondType(), self.bond_types)
        # ``one_hot_index`` signals "unknown" with index ``size - 1`` (== len(self.bond_types)).
        # The bond type slice has no padding bit, so an unknown type must not set anything:
        # writing at that index would land in the "conjugated?" slot instead.
        if bt_bit < size - 1:
            x[i + bt_bit] = 1
        i += size - 1

        x[i] = int(b.GetIsConjugated())
        x[i + 1] = int(b.IsInRing())
        i += 2

        # Unknown stereochemistry maps to the last (padding) bit of the stereo slice.
        stereo_bit, _ = self.one_hot_index(int(b.GetStereo()), self.stereo)
        x[i + stereo_bit] = 1

        return x

    @classmethod
    def one_hot_index(cls, x, xs: Sequence) -> tuple[int, int]:
        """Returns a tuple of the index of ``x`` in ``xs`` and ``len(xs) + 1`` if ``x`` is in ``xs``.
        Otherwise, returns a tuple with ``len(xs)`` and ``len(xs) + 1``."""
        n = len(xs)

        return (xs.index(x) if x in xs else n), n + 1
[docs]
class RIGRBondFeaturizer(VectorFeaturizer[Bond]):
    """A :class:`RIGRBondFeaturizer` featurizes bonds using resonance-invariant features [1]_.

    Each bond is encoded as a two-element vector:

    * index 0: ``null``-ity (set when the bond is ``None``)
    * index 1: in ring?

    References
    ----------
    .. [1] Zalte, A. S.; Pang, H.-W.; Doner, A. C.; Green, W. H.
        "RIGR: Resonance-Invariant Graph Representation for Molecular Property Prediction."
        J. Chem. Inf. Model. 2025, 65 (20), 10832–10843.
        https://doi.org/10.1021/acs.jcim.5c00495
    """

    def __len__(self):
        """Return the fixed feature vector length (null bit + in-ring bit)."""
        return 2

    def __call__(self, b: Bond) -> np.ndarray:
        """Featurize a bond into ``[is_null, is_in_ring]``.

        Parameters
        ----------
        b : Bond
            the bond to featurize, or ``None`` for a null bond

        Returns
        -------
        np.ndarray
            an integer vector of length 2
        """
        features = np.zeros(len(self), int)

        if b is not None:
            features[1] = int(b.IsInRing())
        else:
            # A null bond only flags the first position.
            features[0] = 1

        return features

    @classmethod
    def one_hot_index(cls, x, xs: Sequence) -> tuple[int, int]:
        """Locate ``x`` within ``xs`` for one-hot encoding.

        Returns ``(xs.index(x), len(xs) + 1)`` when ``x`` is in ``xs``; otherwise returns
        ``(len(xs), len(xs) + 1)``.
        """
        count = len(xs)
        padded = count + 1

        if x in xs:
            return xs.index(x), padded
        return count, padded