Source code for chemprop.utils.utils
from __future__ import annotations
from enum import StrEnum
import os
from typing import Any, Callable, Iterable, Iterator, Type
import multiprocess
import numpy as np
import psutil
from rdkit import Chem
[docs]
class EnumMapping(StrEnum):
[docs]
@classmethod
def get(cls, name: str | EnumMapping) -> EnumMapping:
if isinstance(name, cls):
return name
try:
return cls[name.upper()]
except KeyError:
raise KeyError(
f"Unsupported {cls.__name__} member! got: '{name}'. expected one of: {', '.join(cls.keys())}"
)
[docs]
@classmethod
def keys(cls) -> Iterator[str]:
return (e.name for e in cls)
[docs]
@classmethod
def values(cls) -> Iterator[str]:
return (e.value for e in cls)
[docs]
@classmethod
def items(cls) -> Iterator[tuple[str, str]]:
return zip(cls.keys(), cls.values())
[docs]
def make_mol(
    smi: str,
    keep_h: bool = False,
    add_h: bool = False,
    ignore_stereo: bool = False,
    reorder_atoms: bool = False,
) -> Chem.Mol:
    """Parse a SMILES string into an RDKit molecule, with optional post-processing.

    The optional steps run in a fixed order after parsing: hydrogens are added, then
    stereochemistry is cleared, then atoms are reordered.

    Parameters
    ----------
    smi : str
        a SMILES string.
    keep_h : bool, optional
        whether to keep hydrogens that are written out in the input SMILES. This does not add
        hydrogens, it only keeps them if they are specified. Default is False.
    add_h : bool, optional
        whether to add hydrogens to the molecule. Default is False.
    ignore_stereo : bool, optional
        whether to discard stereochemical information (R/S and Cis/Trans) when constructing the
        molecule. Default is False.
    reorder_atoms : bool, optional
        whether to reorder the atoms in the molecule by their atom map numbers. This is useful
        when the order of atoms in the SMILES string does not match the atom mapping, e.g.
        '[F:2][Cl:1]'. Default is False. NOTE: This does not reorder the bonds.

    Returns
    -------
    Chem.Mol
        the RDKit molecule.

    Raises
    ------
    RuntimeError
        if RDKit returns ``None`` for ``smi``.
    """
    parser_params = Chem.SmilesParserParams()
    parser_params.removeHs = not keep_h

    mol = Chem.MolFromSmiles(smi, parser_params)
    if mol is None:
        raise RuntimeError(f"SMILES {smi} is invalid! (RDKit returned None)")

    if add_h:
        mol = Chem.AddHs(mol)

    if ignore_stereo:
        # Clear atom-centred chirality tags first, then bond stereo flags.
        for atom in mol.GetAtoms():
            atom.SetChiralTag(Chem.ChiralType.CHI_UNSPECIFIED)
        for bond in mol.GetBonds():
            bond.SetStereo(Chem.BondStereo.STEREONONE)

    if reorder_atoms:
        # Position i of the new molecule takes the atom with the i-th smallest map number.
        map_numbers = [atom.GetAtomMapNum() for atom in mol.GetAtoms()]
        ordering = np.argsort(map_numbers).tolist()
        mol = Chem.rdmolops.RenumberAtoms(mol, ordering)

    return mol
[docs]
def create_and_call_object(
    cls: Type,
    call_args: tuple = (),
    call_kwargs: dict = None,
    init_args: tuple = (),
    init_kwargs: dict = None,
) -> Any:
    """Construct an instance of ``cls`` and immediately call it.

    This is useful for parallel calls to methods that contain boost functions.

    Parameters
    ----------
    cls : Type
        the class to instantiate; its instances must be callable.
    call_args : tuple, optional
        positional arguments passed when calling the instance.
    call_kwargs : dict, optional
        keyword arguments passed when calling the instance. ``None`` means no keyword arguments.
    init_args : tuple, optional
        positional arguments passed to the constructor.
    init_kwargs : dict, optional
        keyword arguments passed to the constructor. ``None`` means no keyword arguments.

    Returns
    -------
    Any
        whatever calling the freshly built instance returns.
    """
    # ``None`` stands in for "no keyword arguments" so the defaults stay immutable.
    ctor_kwargs = {} if init_kwargs is None else init_kwargs
    invoke_kwargs = {} if call_kwargs is None else call_kwargs

    instance = cls(*init_args, **ctor_kwargs)
    return instance(*call_args, **invoke_kwargs)
[docs]
def parallel_execute(
    exe_func: Callable,
    func_args: Iterable[tuple] = (),
    func_kwargs: Iterable[dict] = (),
    n_workers: int = 0,
) -> list:
    """Optionally executes a function in parallel.

    Call ``i`` is ``exe_func(*func_args[i], **func_kwargs[i])``. If only one of ``func_args`` /
    ``func_kwargs`` is given, the other is filled with empty arguments for every call.

    Parameters
    ----------
    exe_func : Callable
        function to execute.
    func_args : Iterable
        positional arguments for each iteration of function execution.
    func_kwargs : Iterable
        keyword arguments for each iteration of function execution.
    n_workers : int, optional
        Number of parallel workers. Values below 2 run every call sequentially in the calling
        process; otherwise a ``multiprocess.Pool`` with ``n_workers`` workers is used.

    Returns
    -------
    list
        list of function outputs for each argument.

    Raises
    ------
    ValueError
        if ``func_args`` and ``func_kwargs`` are both non-empty but differ in length.
    """
    args_list = list(func_args)
    kwargs_list = list(func_kwargs)

    # Pad whichever side was omitted. The shared ``{}`` / ``()`` is only ever unpacked with
    # ``*`` / ``**`` below, never handed to ``exe_func`` as an object.
    if not kwargs_list:
        kwargs_list = [{}] * len(args_list)
    if not args_list:
        args_list = [()] * len(kwargs_list)

    # ``zip`` would silently drop the surplus calls and return fewer results than requested.
    if len(args_list) != len(kwargs_list):
        raise ValueError(
            "func_args and func_kwargs must have the same length when both are given! "
            f"got: {len(args_list)} and {len(kwargs_list)}"
        )

    tasks = list(zip(args_list, kwargs_list))

    if n_workers >= 2:

        def wrapped_call(args, kwargs):
            return exe_func(*args, **kwargs)

        with multiprocess.Pool(n_workers) as p:
            return p.starmap(wrapped_call, tasks)

    return [exe_func(*args, **kwargs) for args, kwargs in tasks]
[docs]
def pretty_shape(shape: Iterable[int]) -> str:
    """Format a shape as its dimensions separated by ``' x '``

    Example
    --------
    >>> X = np.random.rand(10, 4)
    >>> X.shape
    (10, 4)
    >>> pretty_shape(X.shape)
    '10 x 4'
    """
    dims = [str(dim) for dim in shape]
    return " x ".join(dims)
[docs]
def get_memory_usage() -> str:
    """Report the resident set size (RSS) of the current process as a readable string.

    Returns
    -------
    str
        a message of the form ``"Memory usage: <value> MB"``, with the value in units of
        1024 * 1024 bytes shown to two decimal places.
    """
    current_process = psutil.Process(os.getpid())
    rss_bytes = current_process.memory_info().rss
    # bytes -> KB -> MB
    rss_mb = rss_bytes / 1024 / 1024
    return f"Memory usage: {rss_mb:.2f} MB"
[docs]
def is_cuikmolmaker_available() -> bool:
    """Return whether the ``cuik_molmaker`` package can be imported.

    Returns
    -------
    bool
        True if ``import cuik_molmaker`` succeeds, False if it raises ``ImportError``.
    """
    try:
        import cuik_molmaker  # noqa: F401
    except ImportError:
        return False
    return True