Predicting Regression - Reaction#

Import packages#

[1]:
import pandas as pd
import numpy as np
import torch
from lightning import pytorch as pl
from pathlib import Path

from chemprop import data, featurizers, models

Change model input here#

[2]:
chemprop_dir = Path.cwd().parent
checkpoint_path = chemprop_dir / "tests" / "data" / "example_model_v2_regression_rxn.ckpt"  # path to the checkpoint file
# If the checkpoint was generated by the training notebook, it will be in the `checkpoints` folder
# with a name similar to `checkpoints/epoch=19-step=180.ckpt`.
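Since that filename encodes the epoch and step, a small commented sketch like this (the glob is an assumption, not part of this notebook) can select the most recently written checkpoint automatically:

[ ]:
# checkpoint_dir = chemprop_dir / "checkpoints"
# checkpoint_path = max(checkpoint_dir.glob("*.ckpt"), key=lambda p: p.stat().st_mtime)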

Load model#

[3]:
mpnn = models.MPNN.load_from_checkpoint(checkpoint_path)
mpnn
/home/hwpang/miniforge3/envs/chemprop_v2_dev/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'graph_transform' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['graph_transform'])`.
/home/hwpang/miniforge3/envs/chemprop_v2_dev/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'output_transform' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['output_transform'])`.
[3]:
MPNN(
  (message_passing): BondMessagePassing(
    (W_i): Linear(in_features=134, out_features=300, bias=False)
    (W_h): Linear(in_features=300, out_features=300, bias=False)
    (W_o): Linear(in_features=406, out_features=300, bias=True)
    (W_d): Linear(in_features=300, out_features=300, bias=True)
    (dropout): Dropout(p=0.0, inplace=False)
    (tau): ReLU()
    (V_d_transform): Identity()
    (graph_transform): GraphTransform(
      (V_transform): Identity()
      (E_transform): Identity()
    )
  )
  (agg): MeanAggregation()
  (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (predictor): RegressionFFN(
    (ffn): MLP(
      (0): Sequential(
        (0): Linear(in_features=300, out_features=300, bias=True)
      )
      (1): Sequential(
        (0): ReLU()
        (1): Dropout(p=0.0, inplace=False)
        (2): Linear(in_features=300, out_features=1, bias=True)
      )
    )
    (criterion): MSELoss()
    (output_transform): UnscaleTransform()
  )
  (X_d_transform): Identity()
)
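After loading, you can run a quick sanity check that the checkpoint restored a full network. A minimal sketch (not part of the original notebook) that counts trainable parameters:

[ ]:
# Count trainable parameters as a quick sanity check on the restored model.
n_params = sum(p.numel() for p in mpnn.parameters() if p.requires_grad)
print(f"{n_params:,} trainable parameters")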

Change predict input here#

[4]:
chemprop_dir = Path.cwd().parent
test_path = chemprop_dir / "tests" / "data" / "regression" / "rxn" / "rxn.csv"
smiles_column = 'smiles'

Load SMILES#

[5]:
df_test = pd.read_csv(test_path)

smis = df_test.loc[:, smiles_column].values
smis[:5]
[5]:
array(['[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:8]>>[C:3](=[C:4]=[O:5])([H:11])[H:12].[C:6]([O:7][H:15])([H:8])([H:13])[H:14].[O:1]=[C:2]([H:9])[H:10]',
       '[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:4]3([H:11])[O:5][C@:6]1([H:12])[C@@:7]23[H:13]>>[C:1]1([H:8])([H:9])[O:2][C:3]([H:10])=[C:7]([H:13])[C@:6]1([O+:5]=[C-:4][H:11])[H:12]',
       '[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])=[C:7]1[H:17])([H:8])([H:9])[H:10]',
       '[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C-:1]([O+:2]=[C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])[H:12])([H:8])[H:10].[H:9][H:11]',
       '[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[C:5](=[O:6])[H:12])([H:7])([H:8])[H:9]'],
      dtype=object)
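Chemprop parses these atom-mapped reaction SMILES itself, but if a dataset is suspect you can pre-validate it with RDKit (which Chemprop already depends on). A sketch, assuming the standard RDKit reaction parser:

[ ]:
# Parse each reaction SMILES with RDKit to catch malformed entries early.
from rdkit.Chem import AllChem

for smi in smis[:5]:
    try:
        rxn = AllChem.ReactionFromSmarts(smi, useSmiles=True)
        print(rxn.GetNumReactantTemplates(), "reactant(s),", rxn.GetNumProductTemplates(), "product(s)")
    except ValueError:
        print("failed to parse:", smi)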

Load datapoints#

[6]:
test_data = [data.ReactionDatapoint.from_smi(smi) for smi in smis]
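Targets are not needed for prediction, but if your CSV also holds reference values (here the `ea` column) you can attach them when building datapoints, mirroring the training notebook. A commented sketch:

[ ]:
# ys = df_test.loc[:, ['ea']].values
# test_data = [data.ReactionDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]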

Define featurizer#

[7]:
featurizer = featurizers.CondensedGraphOfReactionFeaturizer(mode_="PROD_DIFF")
# The featurizer mode must match the one used during training
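The checkpoint fixes the expected feature sizes: the `W_i` layer printed above takes `in_features=134`, which should equal the sum of the featurizer's atom and bond feature dimensions. A sketch, assuming the featurizer exposes these via its `shape` property:

[ ]:
# atom_fdim + bond_fdim must match what the checkpointed model was trained with.
atom_fdim, bond_fdim = featurizer.shape
print(atom_fdim, bond_fdim, atom_fdim + bond_fdim)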

Get dataset and dataloader#

[8]:
test_dset = data.ReactionDataset(test_data, featurizer=featurizer)
test_loader = data.build_dataloader(test_dset, shuffle=False)
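For larger test sets, batch size and worker count are the usual throughput knobs (the Lightning warning below suggests raising `num_workers`). A commented sketch using keyword names from the chemprop v2 dataloader builder:

[ ]:
# test_loader = data.build_dataloader(test_dset, batch_size=64, num_workers=8, shuffle=False)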

Make predictions#

[9]:
with torch.inference_mode():
    trainer = pl.Trainer(
        logger=None,
        enable_progress_bar=True,
        accelerator="cpu",
        devices=1
    )
    test_preds = trainer.predict(mpnn, test_loader)
/home/hwpang/miniforge3/envs/chemprop_v2_dev/lib/python3.11/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/hwpang/miniforge3/envs/chemprop_v2_dev/lib/pyt ...
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/hwpang/miniforge3/envs/chemprop_v2_dev/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/home/hwpang/miniforge3/envs/chemprop_v2_dev/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument to `num_workers=63` in the `DataLoader` to improve performance.
Predicting DataLoader 0: 100%|██████████| 100/100 [00:00<00:00, 613.08it/s]
[10]:
test_preds = np.concatenate(test_preds, axis=0)
df_test['preds'] = test_preds
df_test
[10]:
                                               smiles         ea     preds
0   [O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:1...   8.898934  8.010366
1   [C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:...   5.464328  8.075241
2   [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H...   5.270552  8.069977
3   [C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])...   8.473006  8.023890
4   [C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H...   5.579037  8.040219
..                                                ...        ...       ...
95  [C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]...   9.295665  8.025584
96  [O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11...   7.753442  8.039022
97  [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H...  10.650215  8.082537
98  [C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4...  10.138945  8.170304
99  [C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13]...   6.979934  8.068456

100 rows × 3 columns
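To keep the results, write the augmented dataframe back to disk. A sketch (the output filename is an assumption):

[ ]:
df_test.to_csv("rxn_preds.csv", index=False)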
