Post

CReM - Basic Example

This is an example of how to use the CReM framework to generate chemical structures using a fragment-based approach. We will demonstrate how to mutate, grow, and link molecules using the CReM framework.

CReM - Basic Example

CReM Notebook

To access the Jupyter notebooks associated with this project, please navigate to the examples directory.

Apart from the two Jupyter notebooks written by the original author of CReM (crem_example and crem_ml), all other notebooks were created by @WyrdWyn4 and require you to specify the path to the database you are using.

Global Variables and Helper Functions

The database can be downloaded here. I specifically used this database for the notebooks.

1
# Path to the CReM fragment database file used throughout this example
# (passed as `db_name` to the crem functions); adjust it to where your copy lives.
db = r"chembl22_sa25_hac12.db"

Drawer

Helper functions for drawing molecules with highlighted atoms.

1
2
3
4
5
6
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw import IPythonConsole
from IPython.display import SVG, Image
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def drawsvg(mol, highlightAtoms=None, highlightReplacement=False, molSize=(400, 300), kekulize=True):
    """Render a single molecule as an SVG image, optionally highlighting atoms.

    Args:
        mol: RDKit molecule to draw. A copy is drawn, so `mol` itself is not
            kekulized or given coordinates.
        highlightAtoms: sequence of atom indices to highlight. ``None``
            (the default) means no highlighting.
        highlightReplacement: if True, highlight every atom of `mol` that has
            no 'react_atom_idx' property. When at least one such atom exists
            this overrides `highlightAtoms`.
        molSize: (width, height) passed to the SVG drawer.
        kekulize: if True, try to kekulize the copy before drawing; if that
            fails, a fresh un-kekulized copy is drawn instead.

    Returns:
        An ``IPython.display.SVG`` object holding the drawing.
    """
    # None instead of a mutable [] default; normalise to a list for the drawer.
    highlighted = [] if highlightAtoms is None else list(highlightAtoms)

    mc = Chem.Mol(mol.ToBinary())
    if kekulize:
        try:
            Chem.Kekulize(mc)
        except Exception:
            # Kekulization is best-effort. Start again from a clean copy in
            # case the failed attempt left `mc` partially modified. Catching
            # Exception (not a bare except) lets KeyboardInterrupt through.
            mc = Chem.Mol(mol.ToBinary())
    if not mc.GetNumConformers():
        AllChem.Compute2DCoords(mc)
    if highlightReplacement:
        # Properties are read from the original `mol`, not the copy
        # (NOTE(review): the binary copy may not carry atom properties,
        # depending on RDKit pickle settings -- confirm before changing).
        ids = [a.GetIdx() for a in mol.GetAtoms()
               if 'react_atom_idx' not in a.GetPropsAsDict()]
        if ids:
            highlighted = ids
    drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0], molSize[1])
    drawer.DrawMolecule(mc, highlightAtoms=highlighted)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()
    # Strip the 'svg:' namespace prefix from the generated markup.
    return SVG(svg.replace('svg:',''))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def drawgrid(mols, highlight=0):
    """
    Draw molecules as an SVG grid, 4 per row with 300x200 cells.

    highlight: 0 - no highlight (only the first 8 molecules are drawn)
               1 - highlight changed atoms (atoms without 'react_atom_idx')
               2 - highlight unchanged atoms (all remaining atoms)
    """
    grid_opts = dict(molsPerRow=4, subImgSize=(300,200), useSVG=True)
    if highlight == 0:
        return Draw.MolsToGridImage(mols[:8], **grid_opts)

    per_mol_atoms = []
    for mol in mols:
        changed = [atom.GetIdx() for atom in mol.GetAtoms()
                   if 'react_atom_idx' not in atom.GetPropsAsDict()]
        if highlight == 1:
            per_mol_atoms.append(changed)
        elif highlight == 2:
            every_atom = set(range(mol.GetNumAtoms()))
            per_mol_atoms.append(list(every_atom - set(changed)))
        # Any other highlight value adds nothing, leaving the list empty.
    return Draw.MolsToGridImage(mols, highlightAtomLists=per_mol_atoms, **grid_opts)

Structure Generation

Mutate Mols

mutate_mol

Generate new molecules by replacing fragments in the supplied molecule with fragments from the database.

1
2
3
4
5
from crem.crem import mutate_mol
from rdkit import Chem

# Parent molecule whose fragments get replaced.
m = Chem.MolFromSmiles('c1cc(OC)ccc1C')
# Each result produced by mutate_mol is parsed as a SMILES string into a Mol.
mols = list(map(Chem.MolFromSmiles, mutate_mol(m, db_name=db, max_size=1)))
1
drawgrid(mols, highlight=1)

mutate_mol2:

Convenience function for parallel processing using multiprocessing.

1
2
3
4
5
6
7
8
9
10
from multiprocessing import Pool
from functools import partial
from crem.crem import mutate_mol2
from rdkit import Chem

input_smi = ['c1ccccc1N', 'NCC(=O)OC', 'NCCCO']
input_mols = [Chem.MolFromSmiles(s) for s in input_smi]

# Use the pool as a context manager so the two worker processes are always
# shut down, even if a worker raises. The imap iterator is fully consumed by
# list() inside the block, before the pool is terminated on exit.
with Pool(2) as p:
    res = list(p.imap(partial(mutate_mol2, db_name=db, max_size=1), input_mols))
1
drawgrid([Chem.MolFromSmiles(s) for s in input_smi] + [Chem.MolFromSmiles(s) for s in res[0]], highlight=1)

Grow Mols

grow_mol:

Replace hydrogens with fragments from the database.

1
2
3
4
5
from crem.crem import grow_mol
from rdkit import Chem

# Parent molecule to be grown.
m = Chem.MolFromSmiles('c1cc(OC)ccc1C')
# Each result produced by grow_mol is parsed as a SMILES string into a Mol.
mols = list(map(Chem.MolFromSmiles, grow_mol(m, db_name=db)))
1
drawgrid(mols, highlight=1)

grow_mol2:

Convenience function for parallel processing using multiprocessing.

1
2
3
4
5
6
7
8
9
10
from multiprocessing import Pool
from functools import partial
from crem.crem import grow_mol2
from rdkit import Chem

input_smi = ['c1ccccc1N', 'NCC(=O)OC', 'NCCCO']
input_mols = [Chem.MolFromSmiles(s) for s in input_smi]

# Use the pool as a context manager so the two worker processes are always
# shut down, even if a worker raises. The imap iterator is fully consumed by
# list() inside the block, before the pool is terminated on exit.
with Pool(2) as p:
    res = list(p.imap(partial(grow_mol2, db_name=db), input_mols))
1
drawgrid([Chem.MolFromSmiles(s) for s in input_smi] + [Chem.MolFromSmiles(s) for s in res[0]], highlight=1)

Link two molecules with a linker from the database (link_mols).

1
2
3
4
5
6
from crem.crem import link_mols
from rdkit import Chem

# The two molecules to be joined by a linker.
m1 = Chem.MolFromSmiles('c1cc(OC)ccc1C')
m2 = Chem.MolFromSmiles('NCC(=O)O')
# Each result produced by link_mols is parsed as a SMILES string into a Mol.
mols = list(map(Chem.MolFromSmiles, link_mols(m1, m2, db_name=db)))
1
drawgrid(mols, highlight=1)

Convenience function (link_mols2) for parallel processing using multiprocessing.

1
2
3
4
5
6
7
8
9
from multiprocessing import Pool
from functools import partial
from crem.crem import link_mols2
from rdkit import Chem

# Each tuple is one (mol1, mol2) pair; starmap unpacks it into the two
# positional arguments of link_mols2.
input_mols = [(Chem.MolFromSmiles('c1cc(OC)ccc1C'), Chem.MolFromSmiles('NCC(=O)O'))]

# Use the pool as a context manager so the worker processes are always shut
# down. starmap blocks until all results are ready, so `res` is complete
# before the pool is terminated on exit.
with Pool(2) as p:
    res = p.starmap(partial(link_mols2, db_name=db), input_mols)
1
drawgrid([Chem.MolFromSmiles('c1cc(OC)ccc1C'), Chem.MolFromSmiles('NCC(=O)O')] + [Chem.MolFromSmiles(s) for s in res[0]], highlight=1)

Command Line Utilities

1
2
3
import os
# NOTE(review): machine-specific absolute path -- change it to the `example`
# directory of your own checkout. The relative paths used by the commands
# below (../crem, ../input-files, ../output-files) resolve against it.
os.chdir(r'C:\Users\walee\crem\example')
print(os.getcwd())

Fragmentation:

Fragment input compounds by cutting bonds matching bond SMARTS.

1
!python ../crem/fragmentation.py -i ../input-files/input.smi -o ../output-files/output_frag.txt -c 32 -v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import SVG, display

# Only the first 8 lines are needed for the preview, so stream them rather
# than loading the whole fragmentation output with readlines().
from itertools import islice

with open('../output-files/output_frag.txt') as f:
    smiles_list = list(islice(f, 8))

## Clean up the SMILES strings and convert to RDKit molecule objects
mols = []
for s in smiles_list:
    # The SMILES is the first comma-separated field of each line.
    smiles = s.split(',')[0].strip()
    mol = Chem.MolFromSmiles(smiles)
    # Keep only successfully parsed molecules (MolFromSmiles gives None on failure).
    if mol is not None:
        mols.append(mol)

drawgrid(mols, highlight=0)

Convert Fragments to Environment:

Create a text file of fragment replacements from the fragmented molecules.

1
!python -m crem.frag_to_env_mp -i ../input-files/frags.txt -o ../output-files/output_env.txt -r 3 -c 32 -v
1
drawgrid(mols, highlight=0)

Create SQLite DB:

Create SQLite DB from a text file containing environment fragments.

1
!python -m crem.import_env_to_db -i ../input-files/env_frags.txt -o ../output-files/output.db -r 3 -n 32 -v
1
drawgrid(mols, highlight=0)
This post is licensed under AGPL-3.0 by the author.