Source code for insilico.filters

from rdkit import Chem
from typing import (Dict, List)
import pandas as pd


def apply_filter(
        filters: Dict, molecules: pd.DataFrame, dependencies: Dict = None) -> pd.DataFrame:
    """
    Run every recognised filter requested in `filters` over a molecular set.

    Only the ``functional_groups`` key is recognised; any other key present
    in `filters` is ignored.

    :param dict filters: Set of predicates to filter the molecules
    :param molecules: Pandas DataFrame containing the properties
    :param dict dependencies: Current task parent (not used by this function)
    :returns: Pandas Dataframe
    """
    # Maps each supported key of `filters` to the function implementing it.
    handlers = {'functional_groups': filter_by_functional_group}

    for name, run_filter in handlers.items():
        if name in filters:
            molecules = run_filter(molecules, filters[name])

    return molecules
def filter_by_functional_group(
        molecules: pd.DataFrame, functional_groups: Dict) -> pd.DataFrame:
    """
    Flag the molecules that contain at least one of the `functional_groups`.

    A boolean ``functional_groups`` column is added to `molecules` in place;
    no rows are removed. A row is ``True`` when its molecule contains at
    least one of the requested groups as a substructure.

    :param molecules: Pandas DataFrame with a ``smiles`` column
    :param dict functional_groups: Mapping whose ``'smiles'`` entry is an
        iterable of SMILES strings, one per functional group to search for
    :returns: The same Pandas DataFrame, with the ``functional_groups`` column
    """
    # Build the query patterns once; they are reused for every molecule.
    patterns = [Chem.MolFromSmiles(f) for f in functional_groups['smiles']]

    def contains_any_group(smiles) -> bool:
        """Return True if the molecule matches at least one pattern."""
        # Transform the SMILES to an rdkit molecule
        mol = Chem.MolFromSmiles(smiles)
        # MolFromSmiles gives None for a SMILES it cannot parse. Such a
        # molecule cannot contain any group, so flag it False instead of
        # failing on `None.HasSubstructMatch`.
        if mol is None:
            return False
        return any(mol.HasSubstructMatch(p) for p in patterns)

    # Check if the functional_groups are in the molecules
    molecules['functional_groups'] = molecules.smiles.apply(contains_any_group)

    return molecules