# Source code for molreps.graph

"""
Main graph generator for making molecular graphs.

It uses networkx as graph interface and a mol object from rdkit, ase, pymatgen or similar.
"""

# Necessary
import networkx as nx
import numpy as np
from molreps.methods.geo_npy import add_edges_reverse_indices
# Rdkit
try:
    import rdkit
    import rdkit.Chem.Descriptors
    import rdkit.Chem.AllChem

    MOLGRAPH_RDKIT_AVAILABLE = True
    from molreps.methods.mol_rdkit import rdkit_atom_list, rdkit_bond_list, rdkit_bond_distance_list
    from molreps.methods.mol_rdkit import rdkit_mol_from_atoms_bonds, rdkit_add_conformer
except ModuleNotFoundError:
    print("Warning: Rdkit not found for mol class.")
    MOLGRAPH_RDKIT_AVAILABLE = False

# openbabel
try:
    from openbabel import openbabel

    MOLGRAPH_OPENBABEL_AVAILABLE = True
    from molreps.methods.mol_pybel import ob_get_bond_table_from_coordinates
except ModuleNotFoundError:
    print("Warning: Openbabel not found for mol class.")
    MOLGRAPH_OPENBABEL_AVAILABLE = False

if MOLGRAPH_RDKIT_AVAILABLE:

    def rdkit_get_property_atoms(mol, key, prop, **kwargs):
        """Collect a predefined per-atom property from an rdkit molecule.

        Args:
            mol: Molecule object, forwarded unchanged to ``rdkit_atom_list``.
            key (str): Attribute name, forwarded unchanged to ``rdkit_atom_list``.
            prop (str): Identifier of the predefined atom property, e.g. "AtomicNum".
            **kwargs: Accepted for a uniform call signature; not used here.

        Returns:
            The result of ``rdkit_atom_list(mol, key, getter)`` for the getter
            registered under ``prop``.

        Raises:
            NotImplementedError: If ``prop`` is not a predefined identifier.
        """
        atom_cls = rdkit.Chem.rdchem.Atom
        getters = {
            "AtomicNum": atom_cls.GetAtomicNum,
            "Symbol": atom_cls.GetSymbol,
            "NumExplicitHs": atom_cls.GetNumExplicitHs,
            "NumImplicitHs": atom_cls.GetNumImplicitHs,
            "IsAromatic": atom_cls.GetIsAromatic,
            "TotalDegree": atom_cls.GetTotalDegree,
            "TotalValence": atom_cls.GetTotalValence,
            "Mass": atom_cls.GetMass,
            "IsInRing": atom_cls.IsInRing,
            "Hybridization": atom_cls.GetHybridization,
            "ChiralTag": atom_cls.GetChiralTag,
            "FormalCharge": atom_cls.GetFormalCharge,
            "ImplicitValence": atom_cls.GetImplicitValence,
            "NumRadicalElectrons": atom_cls.GetNumRadicalElectrons,
        }
        getter = getters.get(prop)
        if getter is None:
            raise NotImplementedError("Property", prop, "is not predefined, use custom function.")
        return rdkit_atom_list(mol, key, getter)


    def rdkit_get_property_bonds(mol, key, prop, **kwargs):
        """Collect a predefined per-bond property from an rdkit molecule.

        Args:
            mol: Molecule object, forwarded unchanged to the list helper.
            key (str): Attribute name, forwarded unchanged to the list helper.
            prop (str): Identifier of the predefined bond property, e.g. "BondType",
                or "Distance" which is delegated to ``rdkit_bond_distance_list``.
            **kwargs: Only forwarded for "Distance"; ignored otherwise.

        Returns:
            The result of ``rdkit_bond_list`` (or ``rdkit_bond_distance_list``
            for "Distance").

        Raises:
            NotImplementedError: If ``prop`` is not a predefined identifier.
        """
        bond_cls = rdkit.Chem.rdchem.Bond
        getters = {
            "BondType": bond_cls.GetBondType,
            "IsAromatic": bond_cls.GetIsAromatic,
            "IsConjugated": bond_cls.GetIsConjugated,
            "IsInRing": bond_cls.IsInRing,
            "Stereo": bond_cls.GetStereo,
        }
        getter = getters.get(prop)
        if getter is not None:
            return rdkit_bond_list(mol, key, getter)
        if prop == "Distance":
            return rdkit_bond_distance_list(mol, key, **kwargs)
        raise NotImplementedError("Property", prop, "is not predefined, use custom function.")


    def rdkit_get_property_molstate(mol, key, prop, **kwargs):
        """Compute a predefined molecule-level property of an rdkit molecule.

        Args:
            mol: Molecule object the property is evaluated on.
            key (str): Dictionary key under which the value is returned.
            prop (str): Identifier of the property: "ExactMolWt" or "NumAtoms".
            **kwargs: Accepted for a uniform call signature; not used here.

        Returns:
            dict: Single-entry dictionary ``{key: value}``.

        Raises:
            NotImplementedError: If ``prop`` is not a predefined identifier.
        """
        descriptors = {
            "ExactMolWt": rdkit.Chem.Descriptors.ExactMolWt,
        }
        descriptor = descriptors.get(prop)
        if descriptor is not None:
            return {key: descriptor(mol)}
        if prop == "NumAtoms":
            return {key: mol.GetNumAtoms()}
        raise NotImplementedError("Property", prop, "is not predefined, use custom function.")


# Main class to make graph
class MolGraph(nx.Graph):
    """Molecular graph which inherits from the networkx graph.

    Atoms become nodes, bonds become edges, and molecule-level properties are
    kept in the private ``_graph_state`` dictionary. Properties are added with
    :meth:`make` and exported as arrays with :meth:`to_tensor`.
    """

    # Built-in property identifiers per supported mol type. The FIRST entry of
    # each list is the default used by make() and to_tensor() when the
    # corresponding argument is None.
    _mols_implemented = {'rdkit': {
        'nodes': ["AtomicNum", "Symbol", "NumExplicitHs", "NumImplicitHs", "IsAromatic", "TotalDegree",
                  "TotalValence", "Mass", "IsInRing", "Hybridization", "ChiralTag", "FormalCharge",
                  "ImplicitValence", "NumRadicalElectrons"],
        'edges': ["BondType", "IsAromatic", "IsConjugated", "IsInRing", "Stereo", "Distance"],
        'state': ["NumAtoms", "ExactMolWt"]}
    }

    def __init__(self, mol=None, **kwargs):
        """Initialize the graph and detect the type of the molecule object.

        Args:
            mol (optional): Molecule object. Only ``rdkit.Chem.Mol`` instances are
                recognised for the built-in property identifiers. Defaults to None.
            **kwargs: Forwarded to ``networkx.Graph.__init__``.
        """
        super().__init__(**kwargs)

        self.mol = mol
        # State Variable
        self._graph_state = {}
        self.mol_type = None
        # The name `rdkit` only exists if the guarded import succeeded, so the
        # availability flag has to be checked before touching it.
        if MOLGRAPH_RDKIT_AVAILABLE and isinstance(mol, rdkit.Chem.Mol):
            self.mol_type = "rdkit"

    # Dispatch of built-in identifiers, depending on the detected mol type.
    def _make_edges(self, key, propy, **args):
        """Add the built-in edge property `propy` under attribute name `key`."""
        if self.mol_type == "rdkit":
            self.add_edges_from(rdkit_get_property_bonds(self.mol, key=key, prop=propy, **args))
        else:
            raise ValueError("Property identifier is not implemented for mol type", self.mol_type)

    def _make_nodes(self, key, propy, **args):
        """Add the built-in node property `propy` under attribute name `key`."""
        if self.mol_type == "rdkit":
            self.add_nodes_from(rdkit_get_property_atoms(self.mol, key=key, prop=propy, **args))
        else:
            raise ValueError("Property identifier is not implemented for mol type", self.mol_type)

    def _make_state(self, key, propy, **args):
        """Store the built-in state property `propy` under `key` in the graph state."""
        if self.mol_type == "rdkit":
            self._graph_state.update(rdkit_get_property_molstate(self.mol, key=key, prop=propy, **args))
        else:
            raise ValueError("Property identifier is not implemented for mol type", self.mol_type)

    def _default_property(self, kind):
        """Return a one-element list holding the default identifier for `kind`.

        `kind` is one of 'nodes', 'edges' or 'state'. Raises KeyError if the
        current mol type has no entry in ``_mols_implemented``.
        """
        return [self._mols_implemented[self.mol_type][kind][0]]

    @staticmethod
    def _as_property_dict(props):
        """Turn a list/tuple of identifiers into a ``{key: identifier}`` dict.

        Strings are used as their own key; dict entries are keyed by their
        'class' item. Anything that is not a list or tuple is returned as is.
        """
        if not isinstance(props, (list, tuple)):
            return props
        as_dict = {}
        for entry in props:
            if isinstance(entry, str):
                as_dict[entry] = entry
            elif isinstance(entry, dict):
                as_dict[entry['class']] = entry
            else:
                raise ValueError(
                    "Method must be single string or class dict but got", entry)
        return as_dict

    def _apply_properties(self, props, make_builtin):
        """Compute every property in `props`.

        String identifiers are routed to `make_builtin` (one of the
        ``_make_*`` methods); non-string 'class' entries are called as custom
        functions with this instance as first argument.
        """
        for key, value in props.items():
            if isinstance(value, str):
                make_builtin(key, value)
            elif isinstance(value, dict):
                if 'class' not in value:
                    raise ValueError(" 'class' method must be defined in", value)
                args = value.get('args', {})
                if isinstance(value['class'], str):
                    make_builtin(key, value['class'], **args)
                else:
                    # Custom function/class here
                    value['class'](self, key=key, **args)
            else:
                raise TypeError(
                    "Method must be a dict of {'class' : callable function/class or identifier, "
                    "'args' : {'value' : 0} }, with optional args but got",
                    value, "instead")

    @staticmethod
    def _with_fallback(mapping, keys, fallback):
        """Return a copy of `mapping` in which every key of `keys` is present.

        Missing keys are mapped to `fallback`. Working on a copy keeps the
        dictionary passed in by the caller unmodified.
        """
        filled = dict(mapping) if mapping is not None else {}
        for name in keys:
            filled.setdefault(name, fallback)
        return filled

    def make(self,
             nodes=None,
             edges=None,
             state=None
             ):
        """
        Construct graph from mol instance.

        The input is a dictionary of properties to calculate. The dict-key
        can be chosen freely and will be the name of the graph attribute.
        The value is either a string identifier of a built-in property, e.g. 'AtomicNum',
        or, if arguments have to be provided, a dict of the form:
        key : {'class': identifier, 'args':{ args_dict }}
        A custom method can be provided via an identifier dict of the form:
        key : {'class': function/class, 'args':{ args_dict }}
        The callable object of 'class' must accept this instance as first argument,
        then key=key and then the additional arguments from 'args':{ args_dict }.
        Instead of a dictionary, a list or tuple of identifiers or identifier dicts
        can be given; the identifier (or the 'class' entry) is then used as key.

        Args:
            nodes (dict, list, optional): Properties for nodes. Defaults to the first
                built-in node identifier of the mol type, e.g. ['AtomicNum'] for rdkit.
            edges (dict, list, optional): Properties for edges. Defaults to the first
                built-in edge identifier of the mol type, e.g. ['BondType'] for rdkit.
            state (dict, list, optional): Properties for graph state. Defaults to the first
                built-in state identifier of the mol type, e.g. ['NumAtoms'] for rdkit.

        Raises:
            AttributeError: If mol not found.
            KeyError: If a default is required but the mol type has no built-in properties.
            ValueError: If identifier dict is incorrect or a built-in identifier is
                requested for an unsupported mol type.
            TypeError: If property info is incorrect.

        Returns:
            self: This instance.
        """
        if self.mol is None:
            raise AttributeError("Initialize Molecule before making graph")

        # Set defaults if None
        if nodes is None:
            nodes = self._default_property('nodes')
        if edges is None:
            edges = self._default_property('edges')
        if state is None:
            state = self._default_property('state')

        # Make default keys if only a list is inserted. All three inputs are
        # validated before the graph is modified.
        nodes = self._as_property_dict(nodes)
        edges = self._as_property_dict(edges)
        state = self._as_property_dict(state)

        self._apply_properties(nodes, self._make_nodes)
        self._apply_properties(edges, self._make_edges)
        self._apply_properties(state, self._make_state)

        return self

    def to_tensor(self,
                  nodes=None,
                  edges=None,
                  state=None,
                  trafo_nodes=None,
                  trafo_edges=None,
                  trafo_state=None,
                  default_nodes=None,
                  default_edges=None,
                  default_state=None,
                  out_tensor=np.array
                  ):
        """
        Convert the nx graph into a dict of tensors which can be directly used for GCN.

        The desired attributes must be given with a suitable conversion function plus default value.
        Here, one can add also the type of tensor or one-Hot mappings etc. and its default/zero state,
        if the attribute is not specified for a specific node/edge. The properties are always mapped
        to numpy arrays and then converted to out_tensor. The dictionaries passed in for the
        transformations and defaults are not modified.

        Args:
            nodes (list, optional): Node properties. Defaults to the first built-in node
                identifier of the mol type, e.g. ['AtomicNum'] for rdkit.
            edges (list, optional): Edge properties. Defaults to the first built-in edge
                identifier of the mol type, e.g. ['BondType'] for rdkit.
            state (list, optional): State properties. Defaults to the first built-in state
                identifier of the mol type, e.g. ['NumAtoms'] for rdkit.
            trafo_nodes (dict, optional): Transformation function per node property.
                Missing entries default to np.array.
            trafo_edges (dict, optional): Transformation function per edge property.
                Missing entries default to np.array.
            trafo_state (dict, optional): Transformation function per state property.
                Missing entries default to np.array.
            default_nodes (dict, optional): Zero node properties. Missing entries default to np.array(0.0).
            default_edges (dict, optional): Zero edge properties. Missing entries default to np.array(0.0).
            default_state (dict, optional): Zero state properties. Missing entries default to np.array(0.0).
            out_tensor (func) : Final function applied to each output array. Default is np.array.

        Returns:
            dict: Graph tensors as dictionary with the keys
                "nodes", "edges", "state", "adjacency" and "indices".

        """
        if nodes is None:
            nodes = self._default_property('nodes')
        if edges is None:
            edges = self._default_property('edges')
        if state is None:
            state = self._default_property('state')

        # Complete transformations and defaults on copies of the given dicts.
        trafo_nodes = self._with_fallback(trafo_nodes, nodes, np.array)
        trafo_edges = self._with_fallback(trafo_edges, edges, np.array)
        trafo_state = self._with_fallback(trafo_state, state, np.array)
        default_nodes = self._with_fallback(default_nodes, nodes, np.array(0.0))
        default_edges = self._with_fallback(default_edges, edges, np.array(0.0))
        default_state = self._with_fallback(default_state, state, np.array(0.0))

        out_a = nx.to_numpy_array(self)

        # The alias np.int was removed from numpy; builtin int is the same dtype.
        node_idx = np.array(list(self.nodes), dtype=int)
        edge_idx = np.array(list(self.edges), dtype=int)

        outn = []
        for i in node_idx:
            node_attr = self.nodes[i]
            outn.append([trafo_nodes[key](node_attr[key]) if key in node_attr else default_nodes[key]
                         for key in nodes])
        outn = np.array(outn)

        oute = []
        for i in edge_idx:
            edge_attr = self.edges[i]
            oute.append([trafo_edges[key](edge_attr[key]) if key in edge_attr else default_edges[key]
                         for key in edges])
        oute = np.array(oute)

        outs = np.array([trafo_state[key](self._graph_state[key]) if key in self._graph_state
                         else default_state[key]
                         for key in state])

        # Make un-directed and sort edges and edge_index
        # NOTE(review): exact behaviour is defined by add_edges_reverse_indices, which is not shown here.
        outei, oute = add_edges_reverse_indices(edge_idx, oute)

        return {"nodes": out_tensor(outn),
                "edges": out_tensor(oute),
                "state": out_tensor(outs),
                "adjacency": out_tensor(out_a),
                "indices": out_tensor(outei)}

# m = rdkit.Chem.MolFromSmiles("CC=O")
# test = MolGraph(m)
# test.make()
# nx.draw(test, with_labels=True)
# out = test.to_tensor()