RDKit
Open-source cheminformatics and machine learning.
AtomPairGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018 Boran Adas, Google Summer of Code
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_ATOMPAIRGEN_H_2018_06
13#define RD_ATOMPAIRGEN_H_2018_06
14
17
18namespace RDKit {
19namespace AtomPair {
20using namespace AtomPairs;
21
24 const bool df_includeChirality;
25 const bool df_topologicalTorsionCorrection;
26
27 public:
28 /**
29 \brief Construct a new AtomPairAtomInvGenerator object
30
31 \param includeChirality toggles the inclusion of bits indicating R/S
32 chirality
33 \param topologicalTorsionCorrection when set, subtracts 2 from the invariants
34 returned; added so that TopologicalTorsionGenerator can use this
35 */
36 AtomPairAtomInvGenerator(bool includeChirality = false,
37 bool topologicalTorsionCorrection = false);
38
39 std::vector<std::uint32_t> *getAtomInvariants(
40 const ROMol &mol) const override;
41
42 std::string infoString() const override;
44};
45
46/*!
47 \brief class that holds atom-pair fingerprint specific arguments
48
49 */
50template <typename OutputType>
52 : public FingerprintArguments<OutputType> {
53 public:
55 const bool df_use2D;
56 const unsigned int d_minDistance;
57 const unsigned int d_maxDistance;
58
59 OutputType getResultSize() const override;
60
61 std::string infoString() const override;
62
63 /*!
64 \brief construct a new AtomPairArguments object
65
66 \param countSimulation if set, use count simulation while generating the
67 fingerprint
68 \param includeChirality if set, chirality will be used in the atom
69 invariants; this is ignored if atomInvariantsGenerator is present for
70 the \c FingerprintGenerator that uses this
71 \param use2D if set, the 2D (topological) distance matrix will be
72 used
73 \param minDistance minimum distance between atoms to be considered in a
74 pair, default is 1 bond
75 \param maxDistance maximum distance between atoms to be considered in a
76 pair, default is maxPathLen-1 bonds
77 \param countBounds boundaries for count simulation, corresponding bit
78 will be set if the count is higher than the number provided for that spot
79 \param fpSize size of the generated fingerprint, does not affect the sparse
80 versions
81
82 */
83 AtomPairArguments(const bool countSimulation = true,
84 const bool includeChirality = false,
85 const bool use2D = true, const unsigned int minDistance = 1,
86 const unsigned int maxDistance = (maxPathLen - 1),
87 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
88 const std::uint32_t fpSize = 2048);
89};
90
91/*!
92 \brief class that holds atom-environment data needed for atom-pair fingerprint
93 generation
94
95 */
96template <typename OutputType>
98 : public AtomEnvironment<OutputType> {
99 const unsigned int d_atomIdFirst;
100 const unsigned int d_atomIdSecond;
101 const unsigned int d_distance;
102
103 public:
105 const std::vector<std::uint32_t> *atomInvariants,
106 const std::vector<std::uint32_t> *bondInvariants,
107 const AdditionalOutput *additionalOutput,
108 const bool hashResults = false,
109 const std::uint64_t fpSize = 0) const override;
110
111 /*!
112 \brief construct a new AtomPairAtomEnv object
113
114 \param atomIdFirst id of the first atom of the atom-pair
115 \param atomIdSecond id of the second atom of the atom-pair
116 \param distance distance between the atoms
117 */
118 AtomPairAtomEnv(const unsigned int atomIdFirst,
119 const unsigned int atomIdSecond, const unsigned int distance);
120};
121
122/*!
123 \brief class that generates atom-environments for atom-pair fingerprint
124
125 */
126template <typename OutputType>
128 : public AtomEnvironmentGenerator<OutputType> {
129 public:
130 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
131 const ROMol &mol, FingerprintArguments<OutputType> *arguments,
132 const std::vector<std::uint32_t> *fromAtoms,
133 const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
134 const AdditionalOutput *additionalOutput,
135 const std::vector<std::uint32_t> *atomInvariants,
136 const std::vector<std::uint32_t> *bondInvariants,
137 const bool hashResults = false) const override;
138
139 std::string infoString() const override;
140};
141
142/*!
143 \brief helper function that generates a \c FingerprintGenerator that generates
144 atom-pair fingerprints
145 \tparam OutputType determines the size of the bitIds and the result, can be 32
146 or 64 bit unsigned integer
147 \param minDistance minimum distance between atoms to be considered in a pair,
148 default is 1 bond
149 \param maxDistance maximum distance between atoms to be considered in a pair,
150 default is maxPathLen-1 bonds
151 \param includeChirality if set, chirality will be used in the atom invariants;
152 this is ignored if atomInvariantsGenerator is provided
153 \param use2D if set, the 2D (topological) distance matrix will be used
154 \param atomInvariantsGenerator atom invariants to be used during fingerprint
155 generation
156 \param useCountSimulation if set, use count simulation while generating the
157 fingerprint
158 \param countBounds boundaries for count simulation, corresponding bit will be
159 set if the count is higher than the number provided for that spot
160 \param fpSize size of the generated fingerprint, does not affect the sparse
161 versions
162 \param ownsAtomInvGen if set, the atom invariants generator is destroyed with the
163 fingerprint generator
164
165 \return FingerprintGenerator<OutputType>* that generates atom-pair
166 fingerprints
167
168 This generator supports the following \c AdditionalOutput types:
169 - \c atomToBits : which bits each atom is involved in
170 - \c atomCounts : how many bits each atom sets
171 - \c bitInfoMap : map from bitId to (atomId1, atomId2) pairs
172
173 */
174template <typename OutputType>
177 const unsigned int minDistance = 1,
178 const unsigned int maxDistance = maxPathLen - 1,
179 const bool includeChirality = false, const bool use2D = true,
180 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
181 const bool useCountSimulation = true, const std::uint32_t fpSize = 2048,
182 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
183 const bool ownsAtomInvGen = false);
184
185} // namespace AtomPair
186} // namespace RDKit
187
188#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
class that holds atom-pair fingerprint specific arguments
AtomPairArguments(const bool countSimulation=true, const bool includeChirality=false, const bool use2D=true, const unsigned int minDistance=1, const unsigned int maxDistance=(maxPathLen - 1), const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const std::uint32_t fpSize=2048)
construct a new AtomPairArguments object
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
class that holds atom-environment data needed for atom-pair fingerprint generation
OutputType getBitId(FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
AtomPairAtomEnv(const unsigned int atomIdFirst, const unsigned int atomIdSecond, const unsigned int distance)
construct a new AtomPairAtomEnv object
AtomPairAtomInvGenerator * clone() const override
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
AtomPairAtomInvGenerator(bool includeChirality=false, bool topologicalTorsionCorrection=false)
Construct a new AtomPairAtomInvGenerator object.
class that generates atom-environments for atom-pair fingerprint
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:169
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getAtomPairGenerator(const unsigned int minDistance=1, const unsigned int maxDistance=maxPathLen - 1, const bool includeChirality=false, const bool use2D=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, const bool useCountSimulation=true, const std::uint32_t fpSize=2048, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const bool ownsAtomInvGen=false)
helper function that generates a FingerprintGenerator that generates atom-pair fingerprints
const unsigned int maxPathLen
Std stuff.
Definition: Abbreviations.h:18