RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
GenericGroups.h
Go to the documentation of this file.
1//
2// Copyright (C) 2021 Greg Landrum
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_GENERICGROUPS_H
12#define RD_GENERICGROUPS_H
13
14#include <vector>
15#include <functional>
16#include <map>
17#include <boost/dynamic_bitset.hpp>
18
19namespace RDKit {
20class ROMol;
21class Atom;
22class Bond;
23
24namespace GenericGroups {
25// We'd like to be able to correctly interpret what's written by Marvin and
26// MarvinJS, so the conditions for these are adapted from the ChemAxon
27// documentation for homology groups
28// (https://docs.chemaxon.com/display/docs/homology-groups.md)
29//
30// If I had questions about what the queries should do, I ran examples in Reaxys
31// with MarvinJS as the sketcher to see what that returns.
32//
33// I've tried to document deviations or surprises
34
35namespace Matchers {
36//! Matches alkyl side chains
37/*!
38
39 Conditions:
40 - side chain consists entirely of carbon or hydrogen
41 - at least one carbon is present
42 - all bonds are single
43 - no ring bonds
44
45*/
47 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
48//! Matches alkenyl side chains
49/*!
50
51 Conditions:
52 - side chain consists entirely of carbon or hydrogen
53 - contains at least one C=C
54 - no ring bonds
55
56*/
58 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
59//! Matches alkynyl side chains
60/*!
61
62 Conditions:
63 - side chain consists entirely of carbon or hydrogen
64 - contains at least one C#C
65 - no ring bonds
66
67*/
69 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
70
71//! Matches carbocyclic side chains
72/*!
73
74 Note: this is Reaxys query type CBC and matches carbocycles
75
76 Conditions:
77 - atom is in at least one ring composed entirely of carbon
78 - atom is not in any rings not compatible with the above conditions
79 - additional fused rings in the system must obey the same rules
80
81
82*/
84 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
85//! Matches cycloalkyl side chains
86/*!
87
88 Note: this is Reaxys query type CAL and is directly equivalent to alkyl,
89 except the immediate atom needs to be in a ring.
90
91
92 Conditions:
93 - atom is in at least one ring composed entirely of carbon and connected
94 with single bonds
95 - atoms in the ring do not have unsaturations (including exocyclic)
96 - atom is not in any rings not compatible with the above conditions
97 - additional fused rings in the system must obey the same rules (i.e. all
98 single bonds)
99
100
101*/
103 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
104//! Matches cycloalkenyl side chains
105/*!
106
107 Note: this is Reaxys query type CEL and matches carbocycles which have at
108 least one double or aromatic bond.
109
110 Conditions:
111 - atom is in at least one ring composed entirely of carbon and with at least
112 one double or aromatic bond
113 - atom is not in any rings not compatible with the above conditions
114 - additional fused rings in the system must obey the same rules (including
115 that each ring must have at least one double or aromatic bond)
116
117
118*/
120 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
121
122//! Matches heterocyclic side chains
123/*!
124
125 Note: this is Reaxys query type CHC and matches heterocycles
126
127 Conditions:
128 - atom is in at least one fused ring with a heteroatom
129
130
131*/
133 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
134
135//! Matches aryl side chains
136/*!
137
138 Note: this is Reaxys query type ARY and matches carbocycles which are aromatic
139
140 Conditions:
141 - atom is in at least one aromatic ring composed entirely of carbon
142 - atom is not in any rings not compatible with the above conditions
143 - additional fused rings in the system must obey the same rules
144
145
146*/
148 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
149
150//! Matches heteroaryl side chains
151/*!
152
153 Note: this is Reaxys query type HAR and matches aromatic heterocycles
154
155 Conditions:
156 - atom is in at least one fused aromatic system with a heteroatom
157
158
159*/
161 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
162
163//! Matches cyclic side chains
164/*!
165
166 Note: this is Reaxys query type CYC and matches cycles
167
168 Conditions:
169 - atom is in at least one ring
170
171*/
173 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
174
175//! Matches acyclic side chains
176/*!
177
178 Note: this is Reaxys query type ACY and matches sidechains with no cycles
179
180 Conditions:
181 - no atom in the sidechain is in a ring
182
183*/
185 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
186
187//! Matches all-carbon acyclic side chains
188/*!
189
190 Note: this is Reaxys query type ABC and matches all-carbon sidechains with no
191 cycles
192
193 Conditions:
194 - all atoms in the sidechain are carbon
195 - no atom in the sidechain is in a ring
196
197*/
199 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
200
201//! Matches acyclic side chains with at least one heteroatom
202/*!
203
204 Note: this is Reaxys query type AHC and matches sidechains with no cycles and
205 at least one heteroatom
206
207 Conditions:
208 - at least one non-carbon, non-hydrogen atom is in the sidechain
209 - no atom in the sidechain is in a ring
210
211*/
213 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
214//! Matches acyclic alkoxy side chains
215/*!
216
217 Note: this is Reaxys query type AOX and matches alkoxy sidechains
218
219 Conditions:
220 - first atom is an O
221 - all other atoms are C
222 - all single bonds
223 - no atom in the sidechain is in a ring
224
225*/
227 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
228
229//! Matches rings without carbon
230/*!
231
232 Note: this is Reaxys query type CXX and matches rings which contain no carbon
233
234 Conditions:
235 - a ring is present
236 - none of the atoms in the fused ring system are carbon
237
238*/
240 const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
241} // namespace Matchers
242const static std::map<
243 std::string,
244 std::function<bool(const ROMol &, const Atom &, boost::dynamic_bitset<>)>>
248 {"Alkenyl", Matchers::AlkenylAtomMatcher},
250 {"Alkynyl", Matchers::AlkynylAtomMatcher},
252 {"Carbocyclic", Matchers::CarbocyclicAtomMatcher},
254 {"Carbocycloalkyl", Matchers::CarbocycloalkylAtomMatcher},
256 {"Carbocycloalkenyl", Matchers::CarbocycloalkenylAtomMatcher},
258 {"Carboaryl", Matchers::CarboarylAtomMatcher},
260 {"Cyclic", Matchers::CyclicAtomMatcher},
262 {"Acyclic", Matchers::AcyclicAtomMatcher},
264 {"Carboacyclic", Matchers::CarboacyclicAtomMatcher},
266 {"Heteroacyclic", Matchers::HeteroacyclicAtomMatcher},
270 {"Heterocyclic", Matchers::HeterocyclicAtomMatcher},
272 {"Heteroaryl", Matchers::HeteroarylAtomMatcher},
274 {"NoCarbonRing", Matchers::NoCarbonRingAtomMatcher},
276};
277//! returns false if any of the molecule's generic atoms are not satisfied in
278/// the current match
280 const ROMol &mol, const ROMol &query,
281 const std::vector<unsigned int> &match);
282//! sets the appropriate generic query tags based on atom labels and/or SGroups
283/*!
284
285- Generic query tags found in the atom labels/SGroups will overwrite existing
286generic query tags (if there are any present).
287- only SUP SGroups are considered
288- Any atom labels or SGroups which are converted will be removed
289- If both atom labels and SGroups are being used and an atom has generic
290query tags in both, the one from the SGroup will be used.
291- Generic query tags not found in GenericGroups::genericMatchers will be ignored
292
293*/
295 ROMol &mol, bool useAtomLabels = true, bool useSGroups = true);
297 ROMol &mol);
298} // namespace GenericGroups
299} // namespace RDKit
300
301#endif
The class for representing atoms.
Definition Atom.h:68
#define RDKIT_GENERICGROUPS_EXPORT
Definition export.h:217
RDKIT_GENERICGROUPS_EXPORT bool CyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches cyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool AlkylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches alkyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool CarbocycloalkylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches cycloalkyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool HeteroacyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches acyclic side chains with at least one heteroatom.
RDKIT_GENERICGROUPS_EXPORT bool HeterocyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches heterocyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool CarbocyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches carbocyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool AcyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches acyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool AlkynylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches alkynyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool CarbocycloalkenylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches cycloalkenyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool NoCarbonRingAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches rings without carbon.
RDKIT_GENERICGROUPS_EXPORT bool CarboacyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches all-carbon acyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool HeteroarylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches heteroaryl side chains.
RDKIT_GENERICGROUPS_EXPORT bool AlkoxyacyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches acyclic alkoxy side chains.
RDKIT_GENERICGROUPS_EXPORT bool AlkenylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches alkenyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool CarboarylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches aryl side chains.
RDKIT_GENERICGROUPS_EXPORT bool genericAtomMatcher(const ROMol &mol, const ROMol &query, const std::vector< unsigned int > &match)
RDKIT_GENERICGROUPS_EXPORT void setGenericQueriesFromProperties(ROMol &mol, bool useAtomLabels=true, bool useSGroups=true)
sets the appropriate generic query tags based on atom labels and/or SGroups
static const std::map< std::string, std::function< bool(const ROMol &, const Atom &, boost::dynamic_bitset<>)> > genericMatchers
RDKIT_GENERICGROUPS_EXPORT void convertGenericQueriesToSubstanceGroups(ROMol &mol)
Std stuff.
bool rdvalue_is(const RDValue_cast_t)