10#ifndef RGROUP_DECOMP_DATA
11#define RGROUP_DECOMP_DATA
41 std::vector<std::vector<RGroupMatch>>
matches;
55 :
params(std::move(inputParams)) {
62 :
params(std::move(inputParams)) {
63 for (
size_t i = 0; i < inputCores.size(); ++i) {
70 for (
auto &core :
cores) {
71 RWMol *alignCore = core.first ?
cores[0].core.get() :
nullptr;
73 "Could not prepare at least one core");
75 core.second.labelledCore.reset(
new RWMol(*core.second.core));
105 unsigned int label = 0;
118 "Scoring method is not fingerprint variance!");
121 "Illegal permutation prune length");
152 "permutation.size() should be <= matches.size()");
154 for (
size_t mol_idx = 0; mol_idx <
permutation.size(); ++mol_idx) {
155 std::vector<RGroupMatch> keepVector;
156 size_t mi = mol_idx + offset;
171 const bool removeAllHydrogenRGroups =
175 std::vector<RGroupMatch> results;
177 for (
size_t i = 0; i <
matches.size(); ++i) {
179 results.push_back(
matches[i].at(pi));
192 std::map<int, std::set<int>> labelCores;
193 std::set<int> coresVisited;
194 for (
auto &position : results) {
195 int core_idx = position.core_idx;
196 if (coresVisited.find(core_idx) == coresVisited.end()) {
197 coresVisited.insert(core_idx);
198 auto core =
cores.find(core_idx);
199 if (core !=
cores.end()) {
200 for (
auto rlabels :
getRlabels(*core->second.core)) {
201 int rlabel = rlabels.first;
202 labelCores[rlabel].insert(core_idx);
208 for (
int label :
labels) {
209 if (label > 0 && !removeAllHydrogenRGroups) {
213 for (
auto &position : results) {
214 R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
215 bool labelHasCore = labelCores[label].find(position.core_idx) !=
216 labelCores[label].end();
217 if (labelHasCore && rgroup != position.rgroups.end() &&
218 !rgroup->second->is_hydrogen) {
225 for (
auto &position : results) {
226 position.rgroups.erase(label);
256 for (
const auto &p : atoms) {
264 const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd) {
265 for (
const auto &i : atomsToAdd) {
266 mol.
addAtom(i.second,
false,
true);
276 UsedLabels &used_labels,
const std::set<int> &indexLabels,
277 const std::map<
int, std::vector<int>> &extraAtomRLabels) {
285 std::map<int, Atom *> atoms =
getRlabels(core);
293 std::map<int, std::vector<int>> bondsToCore;
294 std::vector<std::pair<Atom *, Atom *>> atomsToAdd;
297 for (
const auto &rlabels : atoms) {
298 int userLabel = rlabels.first;
302 Atom *atom = rlabels.second;
303 mappings[userLabel] = userLabel;
304 used_labels.
add(userLabel);
310 auto *newAt =
new Atom(0);
312 atomsToAdd.emplace_back(atom, newAt);
317 for (
auto newLabel : indexLabels) {
318 auto atm = atoms.find(newLabel);
319 if (atm == atoms.end()) {
323 Atom *atom = atm->second;
326 auto mapping = mappings.find(newLabel);
327 if (mapping == mappings.end()) {
328 rlabel = used_labels.
next();
329 mappings[newLabel] = rlabel;
331 rlabel = mapping->second;
339 auto *newAt =
new Atom(0);
341 atomsToAdd.emplace_back(atom, newAt);
346 for (
const auto &extraAtomRLabel : extraAtomRLabels) {
347 auto atm = atoms.find(extraAtomRLabel.first);
348 if (atm == atoms.end()) {
351 Atom *atom = atm->second;
353 for (
size_t i = 0; i < extraAtomRLabel.second.size(); ++i) {
354 int rlabel = used_labels.
next();
358 "Multiple attachments to a dummy (or hydrogen) is weird.");
359 auto *newAt =
new Atom(0);
361 atomsToAdd.emplace_back(atom, newAt);
366 for (
const auto &rlabels : atoms) {
367 auto atom = rlabels.second;
385 std::vector<std::pair<Atom *, Atom *>> atomsToAdd;
386 std::map<int, int> rLabelCoreIndexToAtomicWt;
393 const std::vector<int> &rlabels =
397 for (
int rlabel : rlabels) {
398 auto label = mappings.find(rlabel);
407 auto *newAt =
new Atom(0);
409 atomsToAdd.emplace_back(atom, newAt);
417 rLabelCoreIndexToAtomicWt[rLabelCoreIndex] = atom->
getAtomicNum();
426 bool implicitOnly =
false;
427 bool updateExplicitCount =
false;
428 bool sanitize =
false;
450 std::cerr <<
"Relabel Rgroup smiles " <<
MolToSmiles(mol) << std::endl;
461 std::set<int> indexLabels;
470 std::map<int, std::vector<int>> extraAtomRLabels;
472 for (
auto &it : best) {
473 for (
auto &rgroup : it.rgroups) {
474 if (rgroup.first > 0) {
478 indexLabels.insert(rgroup.first);
481 std::map<int, int> rlabelsUsedInRGroup =
482 rgroup.second->getNumBondsToRlabels();
483 for (
auto &numBondsUsed : rlabelsUsedInRGroup) {
485 if (numBondsUsed.second > 1) {
486 extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
494 for (
auto &core :
cores) {
495 core.second.labelledCore.reset(
new RWMol(*core.second.core));
508 used_labels.
add(userLabel);
510 for (
auto &core :
cores) {
512 indexLabels, extraAtomRLabels);
515 for (
auto &it : best) {
516 for (
auto &rgroup : it.rgroups) {
521 std::set<int> uniqueMappedValues;
523 std::inserter(uniqueMappedValues, uniqueMappedValues.end()),
524 [](
const std::pair<int, int> &p) { return p.second; });
526 "Error in uniqueness of final RLabel mapping");
528 uniqueMappedValues.size() ==
userLabels.size() + indexLabels.size(),
529 "Error in final RMapping size");
536 switch (scoreMethod) {
542 fingerprintVarianceScoreData);
550 bool finalize =
false) {
554 auto t0 = std::chrono::steady_clock::now();
555 std::unique_ptr<CartesianProduct> iterator;
565 auto best = max_element(results.begin(), results.end(),
567 return a.rGroupScorer.getBestScore() <
568 b.rGroupScorer.getBestScore();
572 auto result = ga.
run();
582 std::vector<size_t> permutations;
590 std::back_inserter(permutations),
591 [](
const std::vector<RGroupMatch> &m) { return m.size(); });
592 permutation = std::vector<size_t>(permutations.size(), 0);
598 std::cerr <<
"Processing" << std::endl;
601 iterator = std::move(it);
605 while (iterator->next()) {
606 if (count > iterator->maxPermutations) {
610 std::cerr <<
"**************************************************"
615 :
score(iterator->permutation);
622 std::cerr <<
" ===> current best:" << newscore <<
">"
640 if (pruneMatches || finalize) {
#define CHECK_INVARIANT(expr, mess)
#define PRECONDITION(expr, mess)
The class for representing atoms.
void setNoImplicit(bool what)
sets our noImplicit flag, indicating whether or not we are allowed to have implicit Hs
void setAtomicNum(int newNum)
sets our atomic number
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
int getAtomMapNum() const
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
unsigned int getIsotope() const
returns our isotope number
unsigned int getDegree() const
bool getPropIfPresent(const std::string &key, T &res) const
void clearProp(const std::string &key) const
clears the value of a property
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
std::set< int > labels_used
vector< GaResult > runBatch()
GaResult run(int runNumber=1)
unsigned int numberPermutations() const
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
void breakTies(const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
double getBestScore() const
return the best score found so far
unsigned int getNumConformers() const
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
static std::string to_string(const Descriptor &desc)
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
RDKIT_RDGENERAL_EXPORT const std::string _MolFileRLabel
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_CORE_INDEX
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string done
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string SIDECHAIN_RLABELS
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
bool isAnyAtomWithMultipleNeighborsOrNotUserRLabel(const Atom &atom)
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_TYPE
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr)
const unsigned int EMPTY_CORE_LABEL
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabels,atom mapping for the current molecule.
iterate through all possible permutations of the rgroups
double fingerprintVarianceGroupScore()
void addVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
void removeVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
RCore is the core common to a series of molecules.
A single rgroup attached to a given core.
boost::shared_ptr< RWMol > combinedMol
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
size_t permutationProduct
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * > > &atomsToAdd)
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
std::vector< int > processedRlabels
int getRlabel(Atom *atom) const
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int > > &extraAtomRLabels)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
RGroupScorer rGroupScorer
bool removeAllHydrogenRGroupsAndLabels
unsigned int matchingStrategy
bool onlyMatchAtRGroups
only allow rgroup decomposition at the specified rgroups
bool removeAllHydrogenRGroups
remove all user-defined rgroups that only have hydrogens
double timeout
timeout in seconds. <=0 indicates no timeout
bool removeHydrogensPostMatch
remove all hydrogens from the output molecules
unsigned int rgroupLabelling
bool prepareCore(RWMol &, const RWMol *alignCore)