RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
DuplicatedSeedCache.h
Go to the documentation of this file.
1
//
2
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
3
//
4
// @@ All Rights Reserved @@
5
// This file is part of the RDKit.
6
// The contents are covered by the terms of the BSD license
7
// which is included in the file license.txt, found at the root
8
// of the RDKit source tree.
9
//
10
#include <
RDGeneral/export.h
>
11
#pragma once
12
#include <map>
13
#include <vector>
14
#include <stdexcept>
15
#include <algorithm>
16
17
namespace
RDKit
{
18
namespace
FMCS {
19
/// Cache that maps a key (a sorted list of atom indices plus a sorted list of
/// bond indices) to a boolean value.
///
/// The largest atom count among the stored keys is tracked so that lookups of
/// keys with more atoms than anything stored can fail without searching.
class DuplicatedSeedCache {
 public:
  typedef bool TValue;

  /// Lookup key: two index lists that are kept sorted on insertion.
  class TKey {
    std::vector<unsigned> AtomIdx;  // sorted
    std::vector<unsigned> BondIdx;  // sorted

   public:
    /// @return number of atom indices stored in the key.
    size_t getNumAtoms() const { return AtomIdx.size(); }
    /// @return number of bond indices stored in the key.
    size_t getNumBonds() const { return BondIdx.size(); }

    /// Inserts atom index @p i at its sorted position (duplicates are kept).
    void addAtom(unsigned i) {
      AtomIdx.insert(std::lower_bound(AtomIdx.begin(), AtomIdx.end(), i), i);
    }
    /// Inserts bond index @p i at its sorted position (duplicates are kept).
    void addBond(unsigned i) {
      BondIdx.insert(std::lower_bound(BondIdx.begin(), BondIdx.end(), i), i);
    }

    /// Two keys are equal when both index lists hold the same values.
    bool operator==(const TKey &right) const {
      // Cheap size checks first; the element-wise vector comparison is also
      // well defined for empty lists (taking &v[0] of an empty vector is not).
      return AtomIdx.size() == right.AtomIdx.size() &&
             BondIdx.size() == right.BondIdx.size() &&
             AtomIdx == right.AtomIdx && BondIdx == right.BondIdx;
    }

    /// Strict weak ordering: by atom count, then bond count, then by the atom
    /// indices and finally the bond indices, compared element by element.
    bool operator<(const TKey &right) const {
      if (AtomIdx.size() != right.AtomIdx.size()) {
        return AtomIdx.size() < right.AtomIdx.size();
      }
      if (BondIdx.size() != right.BondIdx.size()) {
        return BondIdx.size() < right.BondIdx.size();
      }
      // Sizes match -> compare the contents. Comparing the values (rather
      // than their raw bytes) is safe for empty lists and does not depend on
      // the byte order of the host.
      if (AtomIdx != right.AtomIdx) {
        return AtomIdx < right.AtomIdx;
      }
      return BondIdx < right.BondIdx;
    }
  };

 private:
  std::map<TKey, TValue> Index;
  size_t MaxAtoms{0};  // max key in the cache for fast failed find

 public:
  DuplicatedSeedCache() {}

  /// Removes every entry and resets the tracked maximum atom count.
  void clear() {
    Index.clear();
    MaxAtoms = 0;
  }

  /// Looks up @p key.
  /// @param key   key to search for.
  /// @param value set to the stored value when the key is present, otherwise
  ///              set to false.
  /// @return true if the key is present in the cache.
  bool find(const TKey &key, TValue &value) const {
    value = false;
    if (key.getNumAtoms() > MaxAtoms) {
      return false;  // fast check if key greater than max key in the cache
    }

    const std::map<TKey, TValue>::const_iterator entryit = Index.find(key);
    if (Index.end() == entryit) {
      return false;
    }
    value = entryit->second;
    return true;
  }

  /// Stores @p found under @p key. An entry that already exists for the key
  /// is left unchanged (std::map::insert does not overwrite).
  void add(const TKey &key, TValue found = true) {
    if (key.getNumAtoms() > MaxAtoms) {
      MaxAtoms = key.getNumAtoms();
    }

    Index.insert(std::pair<TKey, bool>(key, found));
  }

  /// @return number of cached entries.
  size_t size() const {
    return Index.size();  // for statistics only
  }
};
114
}
// namespace FMCS
115
}
// namespace RDKit
RDKit::FMCS::DuplicatedSeedCache::TKey
Definition
DuplicatedSeedCache.h:22
RDKit::FMCS::DuplicatedSeedCache::TKey::getNumAtoms
size_t getNumAtoms() const
Definition
DuplicatedSeedCache.h:26
RDKit::FMCS::DuplicatedSeedCache::TKey::addAtom
void addAtom(unsigned i)
Definition
DuplicatedSeedCache.h:29
RDKit::FMCS::DuplicatedSeedCache::TKey::operator==
bool operator==(const TKey &right) const
Definition
DuplicatedSeedCache.h:40
RDKit::FMCS::DuplicatedSeedCache::TKey::operator<
bool operator<(const TKey &right) const
Definition
DuplicatedSeedCache.h:49
RDKit::FMCS::DuplicatedSeedCache::TKey::getNumBonds
size_t getNumBonds() const
Definition
DuplicatedSeedCache.h:27
RDKit::FMCS::DuplicatedSeedCache::TKey::addBond
void addBond(unsigned i)
Definition
DuplicatedSeedCache.h:34
RDKit::FMCS::DuplicatedSeedCache
Definition
DuplicatedSeedCache.h:19
RDKit::FMCS::DuplicatedSeedCache::TValue
bool TValue
Definition
DuplicatedSeedCache.h:21
RDKit::FMCS::DuplicatedSeedCache::add
void add(const TKey &key, TValue found=true)
Definition
DuplicatedSeedCache.h:102
RDKit::FMCS::DuplicatedSeedCache::size
size_t size() const
Definition
DuplicatedSeedCache.h:110
RDKit::FMCS::DuplicatedSeedCache::DuplicatedSeedCache
DuplicatedSeedCache()
Definition
DuplicatedSeedCache.h:83
RDKit::FMCS::DuplicatedSeedCache::clear
void clear()
Definition
DuplicatedSeedCache.h:84
RDKit::FMCS::DuplicatedSeedCache::find
bool find(const TKey &key, TValue &value) const
Definition
DuplicatedSeedCache.h:89
export.h
RDKit
Std stuff.
Definition
Abbreviations.h:19
RDKit::rdvalue_is
bool rdvalue_is(const RDValue_cast_t)
Definition
RDValue-doublemagic.h:372
GraphMol
FMCS
DuplicatedSeedCache.h
Generated on Wed Sep 20 2023 13:49:49 for RDKit by
1.9.8