This file is indexed.

/usr/share/openbabel/2.3.2/MACCS.txt is in libopenbabel4v5 2.3.2+dfsg-3build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
#Comments after SMARTS
# Extracted from RDKit r1553 Nov 2010 rdkit/Chem/MACCSKeys.py
#
# Copyright (C) 2001-2008 greg Landrum and Rational Discovery LLC
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#
# SMARTS definitions for the publicly available MACCS keys

# I compared the MACCS fingerprints generated here with those from two
# other packages (not MDL, unfortunately). Of course, there are still
# disagreements between the various fingerprints, but I think
# these definitions work pretty well. Some notes:

# 1) most of the differences have to do with aromaticity
# 2) there's sometimes a discrepancy because the current RDKit
# definitions do not require multiple matches to be distinct, e.g. the
# SMILES C(=O)CC(=O) can match the (hypothetical) key O=CC twice in my
# definition. It's not clear to me what the correct behavior is.
# 3) Some keys are not fully defined in the MDL documentation
# 4) Two keys, 125 and 166, have to be done outside of SMARTS.
# 5) Key 1 (ISOTOPE) isn't defined

# these are SMARTS patterns corresponding to the MDL MACCS keys
  1:('?',0), # ISOTOPE
  #2:('[#103,#104,#105,#106,#107,#106,#109,#110,#111,#112]',0),  # ISOTOPE Not complete
  2:('[#103,#104]',0),  # ISOTOPE Not complete
  3:('[Ge,As,Se,Sn,Sb,Te,Tl,Pb,Bi]',0), # Group IVa,Va,VIa Periods 4-6 (Ge...)  *NOTE* spec wrong
  4:('[Ac,Th,Pa,U,Np,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr]',0), # actinide
  5:('[Sc,Ti,Y,Zr,Hf]',0), # Group IIIB,IVB (Sc...)  *NOTE* spec wrong
  6:('[La,Ce,Pr,Nd,Pm,Sm,Eu,Gd,Tb,Dy,Ho,Er,Tm,Yb,Lu]',0), # Lanthanide
  7:('[V,Cr,Mn,Nb,Mo,Tc,Ta,W,Re]',0), # Group VB,VIB,VIIB (V...) *NOTE* spec wrong
  8:('[!#6;!#1]1~*~*~*~1',0), # QAAA@1
  9:('[Fe,Co,Ni,Ru,Rh,Pd,Os,Ir,Pt]',0), # Group VIII (Fe...)
  10:('[Be,Mg,Ca,Sr,Ba,Ra]',0), # Group IIa (Alkaline earth)
  11:('*1~*~*~*~1',0), # 4M Ring
  12:('[Cu,Zn,Ag,Cd,Au,Hg]',0), # Group IB,IIB (Cu..)
  13:('[#8]~[#7](~[#6])~[#6]',0), # ON(C)C
  14:('[#16]-[#16]',0), # S-S
  15:('[#8]~[#6](~[#8])~[#8]',0), # OC(O)O
  16:('[!#6;!#1]1~*~*~1',0), # QAA@1
  17:('[#6]#[#6]',0), #CTC
  18:('[B,Al,Ga,In,Tl]',0), # Group IIIA (B...) *NOTE* spec wrong
  19:('*1~*~*~*~*~*~*~1',0), # 7M Ring
  20:('[Si]',0), #Si
  21:('[#6]=[#6](~[!#6;!#1])~[!#6;!#1]',0), # C=C(Q)Q
  22:('*1~*~*~1',0), # 3M Ring
  23:('[#7]~[#6](~[#8])~[#8]',0), # NC(O)O
  24:('[#7]-[#8]',0), # N-O
  25:('[#7]~[#6](~[#7])~[#7]',0), # NC(N)N
  26:('[#6]=;@[#6](@*)@*',0), # C$=C($A)$A
  27:('[I]',0), # I
  28:('[!#6;!#1]~[CH2]~[!#6;!#1]',0), # QCH2Q
  29:('[#15]',0),# P
  30:('[#6]~[!#6;!#1](~[#6])(~[#6])~*',0), # CQ(C)(C)A
  31:('[!#6;!#1]~[F,Cl,Br,I]',0), # QX
  32:('[#6]~[#16]~[#7]',0), # CSN
  33:('[#7]~[#16]',0), # NS
  34:('[CH2]=*',0), # CH2=A
  35:('[Li,Na,K,Rb,Cs,Fr]',0), # Group IA (Alkali Metal)
  36:('[#16R]',0), # S Heterocycle
  37:('[#7]~[#6](~[#8])~[#7]',0), # NC(O)N
  38:('[#7]~[#6](~[#6])~[#7]',0), # NC(C)N
  39:('[#8]~[#16](~[#8])~[#8]',0), # OS(O)O
  40:('[#16]-[#8]',0), # S-O
  41:('[#6]#[#7]',0), # CTN
  42:('F',0), # F
  43:('[!C;!c;!#1;!H0]~*~[!C;!c;!#1;!H0]',0), # QHAQH
  44:('?',0), # OTHER
  45:('[#6]=[#6]~[#7]',0), # C=CN
  46:('Br',0), # BR
  47:('[#16]~*~[#7]',0), # SAN
  48:('[#8]~[!#6;!#1](~[#8])(~[#8])',0), # OQ(O)O
  49:('[!+0]',0), # CHARGE  
  50:('[#6]=[#6](~[#6])~[#6]',0), # C=C(C)C
  51:('[#6]~[#16]~[#8]',0), # CSO
  52:('[#7]~[#7]',0), # NN
  53:('[!#6;!#1;!H0]~*~*~*~[!#6;!#1;!H0]',0), # QHAAAQH
  54:('[!#6;!#1;!H0]~*~*~[!#6;!#1;!H0]',0), # QHAAQH
  55:('[#8]~[#16]~[#8]',0), #OSO
  56:('[#8]~[#7](~[#8])~[#6]',0), # ON(O)C
  57:('[#8R]',0), # O Heterocycle
  58:('[!#6;!#1]~[#16]~[!#6;!#1]',0), # QSQ
  59:('[#16]!:*:*',0), # Snot%A%A
  60:('[#16]=[#8]',0), # S=O
  61:('*~[#16](~*)~*',0), # AS(A)A
  62:('*@*!@*@*',0), # A$!A$A
  63:('[#7]=[#8]',0), # N=O
  64:('*@*!@[#16]',0), # A$A!S
  65:('c:n',0), # C%N
  66:('[#6]~[#6](~[#6])(~[#6])~*',0), # CC(C)(C)A
  67:('[!#6;!#1]~[#16]',0), # QS
  68:('[!#6;!#1;!H0]~[!#6;!#1;!H0]',0), # QHQH (&...) FIX: incomplete definition
  69:('[!#6;!#1]~[!#6;!#1;!H0]',0), # QQH
  70:('[!#6;!#1]~[#7]~[!#6;!#1]',0), # QNQ
  71:('[#7]~[#8]',0), # NO
  72:('[#8]~*~*~[#8]',0), # OAAO
  73:('[#16]=*',0), # S=A
  74:('[CH3]~*~[CH3]',0), # CH3ACH3
  75:('*!@[#7]@*',0), # A!N$A
  76:('[#6]=[#6](~*)~*',0), # C=C(A)A
  77:('[#7]~*~[#7]',0), # NAN
  78:('[#6]=[#7]',0), # C=N
  79:('[#7]~*~*~[#7]',0), # NAAN
  80:('[#7]~*~*~*~[#7]',0), # NAAAN
  81:('[#16]~*(~*)~*',0), # SA(A)A
  82:('*~[CH2]~[!#6;!#1;!H0]',0), # ACH2QH
  83:('[!#6;!#1]1~*~*~*~*~1',0), # QAAAA@1
  84:('[NH2]',0), #NH2
  85:('[#6]~[#7](~[#6])~[#6]',0), # CN(C)C
  86:('[C;H2,H3][!#6;!#1][C;H2,H3]',0), # CH2QCH2
  87:('[F,Cl,Br,I]!@*@*',0), # X!A$A
  88:('[#16]',0), # S
  89:('[#8]~*~*~*~[#8]',0), # OAAAO
  90:('[$([!#6;!#1;!H0]~*~*~[CH2]~*),$([!#6;!#1;!H0;R]1@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~[R]1@[R]@[CH2;R]1)]',0), # QHAACH2A
  91:('[$([!#6;!#1;!H0]~*~*~*~[CH2]~*),$([!#6;!#1;!H0;R]1@[R]@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~[R]1@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~*~[R]1@[R]@[CH2;R]1)]',0), # QHAAACH2A
  92:('[#8]~[#6](~[#7])~[#6]',0), # OC(N)C
  93:('[!#6;!#1]~[CH3]',0), # QCH3
  94:('[!#6;!#1]~[#7]',0), # QN
  95:('[#7]~*~*~[#8]',0), # NAAO
  96:('*1~*~*~*~*~1',0), # 5 M ring
  97:('[#7]~*~*~*~[#8]',0), # NAAAO
  98:('[!#6;!#1]1~*~*~*~*~*~1',0), # QAAAAA@1
  99:('[#6]=[#6]',0), # C=C
  100:('*~[CH2]~[#7]',0), # ACH2N
  101:('[$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1)]',0), # 8M Ring or larger. This only handles up to ring sizes of 14
  102:('[!#6;!#1]~[#8]',0), # QO
  103:('Cl',0), # CL
  104:('[!#6;!#1;!H0]~*~[CH2]~*',0), # QHACH2A
  105:('*@*(@*)@*',0), # A$A($A)$A
  106:('[!#6;!#1]~*(~[!#6;!#1])~[!#6;!#1]',0), # QA(Q)Q
  107:('[F,Cl,Br,I]~*(~*)~*',0), # XA(A)A
  108:('[CH3]~*~*~*~[CH2]~*',0), # CH3AAACH2A
  109:('*~[CH2]~[#8]',0), # ACH2O
  110:('[#7]~[#6]~[#8]',0), # NCO
  111:('[#7]~*~[CH2]~*',0), # NACH2A
  112:('*~*(~*)(~*)~*',0), # AA(A)(A)A
  113:('[#8]!:*:*',0), # Onot%A%A
  114:('[CH3]~[CH2]~*',0), # CH3CH2A
  115:('[CH3]~*~[CH2]~*',0), # CH3ACH2A
  116:('[$([CH3]~*~*~[CH2]~*),$([CH3]~*1~*~[CH2]1)]',0), # CH3AACH2A
  117:('[#7]~*~[#8]',0), # NAO
  118:('[$(*~[CH2]~[CH2]~*),$(*1~[CH2]~[CH2]1)]',1), # ACH2CH2A > 1
  119:('[#7]=*',0), # N=A
  120:('[!#6;R]',1), # Heterocyclic atom > 1 (&...) FIX: incomplete definition
  121:('[#7;R]',0), # N Heterocycle
  122:('*~[#7](~*)~*',0), # AN(A)A
  123:('[#8]~[#6]~[#8]',0), # OCO
  124:('[!#6;!#1]~[!#6;!#1]',0), # QQ
  125:('?',0), # Aromatic Ring > 1
  126:('*!@[#8]!@*',0), # A!O!A
  127:('*@*!@[#8]',1), # A$A!O > 1 (&...) FIX: incomplete definition
  128:('[$(*~[CH2]~*~*~*~[CH2]~*),$([R]1@[CH2;R]@[R]@[R]@[R]@[CH2;R]1),$(*~[CH2]~[R]1@[R]@[R]@[CH2;R]1),$(*~[CH2]~*~[R]1@[R]@[CH2;R]1)]',0), # ACH2AAACH2A
  129:('[$(*~[CH2]~*~*~[CH2]~*),$([R]1@[CH2]@[R]@[R]@[CH2;R]1),$(*~[CH2]~[R]1@[R]@[CH2;R]1)]',0), # ACH2AACH2A
  130:('[!#6;!#1]~[!#6;!#1]',1), # QQ > 1 (&...)  FIX: incomplete definition
  131:('[!#6;!#1;!H0]',1), # QH > 1
  132:('[#8]~*~[CH2]~*',0), # OACH2A
  133:('*@*!@[#7]',0), # A$A!N
  134:('[F,Cl,Br,I]',0), # X (HALOGEN)
  135:('[#7]!:*:*',0), # Nnot%A%A
  136:('[#8]=*',1), # O=A>1 
  137:('[!C;!c;R]',0), # Heterocycle
  138:('[!#6;!#1]~[CH2]~*',1), # QCH2A>1 (&...) FIX: incomplete definition
  139:('[O;!H0]',0), # OH
  140:('[#8]',3), # O > 3 (&...) FIX: incomplete definition
  141:('[CH3]',2), # CH3 > 2  (&...) FIX: incomplete definition
  142:('[#7]',1), # N > 1
  143:('*@*!@[#8]',0), # A$A!O
  144:('*!:*:*!:*',0), # Anot%A%Anot%A
  145:('*1~*~*~*~*~*~1',1), # 6M ring > 1
  146:('[#8]',2), # O > 2
  147:('[$(*~[CH2]~[CH2]~*),$([R]1@[CH2;R]@[CH2;R]1)]',0), # ACH2CH2A
  148:('*~[!#6;!#1](~*)~*',0), # AQ(A)A
  149:('[C;H3,H4]',1), # CH3 > 1
  150:('*!@*@*!@*',0), # A!A$A!A
  151:('[#7;!H0]',0), # NH
  152:('[#8]~[#6](~[#6])~[#6]',0), # OC(C)C
  153:('[!#6;!#1]~[CH2]~*',0), # QCH2A
  154:('[#6]=[#8]',0), # C=O
  155:('*!@[CH2]!@*',0), # A!CH2!A
  156:('[#7]~*(~*)~*',0), # NA(A)A
  157:('[#6]-[#8]',0), # C-O
  158:('[#6]-[#7]',0), # C-N
  159:('[#8]',1), # O>1
  160:('[C;H3,H4]',0), #CH3
  161:('[#7]',0), # N
  162:('a',0), # Aromatic
  163:('*1~*~*~*~*~*~1',0), # 6M Ring
  164:('[#8]',0), # O
  165:('[R]',0), # Ring
  166:('?',0), # Fragments  FIX: this can't be done in SMARTS

# obabel -:"CNO" -oftp -xs
# 24: N-O	68: QHQH (&...) 	69: QQH	71: NO	93: QCH3	94: QN	102: QO
# 124: QQ	131: QH > 1	*2  139: OH	151: NH	158: C-N	160: CH3	161: N	164: O