This file is indexed.

/usr/share/pyshared/chemfp/openbabel.py is in python-chemfp 1.1p1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
"Create Open Babel fingerprints"

# Copyright (c) 2010-2013 Andrew Dalke Scientific, AB (Gothenburg, Sweden)
# See the contents of "__init__.py" for full license details.

from __future__ import absolute_import

import sys
import os
import struct
import warnings
import itertools

import sys
import openbabel as ob

from . import ParseError
from . import io
from . import types
from . import error_handlers


# OpenBabel really wants these two variables. I get a segfault if
# BABEL_LIBDIR isn't defined, and from the mailing list, some of the
# code doesn't work correctly without BABEL_DATADIR. I've had problems
# where I forget to set these variables, so check for them now and
# warn about possible problems.

#if "BABEL_LIBDIR" not in os.environ:
#    warnings.warn("BABEL_LIBDIR is not set")

#else:
#  ... check that SMILES and a few other things are on the path ...
#  but note that BABEL_LIBDIR is a colon (or newline or control-return?)
#  separated field whose behaviour isn't well defined in the docs.
#  I'm not going to do additional checking without a stronger need.


# Public API of this module: only read_structures() is exported by
# "from chemfp.openbabel import *". Everything else is an
# implementation detail.
__all__ = ["read_structures"]

# The fingerprint packing code in this module uses "<I..." struct
# formats and relies on each "I" being 4 bytes. The "<" prefix selects
# the struct module's "standard" sizes, where "I" is documented as
# 4 bytes, so the following is an excess of caution.
if struct.calcsize("<I") != 4:
    raise AssertionError("The chemfp.ob module assumes 32 bit integers")


# OpenBabel 2.2 doesn't expose "obErrorLog" to Python. read_structures()
# consults this flag before it touches the error log.
HAS_ERROR_LOG = hasattr(ob, "obErrorLog")

# In OpenBabel 2.3.0, OBConversion() must be called before trying to
# find any plugin. This was not needed in earlier releases. The new
# instance is discarded; the call is made only for that side effect.
ob.OBConversion()

# OpenBabel before 2.3 didn't have a function to return the version.
# I've brought this up on the list, and it's in 2.3. I can fake
# support for older versions by reading the PDB output text.

def _emulated_OBReleaseVersion():
    """Return the Open Babel version string by parsing generated PDB text.

    Fallback for Open Babel releases that do not provide
    OBReleaseVersion(). The SMILES "C" is converted to PDB format and
    the last whitespace-separated field of the line containing
    "GENERATED BY OPEN BABEL" is returned. If no such line is present
    the result is "<unknown>".
    """
    converter = ob.OBConversion()
    converter.SetInFormat("smi")
    converter.SetOutFormat("pdb")

    probe_mol = ob.OBMol()
    converter.ReadString(probe_mol, "C")
    pdb_text = converter.WriteString(probe_mol)

    marker = "GENERATED BY OPEN BABEL"
    for pdb_line in pdb_text.splitlines():
        if marker in pdb_line:
            fields = pdb_line.split()
            return fields[-1]
    return "<unknown>"

# Prefer the toolkit's own OBReleaseVersion() when the installed
# openbabel module provides it; otherwise fall back to the emulation
# defined above, which parses the version out of generated PDB text.
try:
    from openbabel import OBReleaseVersion
except ImportError:
    OBReleaseVersion = _emulated_OBReleaseVersion
# Resolved once, at import time.
_ob_version = OBReleaseVersion()

# Toolkit identifier; passed as the "software" setting of the
# fingerprint family configuration further down in this module.
SOFTWARE = "OpenBabel/" + _ob_version


# OpenBabel fingerprints are stored as vector<unsigned int>.  On all
# the machines I use, ints have 32 bits.

# OpenBabel bit lengths must be at least sizeof(int)*8 bits long and
# must be a power of two. I have no idea why this is required.

# OpenBabel supports new fingerprints through a plugin system.  I got
# it working thanks to Noel O'Boyle's excellent work with Cinfony. I
# then found out that the OB API doesn't have any way to get the
# number of bits in the fingerprint. The size is rounded up to the
# next power of two, so FP4 (307 bits) needs 512 bits (16 ints)
# instead of 320 bits (10 ints). That means I can't even get close to
# guessing the bitsize.

# In the end, I hard-coded the supported fingerprints into the system.



############

# I could have written a more general function which created these but
# there are only a few fingerprint lengths to worry about.

# This needs 128 bytes, for 1024 bits
# vectorUnsignedInt will contain 32 32-bit words = 1024 bits

# Maps a fingerprint name to (fingerprinter, fingerprinter.GetFingerprint),
# or to (None, None) when Open Babel could not find that fingerprinter.
_ob_get_fingerprint = {}
def _init():
    """Populate _ob_get_fingerprint and sanity-check the FP2 fingerprinter.

    Raises ImportError if the FP2 fingerprinter cannot be found, and
    AssertionError if it does not report 32 bits per int.
    """
    # An OBConversion must be created before looking for plugins.
    ob.OBConversion()

    for name in ("FP2", "FP3", "FP4", "MACCS"):
        fingerprinter = ob.OBFingerprint.FindFingerprint(name)
        if fingerprinter is not None:
            entry = (fingerprinter, fingerprinter.GetFingerprint)
        else:
            entry = (None, None)
        _ob_get_fingerprint[name] = entry

    fp2_fingerprinter = _ob_get_fingerprint["FP2"][0]
    if fp2_fingerprinter is None:
        raise ImportError("Unable to load OpenBabel FP2 fingerprinter. Check $BABEL_LIBDIR")

    bits_per_int = fp2_fingerprinter.Getbitsperint()
    if bits_per_int != 32:
        raise AssertionError(
            "The chemfp.ob module assumes OB fingerprints have 32 bit integers")

_init()

def calc_FP2(mol, fp=None,
             get_fingerprint=_ob_get_fingerprint["FP2"][1],
             _pack_1024=struct.Struct("<" + "I"*32).pack):
    """Return the FP2 fingerprint of `mol` as a 128 byte string.

    mol -- the molecule handed to the Open Babel FP2 fingerprinter
    fp -- optional ob.vectorUnsignedInt to fill; a new one is created
        when this is None

    The remaining parameters are bound when the function is defined
    and are not meant to be passed by callers. The words of the filled
    vector are packed as 32 little-endian unsigned 32-bit integers.
    """
    words = ob.vectorUnsignedInt() if fp is None else fp
    get_fingerprint(mol, words)
    packed = _pack_1024(*words)
    return packed

# This needs 7 bytes, for 56 bits.
# vectorUnsignedInt will contain 2 32-bit words = 64 bits
def calc_FP3(mol, fp=None,
             get_fingerprint=_ob_get_fingerprint["FP3"][1],
             _pack_64=struct.Struct("<II").pack):
    """Return the FP3 fingerprint of `mol` as a 7 byte string.

    mol -- the molecule handed to the Open Babel FP3 fingerprinter
    fp -- optional ob.vectorUnsignedInt to fill; a new one is created
        when this is None

    The remaining parameters are bound when the function is defined
    and are not meant to be passed by callers. The words of the filled
    vector are packed as 2 little-endian unsigned 32-bit integers
    (8 bytes) and the result is truncated to its first 7 bytes.
    """
    words = ob.vectorUnsignedInt() if fp is None else fp
    get_fingerprint(mol, words)
    packed = _pack_64(*words)
    return packed[:7]

# This needs 39 bytes, for 312 bits
# vectorUnsignedInt will contain 16 32-bit words = 512 bits
def calc_FP4(mol, fp=None,
             get_fingerprint=_ob_get_fingerprint["FP4"][1],
             _pack_512=struct.Struct("<" + "I"*16).pack):
    """Return the FP4 fingerprint of `mol` as a 39 byte string.

    mol -- the molecule handed to the Open Babel FP4 fingerprinter
    fp -- optional ob.vectorUnsignedInt to fill; a new one is created
        when this is None

    The remaining parameters are bound when the function is defined
    and are not meant to be passed by callers. The words of the filled
    vector are packed as 16 little-endian unsigned 32-bit integers
    (64 bytes) and the result is truncated to its first 39 bytes.
    """
    words = ob.vectorUnsignedInt() if fp is None else fp
    get_fingerprint(mol, words)
    packed = _pack_512(*words)
    return packed[:39]

# This needs 21 bytes, for 166 bits
# vectorUnsignedInt will contain 8 32-bit words = 256 bits
# (Remember, although 6 words * 32-bits/word = 192, the OpenBabel
# fingerprint size must be a power of 2, and the closest is 8*32.)
def calc_MACCS(mol, fp=None,
               get_fingerprint=_ob_get_fingerprint["MACCS"][1],
               _pack_256=struct.Struct("<" + "I"*8).pack):
    """Return the MACCS fingerprint of `mol` as a 21 byte string.

    mol -- the molecule handed to the Open Babel MACCS fingerprinter
    fp -- optional ob.vectorUnsignedInt to fill; a new one is created
        when this is None

    The remaining parameters are bound when the function is defined
    and are not meant to be passed by callers. The words of the filled
    vector are packed as 8 little-endian unsigned 32-bit integers
    (32 bytes) and the result is truncated to its first 21 bytes.
    """
    words = ob.vectorUnsignedInt() if fp is None else fp
    get_fingerprint(mol, words)
    packed = _pack_256(*words)
    return packed[:21]


# OpenBabel version up to 2.3.0 contained errors in the
# translation of the MACCS patterns from RDKit.
# Post-2.3.0 fixed in version control.
# MACCS might also be missing if BABEL_DATADIR doesn't exist.
# Both values are filled in by _check_for_maccs() at import time.
HAS_MACCS = False
MACCS_VERSION = 0

def _check_for_maccs():
    """Set HAS_MACCS and MACCS_VERSION for the installed Open Babel.

    If no MACCS fingerprinter was found then the globals are left
    unchanged; a warning is issued unless the toolkit version starts
    with "2.2.". Otherwise a reference structure is fingerprinted and
    the result is compared with the two known outputs to decide which
    MACCS definition is installed.

    Raises AssertionError if the reference fingerprint matches neither
    known output.
    """
    global HAS_MACCS, MACCS_VERSION

    maccs_entry = _ob_get_fingerprint["MACCS"]
    if maccs_entry == (None, None):
        if not _ob_version.startswith("2.2."):
            # MACCS should be here. Report the most likely reason
            if "BABEL_DATADIR" in os.environ:
                warnings.warn("MACCS fingerprint missing; perhaps due to BABEL_DATADIR?")
            else:
                warnings.warn("MACCS fingerprint missing; perhaps due to missing BABEL_DATADIR?")
        return

    HAS_MACCS = 1

    # OpenBabel 2.3.0 released the MACCS keys but with a bug in the SMARTS.
    # While they are valid substructure keys, they are not really MACCS keys.
    # Fingerprint a fixed structure at run-time to figure out which
    # definition is installed.
    known_fingerprints = (
        ("\x80\x04\x00\x00\x00\x02\x08\x00\x19\xc4@\xea\xcdl\x98\x0b\xae\xa1x\xef\x1b", 1),
        ("\x00\x00\x00\x00\x00\x02\x08\x00\x19\xc4D\xea\xcdl\x98\x0b\xae\xa1x\xef\x1f", 2),
    )

    converter = ob.OBConversion()
    converter.SetInFormat("smi")
    reference_mol = ob.OBMol()
    converter.ReadString(reference_mol, "CC1=CC(=NN1CC(=O)NNC(=O)C=CC2=C(C=CC=C2Cl)F)C")
    fp = calc_MACCS(reference_mol)

    for expected_fp, version in known_fingerprints:
        if fp == expected_fp:
            MACCS_VERSION = version
            break
    else:
        raise AssertionError("Unknown MACCS fingerprint version: %r" % (fp,))

_check_for_maccs()


#########

def is_valid_format(format):
    """Return True if `format` names a structure format this module can read.

    None is always accepted. Otherwise the name is normalized with
    io.normalize_format(); it is rejected if that raises ValueError,
    if the compression is anything other than "" or ".gz", or if
    Open Babel does not accept the format name as an input format.
    """
    if format is None:
        return True

    try:
        format_name, compression = io.normalize_format(None, format, ("smi", ""))
    except ValueError:
        return False

    if compression not in ("", ".gz"):
        return False

    # SetInFormat() reports whether Open Babel knows the format.
    if ob.OBConversion().SetInFormat(format_name):
        return True
    return False

def _get_ob_error(log):
    """Return the obError-level messages of `log` joined into one string."""
    error_messages = log.GetMessagesOfLevel(ob.obError)
    combined = "".join(error_messages)
    return combined

def read_structures(filename=None, format=None, id_tag=None, errors="strict"):
    """read_structures(filename, format, id_tag, errors) -> (id, OBMol) iterator

    Iterate over the structures in `filename`, yielding the record
    identifier and the OBMol for each record.

    filename -- name of the structure file. If it is None (or any other
        false value, such as "") the structures are read from stdin,
        through io.DEV_STDIN.
    format -- the structure format, resolved together with the filename
        by io.normalize_format(); the default is ("smi", ""). Only
        uncompressed and ".gz" inputs are accepted.
    id_tag -- if None, the record title is used as the identifier;
        otherwise the value of the data field with this name is used.
    errors -- passed to error_handlers.get_parse_error_handler() to
        select the handler called for records without a usable
        identifier.

    Raises TypeError if `filename` is neither None nor a string,
    ValueError for an unsupported compression or an unknown format,
    NotImplementedError if stdin is requested but io.DEV_STDIN is None,
    and IOError if the named file cannot be opened or if Open Babel
    logged an error while failing to read the first record.

    The same OBMol instance is reused for every record, so use each
    one before asking the iterator for the next.
    """
    if not (filename is None or isinstance(filename, basestring)):
        raise TypeError("'filename' must be None or a string")
    error_handler = error_handlers.get_parse_error_handler(errors)

    obconversion = ob.OBConversion()
    format_name, compression = io.normalize_format(filename, format,
                                                   default=("smi", ""))
    if compression not in ("", ".gz"):
        raise ValueError("Unsupported compression type for %r" % (filename,))

    # OpenBabel auto-detects gzip compression.

    if not obconversion.SetInFormat(format_name):
        raise ValueError("Unknown structure format %r" % (format_name,))

    obmol = ob.OBMol()

    if not filename:
        filename = io.DEV_STDIN
        if filename is None:
            raise NotImplementedError("Unable to read from stdin on this operating system")
        success = obconversion.ReadFile(obmol, filename)
        filename_repr = "<stdin>"

    else:
        # Deal with OpenBabel's logging
        if HAS_ERROR_LOG:
            ob.obErrorLog.ClearLog()
            lvl = ob.obErrorLog.GetOutputLevel()
            ob.obErrorLog.SetOutputLevel(-1) # Suppress messages to stderr

        try:
            success = obconversion.ReadFile(obmol, filename)
        finally:
            # The output level is global state. Restore it even if
            # ReadFile() raises, otherwise Open Babel messages stay
            # suppressed for the rest of the process.
            if HAS_ERROR_LOG:
                ob.obErrorLog.SetOutputLevel(lvl) # Restore message level
        filename_repr = repr(filename)

        errmsg = None
        if HAS_ERROR_LOG and ob.obErrorLog.GetErrorMessageCount():
            errmsg = _get_ob_error(ob.obErrorLog)

        if not success:
            # Either there was an error or there were no structures.
            open(filename).close() # Make sure the file can be opened for reading

            # If I get here then the file exists and is readable.

            # If there was an error message then use it.
            if errmsg is not None:
                # Okay, don't know what's going on. Report OB's error
                raise IOError(5, errmsg, filename)

    # We've opened the file. Switch to the iterator.
    return _file_reader(obconversion, obmol, success, id_tag, filename_repr, error_handler)

def _file_reader(obconversion, obmol, success, id_tag, filename_repr, error_handler):
    """Generate (id, obmol) pairs for the records read through `obconversion`.

    obconversion -- the OBConversion used to read the following records
    obmol -- the OBMol holding the current record; it is cleared and
        refilled for each new record, so the same object is yielded
        every time
    success -- true if `obmol` already holds the first record
    id_tag -- None to take the identifier from the record title,
        otherwise the name of the data field holding the identifier
    filename_repr -- text describing the input, used in error messages
    error_handler -- called with a message when a record has no usable
        identifier; if it returns, that record is skipped
    """
    # How do I detect if the input contains a failure?
    recno = 0

    def location():
        # Only evaluated when a problem has to be reported.
        return " for record #%d of %s" % (recno, filename_repr)

    while success:
        recno += 1

        if id_tag is None:
            record_id = io.remove_special_characters_from_id(obmol.GetTitle())
            if record_id:
                yield record_id, obmol
            else:
                error_handler("Missing title" + location())
        else:
            tag_data = obmol.GetData(id_tag)
            if tag_data is None:
                error_handler("Missing id tag %r%s" % (id_tag, location()))
            else:
                record_id = io.remove_special_characters_from_id(tag_data.GetValue())
                if record_id:
                    yield record_id, obmol
                else:
                    error_handler("Empty id tag %r" % (id_tag,) + location())

        # Move on to the next record, reusing the same molecule object.
        obmol.Clear()
        success = obconversion.Read(obmol)

#####

from .types import FingerprintFamilyConfig

def _read_structures(metadata, source, format, id_tag, errors):
    """Adapter around read_structures() used by the fingerprint families.

    Raises ValueError if metadata.aromaticity is not None; otherwise
    returns read_structures(source, format, id_tag, errors).
    """
    aromaticity = metadata.aromaticity
    if aromaticity is None:
        return read_structures(source, format, id_tag, errors)
    raise ValueError("Open Babel does not support alternate aromaticity models "
                     "(want aromaticity=%r)" % aromaticity)

# Settings shared by every Open Babel fingerprint family defined below.
_base = FingerprintFamilyConfig(
    software=SOFTWARE,
    read_structures=_read_structures,
)


# Each factory returns the function which computes that fingerprint.
def _get_calc_FP2():
    return calc_FP2

def _get_calc_FP3():
    return calc_FP3

def _get_calc_FP4():
    return calc_FP4


OpenBabelFP2FingerprintFamily_v1 = _base.clone(
    name="OpenBabel-FP2/1",
    num_bits=1021,
    make_fingerprinter=_get_calc_FP2)

OpenBabelFP3FingerprintFamily_v1 = _base.clone(
    name="OpenBabel-FP3/1",
    num_bits=55,
    make_fingerprinter=_get_calc_FP3)

OpenBabelFP4FingerprintFamily_v1 = _base.clone(
    name="OpenBabel-FP4/1",
    num_bits=307,
    make_fingerprinter=_get_calc_FP4)


def _check_calc_MACCS_v1():
    """Return calc_MACCS after checking that version 1 MACCS keys are installed.

    Raises AssertionError if HAS_MACCS is false or if MACCS_VERSION,
    as detected at import time, is not 1.
    """
    # Explicit raises rather than "assert" statements: asserts are
    # removed under "python -O", which would let fingerprints from the
    # wrong MACCS definition be labeled as OpenBabel-MACCS/1.
    if not HAS_MACCS:
        raise AssertionError(
            "OpenBabel-MACCS/1 is not available: no MACCS fingerprinter was found")
    if MACCS_VERSION != 1:
        raise AssertionError(
            "OpenBabel-MACCS/1 is not available: this Open Babel generates "
            "MACCS version %r" % (MACCS_VERSION,))
    return calc_MACCS

OpenBabelMACCSFingerprintFamily_v1 = _base.clone(
    name = "OpenBabel-MACCS/1",
    num_bits = 166,
    make_fingerprinter = _check_calc_MACCS_v1)


def _check_calc_MACCS_v2():
    """Return calc_MACCS after checking that version 2 MACCS keys are installed.

    Raises AssertionError if HAS_MACCS is false or if MACCS_VERSION,
    as detected at import time, is not 2.
    """
    # Explicit raises rather than "assert" statements: asserts are
    # removed under "python -O", which would let fingerprints from the
    # wrong MACCS definition be labeled as OpenBabel-MACCS/2.
    if not HAS_MACCS:
        raise AssertionError(
            "OpenBabel-MACCS/2 is not available: no MACCS fingerprinter was found")
    if MACCS_VERSION != 2:
        raise AssertionError(
            "OpenBabel-MACCS/2 is not available: this Open Babel generates "
            "MACCS version %r" % (MACCS_VERSION,))
    return calc_MACCS

OpenBabelMACCSFingerprintFamily_v2 = _base.clone(
    name = "OpenBabel-MACCS/2",
    num_bits = 166,
    make_fingerprinter = _check_calc_MACCS_v2)