This file is indexed.

/usr/share/RDKit/Contrib/M_Kossner/Frames.py is in rdkit-data 201603.5-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/python
# encoding: utf-8

#	Jan 2011	(markus kossner)	Cleaned up the code, added some documentation
#	somewhere around Aug 2008	(markus kossner)	created
#    
#    This script extracts the molecular framework for a database of molecules.
#    You can use two modes (hard coded): 
#    - Scaff:	The molecular frame is extracted
#    - RedScaff:	All linking chains between rings are deleted. The rings are directly connected.
#    
#    You can comment in/out the code snippets indicated by the comments 
#    to force each atom of the frame to be a Carbon.
#    
#    Usage: Frames.py <database.sdf>
#    Output: 
#    - sd files containing all molecules belonging to one frame (1.sdf, 2.sdf etc)
#    - frames.smi containing the (canonical) smiles and count of occurrence
#
from __future__ import print_function

import os,sys
from Chem import AllChem as Chem

def flatten(x):
    """flatten(sequence) -> list

    Return a single, flat list containing every element of ``x`` and of
    all nested sub-sequences (anything exposing ``__iter__``), in
    depth-first, left-to-right order.

    Strings (and byte strings) are treated as atomic values and are
    never split into characters.

    Examples:
    >>> flatten([1, 2, [3, 4], (5, 6)])
    [1, 2, 3, 4, 5, 6]
    >>> flatten([[[1, 2, 3], (42, None)], [4, 5], [6], 7])
    [1, 2, 3, 42, None, 4, 5, 6, 7]
    >>> flatten(["ab", ["cd"]])
    ['ab', 'cd']
    """
    # ``basestring`` only exists on Python 2; referencing it unguarded
    # raises NameError on Python 3 as soon as a nested iterable is seen.
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = (str, bytes)  # Python 3

    result = []
    for el in x:
        # Recurse into nested iterables, but keep strings whole: on
        # Python 3 a str is iterable and would otherwise recurse forever.
        if hasattr(el, "__iter__") and not isinstance(el, string_types):
            result.extend(flatten(el))
        else:
            result.append(el)
    return result


def GetFrame(mol, mode='Scaff'):
    '''Return a generic molecule defining the (reduced) scaffold of ``mol``.

    mode can be 'Scaff' or 'RedScaff':

    Scaff     ->  chop off the side chains and return the scaffold
                  (ring atoms plus the chain atoms lying on a path
                  between two ring atoms).

    RedScaff  ->  remove all linking chains and connect the rings
                  directly at the atoms where the linker was attached
                  (with a single bond).

    ``mol`` is not modified; the result is built through
    ``Chem.EditableMol`` and returned via its ``GetMol()``.
    Any other value of ``mode`` yields ``None``.
    '''
    # Collect ring membership as a set: it is queried for every neighbor
    # of every atom visited below.
    ring_atoms = set()
    for ring in mol.GetRingInfo().AtomRings():
        ring_atoms.update(ring)

    # Keep the non-ring atoms both ordered (iteration) and as a set (lookup).
    non_ring_list = [atom.GetIdx() for atom in mol.GetAtoms()
                     if atom.GetIdx() not in ring_atoms]
    non_ring_atoms = set(non_ring_list)

    # Every non-ring atom bonded to a ring atom starts a walk. A path is
    # stored as [ring atom, chain atom, chain atom, ...]; only the first
    # ring neighbor found is used as the start of the walk.
    paths = []
    for idx in non_ring_list:
        for neighbor in mol.GetAtomWithIdx(idx).GetNeighbors():
            if neighbor.GetIdx() in ring_atoms:
                paths.append([neighbor.GetIdx(), idx])
                break

    framework = set(ring_atoms)
    linkers = []
    # Extend all paths one bond at a time. A path that reaches another
    # ring atom is a linker; paths that dead-end simply die out.
    while paths:
        new_paths = []
        for path in paths:
            for neighbor in mol.GetAtomWithIdx(path[-1]).GetNeighbors():
                nidx = neighbor.GetIdx()
                if nidx in path:
                    continue  # never walk back over the same atom
                if nidx in non_ring_atoms:
                    new_paths.append(path + [nidx])
                elif nidx in ring_atoms:
                    linkers.append(path + [nidx])
                    framework.update(path)
        paths = new_paths

    if mode == 'RedScaff':
        em = Chem.EditableMol(mol)
        # Bridge the two ring atoms at the ends of every linker. Each
        # linker is found once from either end, so de-duplicate on the
        # sorted atom pair.
        seen = set()
        for linker in linkers:
            bond = tuple(sorted((linker[0], linker[-1])))
            if bond not in seen:
                seen.add(bond)
                em.AddBond(bond[0], bond[1], Chem.BondType.SINGLE)
        todel = non_ring_list
    elif mode == 'Scaff':
        em = Chem.EditableMol(mol)
        todel = [idx for idx in non_ring_list if idx not in framework]
    else:
        # Unknown mode: keep the historical behaviour of returning None.
        return None

    # Remove from the highest index downwards so that the indices of the
    # atoms still to be removed stay valid while the molecule shrinks.
    for idx in sorted(todel, reverse=True):
        em.RemoveAtom(idx)
    m = em.GetMol()

    # ----------------------------------------------------------------
    # Optional flattening of atoms and bonds (disabled by default):
    # uncomment the lines below to turn every heavy atom into a carbon
    # and every bond into a single bond.
    # ----------------------------------------------------------------
    # for atom in m.GetAtoms():
    #     atom.SetAtomicNum(6)
    #     atom.SetFormalCharge(0)
    # for bond in m.GetBonds():
    #     bond.SetBondType(Chem.BondType.SINGLE)
    # Chem.SanitizeMol(m)
    return m

if __name__ == '__main__':
    # Script entry point: group the molecules of an SD file by their
    # framework, write one SD file per framework (1.sdf, 2.sdf, ...) and
    # a summary 'frames.smi' with the framework SMILES and member count.
    if len(sys.argv) < 2:
        print("No input file provided: Frames.py filetosprocess.ext")
        sys.exit(1)

    suppl = Chem.SDMolSupplier(sys.argv[1])
    FrameDict = {}

    for mol in suppl:
        if mol is None:
            # The supplier yields None for records it could not parse;
            # passing that on to GetFrame would crash the whole run.
            print('skipping unreadable molecule', file=sys.stderr)
            continue
        m = GetFrame(mol)
        cansmiles = Chem.MolToSmiles(m, isomericSmiles=True)
        # dict.has_key() no longer exists on Python 3.
        FrameDict.setdefault(cansmiles, []).append(mol)

    counter = 0
    # The context manager guarantees frames.smi is flushed and closed
    # (the former bare `w.close` never actually called close()).
    with open('frames.smi', 'w') as w:
        for key, item in FrameDict.items():
            counter += 1
            d = Chem.SDWriter(str(counter) + '.sdf')
            try:
                for i in item:
                    i.SetProp('Scaffold', key)
                    i.SetProp('Cluster', str(counter))
                    d.write(i)
            finally:
                # Close explicitly so each cluster file is complete on disk.
                d.close()
            print(key, len(item))
            w.write(key + '\t' + str(len(item)) + '\n')
    print('number of Clusters: %d' % (counter))