This file is indexed.

/usr/lib/python2.7/dist-packages/pbsuite/utils/longCCSCombine.py is in python-pbsuite-utils 15.8.24+dfsg-2.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/python
import sys, argparse
from collections import defaultdict, namedtuple
from pbsuite.utils.FileHandlers import FastqFile


USAGE = "Replace single pass reads from a ZMW with CCS read when created"
class Sequence():
    def __init__(self, name, seq, qual):
        self.name = name
        self.seq  = seq
        self.qual = qual
    def toString(self):
        return "@%s\n%s\n+\n%s\n" % (self.name, self.seq, self.qual)
    
def fastqIterator(fn):
        """
        Uses yield to allow fastqFile iteration
        Input, a file name
        """
        fh = open(fn,'r')
        while True:
                
                name = fh.readline().strip()[1:]
                seq = fh.readline().strip()
                plus = fh.readline().strip()
                qul = fh.readline().strip()
                
                if qul == "":
                        break
                
                yield Sequence(name, seq, qul)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=USAGE)
    parser.add_argument("filtered_subreads", type=str, \
                        help="Fastq of single pass reads")
    parser.add_argument("ccs_reads", type=str, \
                        help="Fastq of ccs reads")
    
    parser.add_argument("-o", "--output", type=str, default=None, \
                        help="Output fastq file (STDOUT)")
    args = parser.parse_args()

    ccs = FastqFile(args.ccs_reads)
    
    cKeys = ccs.keys()
    if args.output != None:
        output = open(args.output,'w')
    else:
        output = sys.stdout
    
        
    #name: numBases
    ccsReads = defaultdict(int)
    subReads = {}
    ccsPases = defaultdict(int)
    
    #sub = FastqFile(args.filtered_subreads)
    for read in fastqIterator(args.filtered_subreads):
        ccsKey = "/".join(read.name.split('/')[:2])
        try:
            seq = ccs[ccsKey]
            ccsPases[ccsKey] += 1
            subReads[read] = len(read.seq)
            if ccsReads[ccsKey] > 0:
                continue
            ccsReads[ccsKey] = len(read.seq)
        except KeyError:
            seq = read
            
        output.write(seq.toString())
    
    output.close()

    def medianLen(lst):
        lst.sort()
        p = len(lst)/2
        return lst[p]
    
    a = len(ccsReads.keys())
    sys.stderr.write("+CCS reads    : %d\n" % (a))
    b = sum(ccsReads.values())
    sys.stderr.write("+CCS bases    : %d\n" % (b))
    c = len(subReads.keys())
    sys.stderr.write("-SUB reads    : %d\n" % (c))
    d = sum(subReads.values())
    sys.stderr.write("-SUB bases    : %d\n" % (d))
    sys.stderr.write("Avg SUB Len   : %d\n" % (int(sum(subReads.values()))/float(len(subReads))))
    sys.stderr.write("Avg CCS Len   : %d\n" % (int(sum(ccsReads.values()))/float(len(ccsReads))))
    sys.stderr.write("Avg SUB / CCS : %.3f\n" % (sum(ccsPases.values())/float(len(ccsPases))))
    
    #sys.stderr.write("Med CCS RdLen : %.2f\n" % (medianLen(ccsReads.values())))
    #sys.stderr.write("Med Sub RdLen : %.2f\n" % (medianLen(subReads.values())))