This file is indexed.

/usr/share/pyshared/scitools/EfficiencyTable.py is in python-scitools 0.9.0-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
"""
This module contains a class for managing efficiency/performance
experiments.
"""


class EfficiencyTable:
    """
    Manage the CPU times of efficiency experiments and make
    tabular reports with sorted results.

    >>> e = EfficiencyTable('some description of the experiments')
    >>> e.add('about an experiment', cpu_time)
    >>> e.add('about another experiment', cpu_time)
    >>> e.add('about a third experiment', cpu_time)
    >>> print(e)  # prints a sorted table with scaled CPU times
    >>> e += e2   # add experiments from EfficiencyTable e2 to e
    """
    def __init__(self, description, normalization_time=None):
        """
        @param description: a string acting as headline for this test.
        @param normalization_time: all CPU times will be divided by this
        value. If not set, the smallest (best) reliable CPU time among
        the registered experiments is used instead.

        The normalization time can also be set later with the
        set_normalization_time method. It is not used before an
        instance is printed (converted by the str method).
        """
        self.description = description
        # key=description, value=[CPU-time1, CPU-time2, ...]
        self.experiments = {}
        self._normalization_time = normalization_time

    def add(self, description, CPU_time, function_name=None):
        """
        Add the CPU time of an experiment, together with a description
        and an optional function_name (which is appended to the
        description string).

        Repeated calls with the same (final) description append to the
        list of CPU times stored for that description.
        """
        if function_name is not None:
            # include name of the tested function as part of the description:
            description = description + ' (%s)' % function_name
        self.experiments.setdefault(description, []).append(CPU_time)

    def __iadd__(self, other):
        """
        Add results in other to present results.
        New items (descriptions) are simply registered, while
        identical items (descriptions) will have a list of CPU times,
        corresponding to the different EfficiencyTable instances.

        The CPU times are copied, so self never shares a list object
        with other.
        """
        self.description += '\n' + other.description
        for e in other.experiments:
            # extend (rather than assign) so that a fresh list is used
            # for new descriptions and other's lists are never aliased:
            self.experiments.setdefault(e, []).extend(other.experiments[e])
        return self

    def __add__(self, other):
        """
        As e += table (see __iadd__), but the result is a new
        EfficiencyTable; neither operand is modified.
        """
        e = EfficiencyTable(self.description)
        # copy each list of CPU times; a shallow dict copy would let
        # __iadd__ extend (and thereby modify) the lists owned by self:
        e.experiments = dict((key, list(times))
                             for key, times in self.experiments.items())
        e.__iadd__(other)
        return e

    def set_normalization_time(self, t):
        """
        Set the CPU time by which all other CPU times will be divided.
        By default (no normalization time set), this is the smallest
        reliable CPU time encountered in the data.
        """
        self._normalization_time = t

    def _reference_CPU_time(self, experiment_idx=0):
        """
        Return the tuple (reference CPU time, description of the
        reference experiment).

        If a normalization time is set, it is returned together with
        the description of an experiment whose CPU time with index
        experiment_idx coincides with it (within 1.0E-10), or with the
        dummy description 'some external experiment' if there is no
        such experiment. The dummy description is not registered in
        self.experiments.

        Otherwise the smallest CPU time (index experiment_idx) larger
        than 1.0E-9 is returned. A ValueError is raised if no
        experiment has such a CPU time.
        """
        if self._normalization_time is not None:
            # try first to see if there is an experiment with the
            # given normalization time, and if so, use the corresponding
            # description, otherwise use a dummy description:
            for description in self.experiments:
                if abs(self.experiments[description][experiment_idx] -
                       self._normalization_time) < 1.0E-10:
                    return self._normalization_time, description
            # no experiment coincides with the given normalization time
            return self._normalization_time, 'some external experiment'

        # no given normalization time, find best performance:
        # (only search among positive CPU times for an experiment with
        # index experiment_idx)
        cpu_eps = 1.0E-9  # smallest reliable CPU time (but many repetitions
                          # may produce small time per call while the
                          # measurements are reliable)
        best = None
        best_key = None
        for description in self.experiments:
            cpu_time = self.experiments[description][experiment_idx]
            # drop counting very small (unreliable) or negative
            # (erroneous timings) CPU times:
            if cpu_time > cpu_eps and (best is None or cpu_time < best):
                best = cpu_time
                best_key = description

        if best is None:
            # did not find any CPU time > cpu_eps
            raise ValueError('too small CPU times (all less than %E)' % cpu_eps)
        return best, best_key

    def __str__(self):
        """
        Return a table of the experiments, sorted with respect to CPU time.
        In case of multiple CPU times per description of an experiment,
        the table is sorted with respect to the first CPU time entry of each
        multiple CPU times list (ties are ordered by description).
        All CPU times are divided by a normalization time, which is
        given to the constructor or to the set_normalization_time
        method, or if not prescribed, this class finds the smallest
        reliable CPU time (neglecting very small CPU time).

        When the reference is a registered experiment, column j of each
        row is divided by column j of the reference experiment. When
        the normalization time matches no registered experiment, every
        column is divided by the normalization time.

        The stored CPU times are not modified. The attributes best and
        best_key are set to the reference CPU time and the description
        of the reference experiment.
        """
        self.best, self.best_key = self._reference_CPU_time(0)
        if self.best_key in self.experiments:
            reference_times = self.experiments[self.best_key]
        else:
            # external normalization time, not tied to any experiment
            reference_times = None

        # sort descriptions by their first CPU time; the description is
        # the tie-breaker so that equal CPU times give a reproducible order
        ordered = sorted(self.experiments,
                         key=lambda d: (self.experiments[d][0], d))

        if reference_times is None:
            shown_reference = [self.best]
        else:
            shown_reference = reference_times
        s = '\n\n' + '*'*80 + '\n' + self.description + '\n' + '*'*80 + '\n'
        s += 'reference CPU time based on the experiment\n   "%s"\nwith '\
             'CPU time:\n  %s\n\n' % \
             (self.best_key, str(shown_reference)[1:-1])

        # the description column is wide enough for every description
        # and for the name of the reference experiment
        max_length = max([len(d) for d in self.experiments] +
                         [len(self.best_key)])
        row_label = '%%-%ds | ' % max_length
        rows = []
        for description in ordered:
            times = self.experiments[description]
            if reference_times is None:
                ref = [self.best]*len(times)
            else:
                ref = reference_times
            row = row_label % description
            for cpu_time, ref_time in zip(times, ref):
                # float() avoids integer division of int timings on Python 2
                nc = float(cpu_time)/ref_time
                if abs(nc) > 9999.0:
                    row += '%10.1e' % nc
                else:
                    row += '%8.2f' % nc
            rows.append(row + '\n')
        return s + ''.join(rows)

def plot(filename):
    """
    Read a table of numbers from filename and write Gnuplot input files
    in the current working directory.

    The table is taken to start three lines below the first line that
    contains the text 'CPU times'. Each table line has the form
    'name | number number ...'; lines without a '|' are skipped.

    For table line number k (counted from 1) the following is written:
      - tmp_plot_NN (NN = k, two digits): one line 'i  log10(value)' for
        each number i (counted from 1) whose base-10 logarithm is positive,
      - a line 'k: name' in the file tmp_plot_labels.
    Finally tmp_plot.gnuplot is written, with a plot command that draws
    each tmp_plot_NN file with lines, titled by its number k.

    Raises ValueError if no line contains 'CPU times' or if no table
    lines are found after it.
    """
    import math

    with open(filename, 'r') as f:
        lines = f.readlines()

    start = None
    for i, line in enumerate(lines):
        if line.find('CPU times') >= 0:
            start = i+3
            break
    if start is None:
        raise ValueError('no line containing "CPU times" in file %s'
                         % filename)

    # parse the table into (name, [log10 of each number]) pairs
    rows = []
    for line in lines[start:]:
        if '|' not in line:
            continue  # blank line or other text that is not a table row
        # the numbers follow the last '|' (the name itself may contain '|')
        name, numbers = line.rsplit('|', 1)
        values = []
        for x in numbers.split():
            v = float(x)
            try:
                v = math.log10(v)
            except ValueError:
                # zero or negative number: no logarithm, store 0.0
                # (which is left out of the data file below)
                v = 0.0
            values.append(v)
        rows.append((name, values))
    if not rows:
        raise ValueError('no table rows found in file %s' % filename)

    with open('tmp_plot_labels', 'w') as labels:
        for counter, (name, values) in enumerate(rows, 1):
            with open('tmp_plot_%02d' % counter, 'w') as f:
                for i, v in enumerate(values):
                    if v > 0.0:
                        f.write('%2d  %g\n' % (i+1, v))
            # same number as in the data file name and the Gnuplot title
            labels.write('%2d: %s\n' % (counter, name))

    # generate Gnuplot script:
    plotfiles = ['"tmp_plot_%02d" title "%d" with lines' % (i, i)
                 for i in range(1, len(rows)+1)]
    cmd = 'plot ' + ', '.join(plotfiles)
    with open('tmp_plot.gnuplot', 'w') as f:
        # the x range is based on the number of values in the last row
        f.write("""
set xrange [0:%d]
%s
""" % (len(rows[-1][1])+1, cmd))
                   
        
        
def _test(n):
    """
    Time empty loops of length n built with range, xrange and
    scitools' iseq, and print the resulting EfficiencyTable.

    Each loop statement is run 5 times by timeit. A statement that
    raises NameError (xrange does not exist on Python 3) is left out
    of the table.
    """
    # how much does it cost to run an empty loop with
    # range, xrange and iseq?
    import timeit
    e = EfficiencyTable('Empty loops, loop length = %d' % n)
    cases = [
        ('for i in range(n): pass', 'n=%d' % n),
        ('for i in xrange(n): pass', 'n=%d' % n),
        ('for i in iseq(stop=n-1): pass',
         'from scitools.numpyutils import iseq;' + 'n=%d' % n),
    ]
    for statement, setup in cases:
        try:
            cpu_time = timeit.Timer(statement, setup=setup).timeit(5)
        except NameError:
            # the statement uses a builtin this interpreter lacks
            continue
        e.add(statement, cpu_time)
    # print(e) with a single argument works on both Python 2 and 3
    print(e)
    
if __name__ == '__main__':
    # run the self-test; the loop length is the first command-line
    # argument, or 100 if it is missing or not an integer
    import sys
    try:
        n = int(sys.argv[1])
    except (IndexError, ValueError):
        n = 100
    _test(n)