This file is indexed.

/usr/lib/python2.7/dist-packages/pyFAI/ocl_azim.py is in pyfai 0.10.2-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
# -*- coding: utf-8 -*-
#
#    Project: Azimuthal integration
#             https://github.com/kif/pyFAI
#
#    Copyright (C) European Synchrotron Radiation Facility, Grenoble, France
#
#    Principal author:       Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#                            D. Karkoulis (dimitris.karkoulis@gmail.com)
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

"""
C++-free implementation of Dimitris' code, based on PyOpenCL

TODOs and tricks from Dimitris that are still missing:
  * dark-current subtraction is not implemented yet
  * Consider doing the uint16 -> float conversion on the GPU when possible.
    On the host this conversion was slow for large arrays. Uploading the
    uint16 data to the graphics card (half the bytes, hence 2x transfer
    speed) and converting to float there should be much faster.


"""
__author__ = "Jérôme Kieffer"
__license__ = "GPLv3"
__date__ = "29/09/2014"
__copyright__ = "2012, ESRF, Grenoble"
__contact__ = "jerome.kieffer@esrf.fr"

import os
import logging
import threading
import numpy
from .utils import concatenate_cl_kernel
from .opencl import ocl, pyopencl, allocate_cl_buffers, release_cl_buffers
# ``pyopencl`` is re-exported by .opencl; a falsy value is treated here as
# "binding not available", in which case importing this module fails.
if pyopencl:
    # Short alias for the buffer access flags (READ_ONLY, READ_WRITE, ...)
    mf = pyopencl.mem_flags
else:
    raise ImportError("pyopencl is not installed")
# Module-level logger used for debug traces and warnings
logger = logging.getLogger("pyFAI.ocl_azim_pyocl")


class Integrator1d(object):
    """
    Attempt to implement ocl_azim using pyopencl
    """
    BLOCK_SIZE = 128
    def __init__(self, filename=None):
        """

        @param filename: file in which profiling information are saved
        """
        self.BLOCK_SIZE = 128
        self.tdim = (self.BLOCK_SIZE,)
        self.wdim_bins = None
        self.wdim_data = None
        self._tth_min = self._tth_max = self.tth_min = self.tth_max = None
        self.nBins = -1
        self.nData = -1
        self.platformid = -1
        self.deviceid = -1
        self.useFp64 = False
        self.devicetype = "gpu"
        self.filename = filename
        self.tth_out = None
        if "write" in dir(filename):
            self.logfile = filename
        elif filename:
            self.logfile = open(self.filename, "a")
        else:
            self.logfile = None
        self.lock = threading.Semaphore()
        #Those are pointer to memory on the GPU (or None if uninitialized
        self._cl_mem = {"tth": None,
                        "tth_delta": None,
                        "image": None,
                        "solidangle": None,
                        "dark": None,
                        "mask": None,
                        "histogram": None,
                        "uhistogram": None,
                        "weights": None,
                        "uweights": None,
                        "span_ranges": None,
                        "tth_min_max": None,
                        "tth_range": None,
                        "dummyval": None,
                        "dummyval_delta": None}
        self._cl_kernel_args = {"uimemset2": [],
                                "create_histo_binarray": [],
                                "imemset": [],
                                "ui2f2": [],
                                "get_spans": [],
                                "group_spans": [],
                                "solidangle_correction": [],
                                "dummyval_correction": []}
        self._cl_program = None
        self._ctx = None
        self._queue = None
        self.do_solidangle = None
        self.do_dummy = None
        self.do_mask = None
        self.do_dark = None
        self.useTthRange = None

    def __dealloc__(self):
        self.tth_out = None
        self._queue.finish()
        self._free_buffers()
        self._free_kernels()
        self._cl_program = None
        self._queue = None
        self._ctx = None

    def __repr__(self):
        return os.linesep.join(
            [("PyOpenCL implementation of ocl_xrpd1d.ocl_xrpd1D_fullsplit"
              " C++ class. Logging in %s") % self.filename,
             ("device: %s, platform %s device %s"
              " 64bits:%s image size: %s histogram size: %s") %
             (self.devicetype, self.platformid, self.deviceid,
              self.useFp64, self.nData, self.nBins)])

    def log(self, **kwarg):
        """
        log in a file all opencl events
        """
        if self.logfile:
            for key, event in kwarg.items():
#                if  event is an event
                event.wait()
                self.logfile.write(
                    " %s: %.3fms\t" %
                    (key, (1e-6 * (event.profile.END - event.profile.START))))
            self.logfile.write(os.linesep)
            self.logfile.flush()

    def _allocate_buffers(self):
        """
        Allocate OpenCL buffers required for a specific configuration

        allocate_CL_buffers() is a private method and is called by
        configure().  Given the size of the image and the number of
        the bins, all the required OpenCL buffers are allocated.

        The method performs a basic check to see if the memory
        required by the configuration is smaller than the total global
        memory of the device. However, there is no built-in way in
        OpenCL to check the real available memory.

        In the case allocate_CL_buffers fails while allocating
        buffers, it will automatically deallocate the buffers that did
        not fail and leave the flag hasBuffers to 0.

        Note that an OpenCL context also requires some memory, as well
        as Event and other OpenCL functionalities which cannot and are
        not taken into account here.

        The memory required by a context varies depending on the
        device. Typical for GTX580 is 65Mb but for a 9300m is ~15Mb

        In addition, a GPU will always have at least 3-5Mb of memory
        in use.

        Unfortunately, OpenCL does NOT have a built-in way to check
        the actual free memory on a device, only the total memory.
        """
        utype = None
        if self.useFp64:
            utype = numpy.int64
        else:
            utype = numpy.int32

        buffers = [
            ("tth", mf.READ_ONLY, numpy.float32, self.nData),
            ("tth_delta", mf.READ_ONLY, numpy.float32, self.nData),
            ("tth_min_max", mf.READ_ONLY, numpy.float32, 2),
            ("tth_range", mf.READ_ONLY, numpy.float32, 2),
            ("mask", mf.READ_ONLY, numpy.int32, self.nData),
            ("image", mf.READ_ONLY, numpy.float32, self.nData),
            ("solidangle", mf.READ_ONLY, numpy.float32, self.nData),
            ("dark", mf.READ_ONLY, numpy.float32, self.nData),
            ("histogram", mf.READ_WRITE, numpy.float32, self.nBins),
            ("uhistogram", mf.READ_WRITE, utype, self.nBins),
            ("weights", mf.READ_WRITE, numpy.float32, self.nBins),
            ("uweights", mf.READ_WRITE, utype, self.nBins),
            ("span_ranges", mf.READ_WRITE, numpy.float32, self.nData),
            ("dummyval", mf.READ_ONLY, numpy.float32, 1),
            ("dummyval_delta", mf.READ_ONLY, numpy.float32, 1),
        ]

        if self.nData < self.BLOCK_SIZE:
            raise RuntimeError(("Fatal error in allocate_CL_buffers."
                                " nData (%d) must be >= BLOCK_SIZE (%d)\n"),
                               self.nData, self.BLOCK_SIZE)

        # TODO it seems that the device should be a member parameter
        # like in the most recent code
        device = ocl.platforms[self.platformid].devices[self.deviceid]
        self._cl_mem = allocate_cl_buffers(buffers, device, self._ctx)

    def _free_buffers(self):
        """
        Hand the current buffer dictionary to release_cl_buffers() and keep
        whatever it returns as the new buffer dictionary.
        """
        released = release_cl_buffers(self._cl_mem)
        self._cl_mem = released

    def _free_kernels(self):
        """
        free all kernels
        """
        for kernel in self._cl_kernel_args:
            self._cl_kernel_args[kernel] = []
        self._cl_program = None

    def _compile_kernels(self, kernel_file=None):
        """
        Build the OpenCL program for the current configuration.

        The block size, number of bins and number of pixels are passed to
        the compiler as BLOCK_SIZE, BINS and NN defines; ENABLE_FP64 is
        added when useFp64 is set.

        @param kernel_file: filename of the kernel (to test other kernels);
            any false value selects "ocl_azim_kernel_2.cl"
        @raise MemoryError: when pyopencl reports a MemoryError
        """
        if not kernel_file:
            kernel_file = "ocl_azim_kernel_2.cl"
        source = concatenate_cl_kernel([kernel_file])

        defines = ["-D BLOCK_SIZE=%i  -D BINS=%i -D NN=%i" %
                   (self.BLOCK_SIZE, self.nBins, self.nData)]
        if self.useFp64:
            defines.append(" -D ENABLE_FP64")
        build_options = "".join(defines)

        try:
            # The program is stored before being built, so it stays
            # referenced even if the build itself fails.
            program = pyopencl.Program(self._ctx, source)
            self._cl_program = program
            program.build(options=build_options)
        except pyopencl.MemoryError as error:
            raise MemoryError(error)

    def _set_kernel_arguments(self):
        """Tie arguments of OpenCL kernel-functions to the actual kernels

        set_kernel_arguments() is a private method, called by configure().
        It uses the dictionary _cl_kernel_args.

        Note that by default, since TthRange is disabled, the
        integration kernels have tth_min_max tied to the tthRange
        argument slot.

        When setRange is called it replaces that argument with
        tthRange low and upper bounds. When unsetRange is called, the
        argument slot is reset to tth_min_max.
        """
        self._cl_kernel_args["create_histo_binarray"] = \
            [self._cl_mem[i] for i in ("tth", "tth_delta", "uweights",
                                       "tth_min_max", "image", "uhistogram",
                                       "span_ranges", "mask", "tth_min_max")]
        self._cl_kernel_args["get_spans"] = \
            [self._cl_mem[i] for i in ["tth", "tth_delta",
                                       "tth_min_max", "span_ranges"]]
        self._cl_kernel_args["solidangle_correction"] = \
            [self._cl_mem["image"], self._cl_mem["solidangle"]]
        self._cl_kernel_args["dummyval_correction"] = \
            [self._cl_mem["image"], self._cl_mem["dummyval"],
             self._cl_mem["dummyval_delta"]]
        self._cl_kernel_args["uimemset2"] = \
            [self._cl_mem["uweights"], self._cl_mem["uhistogram"]]
        self._cl_kernel_args["imemset"] = [self._cl_mem["mask"], ]
        self._cl_kernel_args["ui2f2"] = \
            [self._cl_mem[i] for i in ("uweights", "uhistogram",
                                       "weights", "histogram")]

    def _calc_tth_out(self, lower, upper):
        """
        Calculate the bin-center position in 2theta
        """
        self.tth_min = numpy.float32(lower)
        self.tth_max = numpy.float32(upper)
        delta = (upper - lower) / (self.nBins)
        self.tth_out = numpy.linspace(lower + 0.5 * delta,
                                      upper - 0.5 * delta,
                                      self.nBins)

    def getConfiguration(self, Nimage, Nbins, useFp64=None):
        """getConfiguration gets the description of the integrations
        to be performed and keeps an internal copy

        @param Nimage: number of pixel in image
        @param Nbins: number of bins in regrouped histogram
        @param useFp64: use double precision. By default the same as init!
        """
        if Nimage < 1 or Nbins < 1:
            raise RuntimeError(("getConfiguration with Nimage=%s and"
                                " Nbins=%s makes no sense") % (Nimage, Nbins))
        if useFp64 is not None:
            self.useFp64 = bool(useFp64)
        self.nBins = Nbins
        self.nData = Nimage
        self.wdim_data = (Nimage + self.BLOCK_SIZE - 1) & \
            ~ (self.BLOCK_SIZE - 1),
        self.wdim_bins = (Nbins + self.BLOCK_SIZE - 1) & \
            ~ (self.BLOCK_SIZE - 1),

    def configure(self, kernel=None):
        """
        The method configure() allocates the OpenCL resources required
        and compiled the OpenCL kernels.  An active context must exist
        before a call to configure() and getConfiguration() must have
        been called at least once. Since the compiled OpenCL kernels
        carry some information on the integration parameters, a change
        to any of the parameters of getConfiguration() requires a
        subsequent call to configure() for them to take effect.

        If a configuration exists and configure() is called, the
        configuration is cleaned up first to avoid OpenCL memory leaks

        @param kernel_path: is the path to the actual kernel
        """
        if self.nBins < 1 or self.nData < 1:
            raise RuntimeError(("configure() with Nimage=%s and"
                                " Nbins=%s makes no sense") %
                               (self.nData, self.nBins))
        if not self._ctx:
            raise RuntimeError("You may not call config() at this point."
                               " There is no Active context."
                               " (Hint: run init())")

        #If configure is recalled, force cleanup of OpenCL resources
        #to avoid accidental leaks
        self.clean(True)
        with self.lock:
            self._allocate_buffers()
            self._compile_kernels(kernel)
            self._set_kernel_arguments()
            # We need to initialise the Mask to 0
            imemset = self._cl_program.imemset(self._queue, self.wdim_data,
                                               self.tdim, self._cl_mem["mask"])
        if self.logfile:
            self.log(memset_mask=imemset)

    def loadTth(self, tth, dtth, tth_min=None, tth_max=None):
        """
        Load the 2th arrays along with the min and max value.

        loadTth maybe be recalled at any time of the execution in
        order to update the 2th arrays.

        loadTth is required and must be called at least once after a
        configure()
        """

        if not self._ctx:
            raise RuntimeError("You may not call loadTth() at this point."
                               " There is no Active context."
                               " (Hint: run init())")
        if not self._cl_mem["tth"]:
            raise RuntimeError("You may not call loadTth() at this point,"
                               " OpenCL is not configured"
                               " (Hint: run configure())")

        ctth = numpy.ascontiguousarray(tth.ravel(), dtype=numpy.float32)
        cdtth = numpy.ascontiguousarray(dtth.ravel(), dtype=numpy.float32)
        with self.lock:
            self._tth_max = (ctth + cdtth).max() * \
                (1.0 + numpy.finfo(numpy.float32).eps)
            self._tth_min = max(0.0, (ctth - cdtth).min())
            if tth_min is None:
                tth_min = self._tth_min

            if tth_max is None:
                tth_max = self._tth_max
            copy_tth = pyopencl.enqueue_copy(self._queue,
                                             self._cl_mem["tth"], ctth)
            copy_dtth = pyopencl.enqueue_copy(self._queue,
                                              self._cl_mem["tth_delta"], cdtth)
            pyopencl.enqueue_copy(self._queue, self._cl_mem["tth_min_max"],
                                  numpy.array((self._tth_min, self._tth_max),
                                              dtype=numpy.float32))
            logger.debug("kernel get_spans sizes: \t%s %s",
                         self.wdim_data, self.tdim)
            get_spans = \
                self._cl_program.get_spans(self._queue, self.wdim_data,
                                           self.tdim,
                                           *self._cl_kernel_args["get_spans"])
            # Group 2th span ranges group_spans(__global float *span_range)
            logger.debug("kernel group_spans sizes: \t%s %s",
                         self.wdim_data, self.tdim)
            group_spans = \
                self._cl_program.group_spans(self._queue,
                                             self.wdim_data,
                                             self.tdim,
                                             self._cl_mem["span_ranges"])
            self._calc_tth_out(tth_min, tth_max)
        if self.logfile:
            self.log(copy_2th=copy_tth, copy_delta2th=copy_dtth,
                     get_spans=get_spans, group_spans=group_spans)

    def setSolidAngle(self, solidAngle):
        """
        Enables SolidAngle correction and uploads the suitable array
        to the OpenCL device.

        By default the program will assume no solidangle correction
        unless setSolidAngle() is called.  From then on, all
        integrations will be corrected via the SolidAngle array.

        If the SolidAngle array needs to be changes, one may just call
        setSolidAngle() again with that array

        @param solidAngle: the solid angle of the given pixel
        @type solidAngle: ndarray
        """
        if not self._ctx:
            raise RuntimeError("You may not call Integrator1d.setSolidAngle()"
                               " at this point. There is no Active context."
                               " (Hint: run init())")
        cSolidANgle = numpy.ascontiguousarray(solidAngle.ravel(),
                                              dtype=numpy.float32)
        with self.lock:
            self.do_solidangle = True
            copy_solidangle = pyopencl.enqueue_copy(self._queue,
                                                    self._cl_mem["solidangle"],
                                                    cSolidANgle)
        if self.logfile:
            self.log(copy_solidangle=copy_solidangle)

    def unsetSolidAngle(self):
        """
        Instructs the program to not perform solidangle correction from now on.

        SolidAngle correction may be turned back on at any point
        """
        with self.lock:
            self.do_solidangle = False

    def setMask(self, mask):
        """
        Enables the use of a Mask during integration. The Mask can be
        updated by recalling setMask at any point.

        The Mask must be a PyFAI Mask. Pixels with 0 are masked
        out. TODO: check and invert!

        @param mask: numpy.ndarray of integer.
        """
        if not self._ctx:
            raise RuntimeError("You may not call"
                               " Integrator1d.setDummyValue(dummy,delta_dummy)"
                               " at this point. There is no Active context."
                               " (Hint: run init())")
        cMask = numpy.ascontiguousarray(mask.ravel(), dtype=numpy.int32)
        with self.lock:
            self.do_mask = True
            copy_mask = pyopencl.enqueue_copy(self._queue,
                                              self._cl_mem["mask"], cMask)
        if self.logfile:
            self.log(copy_mask=copy_mask)

    def unsetMask(self):
        """
        Disables the use of a Mask from that point.
        It may be re-enabled at any point via setMask
        """
        with self.lock:
            self.do_mask = False

    def setDummyValue(self, dummy, delta_dummy):
        """
        Enables dummy value functionality and uploads the value to the
        OpenCL device.

        Image values that are similar to the dummy value are set to 0.

        @param dummy: value in image of missing values (masked pixels?)
        @param delta_dummy: precision for dummy values
        """
        if not self._ctx:
            raise RuntimeError("You may not call"
                               " Integrator1d.setDummyValue(dummy,delta_dummy)"
                               " at this point. There is no Active context."
                               " (Hint: run init())")
        else:
            with self.lock:
                self.do_dummy = True
                pyopencl.enqueue_copy(self._queue, self._cl_mem["dummyval"],
                                      numpy.array((dummy,),
                                                  dtype=numpy.float32))
                pyopencl.enqueue_copy(self._queue,
                                      self._cl_mem["dummyval_delta"],
                                      numpy.array((delta_dummy,),
                                                  dtype=numpy.float32))

    def unsetDummyValue(self):
        """Disable a dummy value.
        May be re-enabled at any time by setDummyValue
        """
        with self.lock:
            self.do_dummy = False

    def setRange(self, lowerBound, upperBound):
        """
        Instructs the program to use a user - defined range for 2th
        values

        setRange is optional. By default the integration will use the
        tth_min and tth_max given by loadTth() as integration
        range. When setRange is called it sets a new integration range
        without affecting the 2th array. All values outside that range
        will then be discarded when interpolating.  Currently, if the
        interval of 2th (2th + -d2th) is not all inside the range
        specified, it is discarded. The bins of the histogram are
        RESCALED to the defined range and not the original tth_max -
        tth_min range.

        setRange can be called at any point and as many times required
        after a valid configuration is created.

        @param lowerBound: lower bound of the integration range
        @type lowerBound: float
        @param upperBound: upper bound of the integration range
        @type upperBound: float
        """
        if self._ctx is None:
            raise RuntimeError("You may not call setRange() at this point."
                               " There is no Active context."
                               " (Hint: run init())")
        if not (self.nData > 1 and self._cl_mem["tth_range"]):
            raise RuntimeError("You may not call setRange() at this point,"
                               " the required buffers are not allocated"
                               " (Hint: run config())")

        with self.lock:
            self.useTthRange = True
            copy_2thrange = \
                pyopencl.enqueue_copy(self._queue, self._cl_mem["tth_range"],
                                      numpy.array((lowerBound, upperBound),
                                                  dtype=numpy.float32))
            self._cl_kernel_args["create_histo_binarray"][8] = \
                self._cl_mem["tth_range"]
            self._cl_kernel_args["get_spans"][2] = self._cl_mem["tth_range"]

        if self.logfile:
            self.log(copy_2thrange=copy_2thrange)

    def unsetRange(self):
        """
        Disable the use of a user-defined 2th range and revert to
        tth_min,tth_max range

        unsetRange instructs the program to revert to its default
        integration range. If the method is called when no
        user-defined range had been previously specified, no action
        will be performed
        """

        with self.lock:
            if self.useTthRange:
                self._calc_tth_out(self._tth_min, self._tth_max)
            self.useTthRange = False
            self._cl_kernel_args["create_histo_binarray"][8] = \
                self._cl_mem["tth_min_max"]
            self._cl_kernel_args["get_spans"][2] = self._cl_mem["tth_min_max"]

    def execute(self, image):
        """
        Perform a 1D azimuthal integration

        execute() may be called only after an OpenCL device is
        configured and a Tth array has been loaded (at least once) It
        takes the input image and based on the configuration provided
        earlier it performs the 1D integration.  Notice that if the
        provided image is bigger than N then only N points will be
        taked into account, while if the image is smaller than N the
        result may be catastrophic.  set/unset and loadTth methods
        have a direct impact on the execute() method.  All the rest of
        the methods will require at least a new configuration via
        configure().

        Takes an image, integrate and return the histogram and weights

        @param image: image to be processed as a numpy array
        @return: tth_out, histogram, bins

        TODO: to improve performances, the image should be casted to
        float32 in an optimal way: currently using numpy machinery but
        would be better if done in OpenCL
        """
        assert image.size == self.nData
        if not self._ctx:
            raise RuntimeError("You may not call execute() at this point."
                               " There is no Active context."
                               " (Hint: run init())")
        if not self._cl_mem["histogram"]:
            raise RuntimeError("You may not call execute() at this point,"
                               " kernels are not configured"
                               " (Hint: run configure())")
        if not self._tth_max:
            raise RuntimeError("You may not call execute() at this point."
                               " There is no 2th array loaded."
                               " (Hint: run loadTth())")

        with self.lock:
            copy_img = pyopencl.enqueue_copy(
                self._queue,
                self._cl_mem["image"],
                numpy.ascontiguousarray(image.ravel(),
                                        dtype=numpy.float32))
            logger.debug("kernel uimemset2 sizes: \t%s %s",
                         self.wdim_bins, self.tdim)
            memset = \
                self._cl_program.uimemset2(self._queue, self.wdim_bins,
                                           self.tdim,
                                           *self._cl_kernel_args["uimemset2"])

            if self.do_dummy:
                logger.debug("kernel dummyval_correction sizes: \t%s %s",
                             self.wdim_data, self.tdim)
                dummy = self._cl_program.dummyval_correction(
                    self._queue, self.wdim_data, self.tdim,
                    self._cl_kernel_args["dummyval_correction"])

            if self.do_solidangle:
                sa = self._cl_program.solidangle_correction(
                    self._queue, self.wdim_data, self.tdim,
                    *self._cl_kernel_args["solidangle_correction"])
            logger.debug("kernel create_histo_binarray sizes: \t%s %s",
                         self.wdim_data, self.tdim)
            integrate = self._cl_program.create_histo_binarray(
                self._queue, self.wdim_data, self.tdim,
                *self._cl_kernel_args["create_histo_binarray"])
            #convert to float
            convert = self._cl_program.ui2f2(self._queue, self.wdim_data,
                                             self.tdim,
                                             *self._cl_kernel_args["ui2f2"])
            histogram = numpy.empty(self.nBins, dtype=numpy.float32)
            bins = numpy.empty(self.nBins, dtype=numpy.float32)
            copy_hist = pyopencl.enqueue_copy(self._queue, histogram,
                                              self._cl_mem["histogram"])
            copy_bins = pyopencl.enqueue_copy(self._queue, bins,
                                              self._cl_mem["weights"])

            if self.logfile:
                self.log(copy_in=copy_img, memset2=memset)
                if self.do_dummy:
                    self.log(dummy_corr=dummy)
                if self.do_solidangle:
                    self.log(solid_angle=sa)
                self.log(integrate=integrate, convert_uint2float=convert,
                         copy_hist=copy_hist, copy_bins=copy_bins)
            pyopencl.enqueue_barrier(self._queue).wait()

        return self.tth_out, histogram, bins

    def init(self, devicetype="GPU", useFp64=True,
             platformid=None, deviceid=None):
        """Initial configuration: choose a device and create a context
        and a command queue.

        Devicetypes can be GPU, gpu, CPU, cpu, DEF, ACC, ALL. Suggested are
        GPU,CPU. For each setting to work there must be such an OpenCL
        device and properly installed. E.g.: If Nvidia driver is installed,
        GPU will succeed but CPU will fail. The AMD SDK kit (AMD APP) is
        required for CPU via OpenCL.

        If a context already exists it is kept and only a warning is
        logged. Otherwise the device type, platform index and device index
        of the first device of the new context are recorded, and useFp64 is
        overwritten with whether "fp64" appears in the extensions of that
        device. The queue has profiling enabled when a log filename was
        given to the constructor.

        On an Apple CPU device the block size is forced to 1 and the work
        sizes are recomputed for the sizes already configured.

        @param devicetype: string in ["cpu","gpu", "all", "acc"]
        @param useFp64: boolean specifying if double precision will be used
        @param platformid: integer
        @param deviceid: integer
        """
        if self._ctx is not None:
            logger.warning("Recycling existing context ..."
                           " if you want to get start from scratch,"
                           " use clean()")
            return

        self._ctx = ocl.create_context(devicetype, useFp64,
                                       platformid, deviceid)
        device = self._ctx.devices[0]

        self.devicetype = pyopencl.device_type.to_string(device.type)
        if (self.devicetype == "CPU")\
                and (device.platform.vendor == "Apple"):
            logger.warning("This is a workaround for Apple's OpenCL"
                           " on CPU: enforce BLOCK_SIZE=1")
            self.BLOCK_SIZE = 1
            self.tdim = (self.BLOCK_SIZE,)

            # nBins / nData are -1 (truthy!) until getConfiguration() has
            # run, so test for a real size instead of plain truthiness.
            round_mask = ~(self.BLOCK_SIZE - 1)
            if self.nBins > 0:
                self.wdim_bins = \
                    ((self.nBins + self.BLOCK_SIZE - 1) & round_mask,)
            if self.nData > 0:
                self.wdim_data = \
                    ((self.nData + self.BLOCK_SIZE - 1) & round_mask,)

        self.useFp64 = "fp64" in device.extensions
        # Recover the indices of the selected platform and device
        platforms = pyopencl.get_platforms()
        self.platformid = platforms.index(device.platform)
        devices = platforms[self.platformid].get_devices()
        self.deviceid = devices.index(device)
        if self.filename:
            # Event timings are only available on a profiling queue
            self._queue = pyopencl.CommandQueue(
                self._ctx,
                properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
        else:
            self._queue = pyopencl.CommandQueue(self._ctx)

    def clean(self, preserve_context=False):
        """
        Release the OpenCL resources held by the integrator.

        Buffers and kernels are always freed. The queue and the context
        created by init() are dropped as well, unless preserve_context is
        true.

        @param preserve_context: keep the queue and the context when true
        @type preserve_context: bool
        """
        with self.lock:
            self._free_buffers()
            self._free_kernels()
            if preserve_context:
                return
            self._queue = None
            self._ctx = None

    def get_status(self):
        """return a dictionnary with the status of the integrator: for
        compatibilty with former implementation"""
        out = {'dummy': bool(self.do_dummy),
               'mask': bool(self.do_mask),
               'dark': bool(self.do_dark),
               "solid_angle": bool(self.do_solidangle),
               "pos1": False,
               'pos0': (self._tth_max is not None),
               'compiled': (self._cl_program is not None),
               'size': self.nData,
               'context': (self._ctx is not None)}
        return out