/usr/lib/python2.7/dist-packages/pyFAI/ocl_azim_LUT.cl is in pyfai 0.10.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
* Project: Azimuthal regrouping OpenCL kernel for PyFAI.
* Kernel with full pixel-split using a LUT
*
*
* Copyright (C) 2012-2014 European Synchrotron Radiation Facility
* Grenoble, France
*
* Principal authors: J. Kieffer (kieffer@esrf.fr)
* Last revision: 11/10/2014
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
* \brief OpenCL kernels for 1D azimuthal integration
*
* Needed constants:
* NLUT: maximum size of the LUT
* NBINS: number of output bins for histograms
* ON_CPU: 0 for GPU, 1 for CPU and probably Xeon Phi
*/
/*
 * One look-up-table entry, as consumed by the lut_integrate kernel.
 * NOTE(review): presumably this layout has to match the record layout of the
 * LUT buffer filled on the host side - confirm before touching field order or types.
 */
struct lut_point_t
{
// Index of a pixel in the input image; the kernel reads weights[idx].
int idx;
// Fraction of that pixel going to the bin; multiplied with the pixel value
// for the weighted sum and accumulated on its own for the count.
float coef;
};
/**
 * \brief 1D azimuthal integration with full pixel splitting, driven by a look-up table (LUT)
 *
 * Each work-item handles one output bin (work-items whose global id is not
 * below NBINS do nothing). For its bin, the work-item walks up to NLUT entries
 * of the LUT; every entry names a pixel of the input image (idx) and the
 * fraction of that pixel going to the bin (coef). The walk stops early at the
 * first entry having both idx <= 0 and coef <= 0.
 *
 * The LUT is addressed according to the ON_CPU constant:
 *  - ON_CPU != 0: entry j of bin i is lut[i*NLUT + j], so a single work-item
 *    reads adjacent memory;
 *  - ON_CPU == 0: entry j of bin i is lut[j*NBINS + i], so neighbouring
 *    work-items read adjacent memory.
 *
 * The kernel takes no mask, dark or flat arguments: a pixel is only left out
 * when do_dummy is set and its value is exactly equal to dummy.
 *
 * Both sums use Kahan summation so that single precision arithmetic is
 * error-compensated:
 * http://en.wikipedia.org/wiki/Kahan_summation_algorithm
 *
 * @param weights   Float pointer to global memory storing the input image.
 * @param lut       Pointer to global memory storing the LUT entries (struct lut_point_t):
 *                  index of the input pixel and fraction of that pixel going to the bin.
 * @param do_dummy  Bool/int: shall the dummy pixels be checked? Dummy pixels are ignored.
 * @param dummy     Float: value marking bad pixels; also written to outMerge for bins
 *                  whose accumulated count is not above 1e-10.
 * @param outData   Float pointer to the output 1D array with the weighted histogram.
 * @param outCount  Float pointer to the output 1D array with the unweighted histogram.
 * @param outMerge  Float pointer to the output 1D array with the diffractogram
 *                  (outData / outCount).
 */
__kernel void
lut_integrate(const __global float *weights,
              const __global struct lut_point_t *lut,
              const int do_dummy,
              const float dummy,
              __global float *outData,
              __global float *outCount,
              __global float *outMerge)
{
    const float epsilon = 1e-10f;
    const int bin = get_global_id(0);

    // Work-items past the last bin have nothing to compute.
    if (bin >= NBINS)
        return;

    float acc_data = 0.0f;   // compensated sum of coef * pixel value
    float acc_count = 0.0f;  // compensated sum of coef
    float err_data = 0.0f;   // Kahan correction term for acc_data
    float err_count = 0.0f;  // Kahan correction term for acc_count

    for (int n = 0; n < NLUT; n++)
    {
        // Bin-major layout on CPU, entry-major layout on GPU (see header).
        const int pos = (ON_CPU) ? (bin * NLUT + n) : (n * NBINS + bin);
        const int pixel = lut[pos].idx;
        const float frac = lut[pos].coef;

        // First entry with idx <= 0 and coef <= 0 terminates the list of this bin.
        if ((pixel <= 0) && (frac <= 0.0f))
            break;

        const float value = weights[pixel];

        // Leave out pixels carrying the dummy value when the check is enabled.
        if (do_dummy && (value == dummy))
            continue;

        // Kahan step for the weighted sum.
        float term = frac * value - err_data;
        float next = acc_data + term;
        err_data = (next - acc_data) - term;
        acc_data = next;

        // Kahan step for the unweighted sum.
        term = frac - err_count;
        next = acc_count + term;
        err_count = (next - acc_count) - term;
        acc_count = next;
    }

    outData[bin] = acc_data;
    outCount[bin] = acc_count;
    // Bins that received (almost) nothing get the dummy value instead of a ratio.
    outMerge[bin] = (acc_count > epsilon) ? (acc_data / acc_count) : dummy;
}
|