/usr/lib/python2.7/dist-packages/pyFAI/reduction_test.cl is in pyfai 0.10.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
* Project: Azimuthal integration for PyFAI.
* Reduction Kernels
*
*
* Copyright (C) 2014 European Synchrotron Radiation Facility
* Grenoble, France
*
* Principal authors: Giannis Ashiotis <giannis.ashiotis@gmail.com>
* J. Kieffer (kieffer@esrf.fr)
* Last revision: 20/10/2014
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * First pass of a two-pass min/max reduction.
 *
 * Every work-item scans `buffer` starting at its global id and stepping by
 * the global size, keeping a running minimum (in .x) and maximum (in .y).
 * The per-work-item pairs are then combined inside the work-group through
 * local memory, and work-item 0 stores the group's (min, max) pair in
 * preresult[group id].
 *
 * WORKGROUP_SIZE is not defined in this file; it has to be supplied at
 * build time and must be at least as large as the local size used at launch.
 *
 * @param buffer    input values to scan
 * @param length    number of elements of `buffer` to consider
 * @param preresult output, one (min, max) float2 per work-group
 */
__kernel
void reduce1(__global float* buffer,
             __const int length,
             __global float2* preresult) {
    int global_index = get_global_id(0);
    int global_size = get_global_size(0);

    // Neutral elements: any real input replaces them on first comparison.
    float2 accumulator;
    accumulator.x = INFINITY;
    accumulator.y = -INFINITY;

    // Loop sequentially over chunks of the input vector.
    while (global_index < length) {
        float element = buffer[global_index];
        accumulator.x = (accumulator.x < element) ? accumulator.x : element;
        accumulator.y = (accumulator.y > element) ? accumulator.y : element;
        global_index += global_size;
    }

    __local float2 scratch[WORKGROUP_SIZE];

    // Publish the private result so the other work-items can read it.
    int local_index = get_local_id(0);
    scratch[local_index] = accumulator;
    barrier(CLK_LOCAL_MEM_FENCE);

    // Tree reduction in local memory, carried on until a single pair is
    // left in scratch[0]. `half_count` is rounded up so that an odd number
    // of live entries does not drop the last one; the bounds test skips the
    // unpaired middle entry, which is simply carried to the next round.
    int active_threads = get_local_size(0);
    while (active_threads > 1) {
        int half_count = (active_threads + 1) / 2;
        if (local_index + half_count < active_threads) {
            float2 other = scratch[local_index + half_count];
            float2 mine = scratch[local_index];
            mine.x = (mine.x < other.x) ? mine.x : other.x;
            mine.y = (mine.y > other.y) ? mine.y : other.y;
            scratch[local_index] = mine;
        }
        // The loop condition is identical for every work-item of the group,
        // so all of them reach this barrier the same number of times.
        barrier(CLK_LOCAL_MEM_FENCE);
        active_threads = half_count;
    }

    if (local_index == 0) {
        preresult[get_group_id(0)] = scratch[0];
    }
}
/**
 * Second pass of a two-pass min/max reduction.
 *
 * Each work-item loads preresult[local id] into local memory, then the
 * group folds the (min, max) pairs until two pairs remain. Work-item 0
 * writes both of them to result[0] as one float4:
 *   .x/.y = min/max held in scratch[0], .z/.w = min/max held in scratch[1].
 * The final merge of those two pairs is not performed by this kernel
 * (presumably left to the caller -- confirm against the host code).
 *
 * WORKGROUP_SIZE is not defined in this file; it has to be supplied at
 * build time and must be at least as large as the local size used at launch.
 * The halving scheme expects the local size to be a power of two, at least 2;
 * `preresult` must hold at least one entry per work-item.
 *
 * @param preresult input, (min, max) pairs indexed by local id
 * @param result    output, result[0] receives the two remaining pairs
 */
__kernel
void reduce2(__global float2* preresult,
             __global float4* result) {
    __local float2 scratch[WORKGROUP_SIZE];

    int local_index = get_local_id(0);
    scratch[local_index] = preresult[local_index];
    barrier(CLK_LOCAL_MEM_FENCE);

    // Tree reduction in local memory, stopped once two pairs are left.
    // `> 2` (rather than `!= 2`) guarantees termination for any local size.
    int active_threads = get_local_size(0);
    while (active_threads > 2) {
        active_threads /= 2;
        if (local_index < active_threads) {
            float2 other = scratch[local_index + active_threads];
            float2 mine = scratch[local_index];
            mine.x = (mine.x < other.x) ? mine.x : other.x;
            mine.y = (mine.y > other.y) ? mine.y : other.y;
            scratch[local_index] = mine;
        }
        // The loop condition is identical for every work-item of the group,
        // so all of them reach this barrier the same number of times.
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    if (local_index == 0) {
        // Pack the two remaining float2 pairs into a single float4.
        result[0] = (float4)(scratch[0], scratch[1]);
    }
}
|