/usr/include/volk/volk_32u_byteswap.h is in gnuradio-dev 3.7.2.1-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef INCLUDED_volk_32u_byteswap_u_H
#define INCLUDED_volk_32u_byteswap_u_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Reverses the byte order of every 32-bit word in a vector, in place (SSE2, unaligned loads/stores).
\param intsToSwap The vector of 32-bit words to byte swap; overwritten with the result
\param num_points The number of 32-bit words in intsToSwap
*/
static inline void volk_32u_byteswap_u_sse2(uint32_t* intsToSwap, unsigned int num_points){
  /* Masks selecting the two middle bytes of each 32-bit lane. */
  const __m128i upperMidMask = _mm_set1_epi32(0x00FF0000);
  const __m128i lowerMidMask = _mm_set1_epi32(0x0000FF00);
  const unsigned int vectorIters = num_points / 4;
  const unsigned int tailCount = num_points % 4;
  uint32_t* cursor = intsToSwap;
  unsigned int i;

  /* Main loop: four words per iteration, read and written through the same pointer. */
  for(i = 0; i < vectorIters; ++i, cursor += 4){
    const __m128i words = _mm_loadu_si128((const __m128i*)cursor);
    /* Outermost bytes trade places; the shifts already discard everything else. */
    const __m128i outerBytes = _mm_or_si128(_mm_slli_epi32(words, 24), _mm_srli_epi32(words, 24));
    /* Middle bytes move by one byte position each and need masking. */
    const __m128i movedUp = _mm_and_si128(_mm_slli_epi32(words, 8), upperMidMask);
    const __m128i movedDown = _mm_and_si128(_mm_srli_epi32(words, 8), lowerMidMask);
    _mm_storeu_si128((__m128i*)cursor, _mm_or_si128(_mm_or_si128(outerBytes, movedUp), movedDown));
  }

  /* Scalar tail: the (num_points % 4) words that do not fill a whole vector. */
  for(i = 0; i < tailCount; ++i){
    const uint32_t word = cursor[i];
    cursor[i] = (word >> 24) | ((word >> 8) & 0x0000ff00u) | ((word << 8) & 0x00ff0000u) | (word << 24);
  }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Reverses the byte order of every 32-bit word in a vector, in place (portable scalar version).
\param intsToSwap The vector of 32-bit words to byte swap; overwritten with the result
\param num_points The number of 32-bit words in intsToSwap
*/
static inline void volk_32u_byteswap_generic(uint32_t* intsToSwap, unsigned int num_points){
  unsigned int idx;
  for(idx = 0; idx < num_points; ++idx){
    const uint32_t word = intsToSwap[idx];
    /* Byte 3 -> byte 0, byte 2 -> byte 1, byte 1 -> byte 2, byte 0 -> byte 3. */
    const uint32_t topToBottom = (word >> 24) & 0xff;
    const uint32_t upperMidDown = (word >> 8) & 0x0000ff00;
    const uint32_t lowerMidUp = (word << 8) & 0x00ff0000;
    const uint32_t bottomToTop = (word << 24) & 0xff000000;
    intsToSwap[idx] = topToBottom | upperMidDown | lowerMidUp | bottomToTop;
  }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_32u_byteswap_u_H */
#ifndef INCLUDED_volk_32u_byteswap_a_H
#define INCLUDED_volk_32u_byteswap_a_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Reverses the byte order of every 32-bit word in a vector, in place (SSE2, aligned loads/stores).
\param intsToSwap The vector of 32-bit words to byte swap; overwritten with the result. Accessed with _mm_load_si128/_mm_store_si128, so it must be 16-byte aligned
\param num_points The number of 32-bit words in intsToSwap
*/
static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){
  /* Masks selecting the two middle bytes of each 32-bit lane. */
  const __m128i upperMidMask = _mm_set1_epi32(0x00FF0000);
  const __m128i lowerMidMask = _mm_set1_epi32(0x0000FF00);
  const unsigned int vectorIters = num_points / 4;
  const unsigned int tailCount = num_points % 4;
  uint32_t* cursor = intsToSwap;
  unsigned int i;

  /* Main loop: four words per iteration, read and written through the same pointer. */
  for(i = 0; i < vectorIters; ++i, cursor += 4){
    const __m128i words = _mm_load_si128((const __m128i*)cursor);
    /* Outermost bytes trade places; the shifts already discard everything else. */
    const __m128i outerBytes = _mm_or_si128(_mm_slli_epi32(words, 24), _mm_srli_epi32(words, 24));
    /* Middle bytes move by one byte position each and need masking. */
    const __m128i movedUp = _mm_and_si128(_mm_slli_epi32(words, 8), upperMidMask);
    const __m128i movedDown = _mm_and_si128(_mm_srli_epi32(words, 8), lowerMidMask);
    _mm_store_si128((__m128i*)cursor, _mm_or_si128(_mm_or_si128(outerBytes, movedUp), movedDown));
  }

  /* Scalar tail: the (num_points % 4) words that do not fill a whole vector. */
  for(i = 0; i < tailCount; ++i){
    const uint32_t word = cursor[i];
    cursor[i] = (word >> 24) | ((word >> 8) & 0x0000ff00u) | ((word << 8) & 0x00ff0000u) | (word << 24);
  }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Reverses the byte order of every 32-bit word in a vector, in place (portable scalar version of the aligned kernel).
\param intsToSwap The vector of 32-bit words to byte swap; overwritten with the result
\param num_points The number of 32-bit words in intsToSwap
*/
static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* cursor = intsToSwap;
  unsigned int remaining = num_points;
  while(remaining != 0){
    const uint32_t word = *cursor;
    /* Isolate each byte first, then shift it to its mirrored position. */
    const uint32_t lowHalfUp = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8);
    const uint32_t highHalfDown = ((word & 0x00ff0000u) >> 8) | ((word & 0xff000000u) >> 24);
    *cursor = lowHalfUp | highHalfDown;
    ++cursor;
    --remaining;
  }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_32u_byteswap_a_H */
|