/usr/include/volk/volk_64u_byteswap.h is in gnuradio-dev 3.7.2.1-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef INCLUDED_volk_64u_byteswap_u_H
#define INCLUDED_volk_64u_byteswap_u_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
  \brief Byteswaps (in-place) an unaligned vector of uint64_t's.

  The eight bytes of every 64-bit point are reversed. Points are handled two
  at a time with unaligned SSE2 loads/stores; an odd trailing point is
  swapped with scalar code.

  \param intsToSwap The vector of data to byte swap (overwritten with the result)
  \param num_points The number of 64-bit data points in intsToSwap
*/
static inline void volk_64u_byteswap_u_sse2(uint64_t* intsToSwap, unsigned int num_points){
  __m128i* vecPtr = (__m128i*)intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  for(number = 0; number < halfPoints; number++){
    // Load two 64-bit points (four 32-bit words); the result is stored back to the same address.
    input = _mm_loadu_si128(vecPtr);

    // Reverse the bytes inside every 32-bit word
    byte1 = _mm_slli_epi32(input, 24);
    byte4 = _mm_srli_epi32(input, 24);
    byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    output = _mm_or_si128(_mm_or_si128(byte1, byte4), _mm_or_si128(byte2, byte3));

    // Exchange the two 32-bit words of each 64-bit point to finish the 64-bit swap
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    _mm_storeu_si128(vecPtr, output);
    vecPtr++;
  }

  // At most one point is left over. It is swapped as a uint64_t rather than
  // through a uint32_t* so the data is only accessed with its declared type.
  if(num_points & 1u){
    uint64_t value = intsToSwap[num_points - 1];
    value = (value << 32) | (value >> 32);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    intsToSwap[num_points - 1] = value;
  }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's using portable scalar code.

  The eight bytes of every 64-bit point are reversed. Only the natural
  alignment of uint64_t is needed.

  \param intsToSwap The vector of data to byte swap (overwritten with the result)
  \param num_points The number of 64-bit data points in intsToSwap
*/
static inline void volk_64u_byteswap_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Work on the value as a uint64_t (not through a uint32_t*) so the data
    // is only ever accessed with its declared type.
    uint64_t value = intsToSwap[point];
    // Swap the 32-bit halves, then the 16-bit quarters, then adjacent bytes.
    value = (value << 32) | (value >> 32);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    intsToSwap[point] = value;
  }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_64u_byteswap_u_H */
#ifndef INCLUDED_volk_64u_byteswap_a_H
#define INCLUDED_volk_64u_byteswap_a_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
  \brief Byteswaps (in-place) an aligned vector of uint64_t's.

  The eight bytes of every 64-bit point are reversed. Points are handled two
  at a time with aligned SSE2 loads/stores, so intsToSwap must be 16-byte
  aligned; an odd trailing point is swapped with scalar code.

  \param intsToSwap The 16-byte aligned vector of data to byte swap (overwritten with the result)
  \param num_points The number of 64-bit data points in intsToSwap
*/
static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){
  __m128i* vecPtr = (__m128i*)intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  for(number = 0; number < halfPoints; number++){
    // Load two 64-bit points (four 32-bit words); the result is stored back to the same address.
    input = _mm_load_si128(vecPtr);

    // Reverse the bytes inside every 32-bit word
    byte1 = _mm_slli_epi32(input, 24);
    byte4 = _mm_srli_epi32(input, 24);
    byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    output = _mm_or_si128(_mm_or_si128(byte1, byte4), _mm_or_si128(byte2, byte3));

    // Exchange the two 32-bit words of each 64-bit point to finish the 64-bit swap
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    _mm_store_si128(vecPtr, output);
    vecPtr++;
  }

  // At most one point is left over. It is swapped as a uint64_t rather than
  // through a uint32_t* so the data is only accessed with its declared type.
  if(num_points & 1u){
    uint64_t value = intsToSwap[num_points - 1];
    value = (value << 32) | (value >> 32);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    intsToSwap[num_points - 1] = value;
  }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's using portable scalar code.

  Generic counterpart of the aligned kernels: the eight bytes of every 64-bit
  point are reversed. The scalar code itself relies only on the natural
  alignment of uint64_t.

  \param intsToSwap The vector of data to byte swap (overwritten with the result)
  \param num_points The number of 64-bit data points in intsToSwap
*/
static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Work on the value as a uint64_t (not through a uint32_t*) so the data
    // is only ever accessed with its declared type.
    uint64_t value = intsToSwap[point];
    // Swap the 32-bit halves, then the 16-bit quarters, then adjacent bytes.
    value = (value << 32) | (value >> 32);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    intsToSwap[point] = value;
  }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_64u_byteswap_a_H */
|