其他
SIMD系列-GATHER/SCATTER操作
1、Strided access跨步访问
float a[LARGE_DATA_SIZE];
uint32_t STRIDE = 8;
...
// Each pass of the loop handles one group of 8 elements (one per SIMD lane).
for(int i = 0; i < PROBLEM_SIZE; i+=8) {
    // The loop index counts elements, so it has to be multiplied by the
    // stride to obtain the position of the group's first element in 'a'.
    int base = i*STRIDE;
    SIMDVec<float, 8> lanes;
    // Fill the vector from 'a', starting at a[base] and stepping by STRIDE.
    lanes.gather(&a[base], STRIDE);
    // Placeholder for the actual computation.
    lanes += 3.14;
    // Write the results back to the locations they were read from.
    lanes.scatter(&a[base], STRIDE);
}
2、Indexed access索引访问
float a[LARGE_DATA_SIZE];
// Element type matches the lanes of the uint32_t index vector loaded below.
uint32_t indices[PROBLEM_SIZE];
uint32_t STRIDE = 4;
...
// Each pass of the loop handles 8 elements of 'a', selected by 8 consecutive
// entries of 'indices'.
for(int i = 0; i < PROBLEM_SIZE; i+=8) {
    SIMDVec<float, 8> vec;
    // Here we are using precomputed indices,
    // but they can be computed on-the-fly if necessary.
    SIMDVec<uint32_t, 8> indices_vec(&indices[i]);
    // 'load' the data to vec: each lane reads 'a' at the position given by
    // the corresponding lane of indices_vec (relative to &a[0]).
    vec.gather(&a[0], indices_vec);
    // do something useful
    vec += 3.14;
    // store the result at original locations
    vec.scatter(&a[0], indices_vec);
}
3、确保有条件访问
float a[PROBLEM_SIZE], b[PROBLEM_SIZE];
float c[LARGE_DATASET_SIZE];
...
// Scalar reference version: update c[] one element at a time, skipping every
// (a[i], b[i]) pair whose difference is not a finite number.
for(int i = 0; i < PROBLEM_SIZE; i++) {
    // Here we are checking if for some reason one of the
    // values in (a[i],b[i]) pair is not determined properly.
    // std::isfinite is declared in <cmath>; it is false for NaN and +/-infinity.
    if (std::isfinite(a[i] - b[i])) {
        // Calculate the index only if both 'a' and 'b' are well defined
        int index = int(a[i] - b[i]);
        // 'gather' single element at a time
        float temp = c[index];
        // Do something with the value
        temp += 3.14;
        // Update the values of 'c' ('scatter' a single element)
        c[index] = temp;
    }
}
float a[PROBLEM_SIZE], b[PROBLEM_SIZE];
float c[LARGE_DATASET_SIZE];
...
// Deliberately INCORRECT vectorization, kept as a counter-example.
// Simplifying assumption: ((PROBLEM_SIZE % 8) == 0)
for(int i = 0; i < PROBLEM_SIZE; i+= 8) {
    // Load 8 consecutive values from each input array.
    SIMDVec<float, 8> a_vec(&a[i]);
    SIMDVec<float, 8> b_vec(&b[i]);
    // Per-lane replacement for the scalar test
    //   if (std::isfinite(a[i] - b[i])) {
    // A lane is selected when its (a, b) pair gives a finite difference, i.e.
    // when neither value is badly determined (for whatever reason, e.g. a
    // design decision).
    SIMDVecMask<8> finite_mask = (a_vec - b_vec).isfin();
    SIMDVec<uint32_t, 8> idx = a_vec - b_vec;
    SIMDVec<float, 8> values;
    // WRONG: the mask is not passed, so every lane is read, including lanes
    // whose index was derived from a non-finite difference.
    values.gather(&c[0], idx);
    // Only the selected lanes are modified...
    values.adda(finite_mask, 3.14);
    // ...but WRONG again: every lane is written back, selected or not.
    values.scatter(&c[0], idx);
}
float a[PROBLEM_SIZE], b[PROBLEM_SIZE];
float c[LARGE_DATASET_SIZE];
...
// Vectorized conditional update that passes the mask to both memory accesses.
// Simplifying assumption: ((PROBLEM_SIZE % 8) == 0)
for(int i = 0; i < PROBLEM_SIZE; i+= 8) {
    // Load 8 consecutive values from each input array.
    SIMDVec<float, 8> a_vec(&a[i]);
    SIMDVec<float, 8> b_vec(&b[i]);
    // Per-lane replacement for the scalar test
    //   if (std::isfinite(a[i] - b[i])) {
    // A lane is selected when its (a, b) pair gives a finite difference, i.e.
    // when neither value is badly determined (for whatever reason, e.g. by
    // design).
    SIMDVecMask<8> finite_mask = (a_vec - b_vec).isfin();
    SIMDVec<uint32_t, 8> idx = a_vec - b_vec;
    SIMDVec<float, 8> values;
    // CORRECT: the read is restricted by the mask.
    values.gather(finite_mask, &c[0], idx);
    // No mask is needed for the arithmetic itself...
    values += 3.14;
    // ...because the write-back is restricted by the same mask.
    values.scatter(finite_mask, &c[0], idx);
}