Use Intel SDLT

The cost of rewriting code to organize data as a structure of arrays (SoA) instead of an array of structures (AoS) may outweigh the benefit. To fix: use Intel SIMD Data Layout Templates (Intel SDLT), introduced in version 16.1 of the Intel compiler, to mitigate that cost. Intel SDLT is a C++11 template library that may reduce the required code rewrites to just a few lines.

Example (original code)

...
// Plain aggregate of four float fields; one instance is one element of the data set.
struct kValues {
    float Kx;
    float Ky;
    float Kz;
    float PhiMag;
};
// Array-of-structures (AoS) storage: the vector holds `count` complete kValues
// objects, so the four fields of each element are stored together.
std::vector<kValues> dataset(count);
...
Using SDLT containers instead of STL containers may improve the memory access pattern, enabling more efficient vector processing.
// Plain aggregate of four float fields; one instance is one element of the data set.
struct kValues {
    float Kx;
    float Ky;
    float Kz;
    float PhiMag;
};

// Array-of-structures (AoS) storage: the vector holds `count` complete kValues
// objects, so the four fields of each element are stored together.
std::vector<kValues> dataset(count);

// Initialization step: copy the four separate input arrays (kx, ky, kz, phiMag)
// into the matching fields of each element of the AoS container.
// Elements are reached through the container object `dataset`; `kValues` is the
// element type and cannot be subscripted.
for (int i = 0; i < count; ++i) {
    dataset[i].Kx = kx[i];
    dataset[i].Ky = ky[i];
    dataset[i].Kz = kz[i];
    dataset[i].PhiMag = phiMag[i];
}

// Calculation step: for the fixed index indexX, walk all numK elements and
// accumulate phi * cos(expArg) into QrSum and phi * sin(expArg) into QiSum.
// Elements are read through the container object `dataset`; `kValues` is the
// element type and cannot be subscripted.
// NOTE(review): the container was sized with `count`; this loop assumes
// numK <= count -- confirm.
for (indexK = 0; indexK < numK; indexK++) {
    // Each iteration reads all four fields of one element.
    expArg = PIx2 * (dataset[indexK].Kx * x[indexX] +
                     dataset[indexK].Ky * y[indexX] +
                     dataset[indexK].Kz * z[indexX]);
    cosArg = cosf(expArg);
    sinArg = sinf(expArg);
    float phi = dataset[indexK].PhiMag;
    QrSum += phi * cosArg;
    QiSum += phi * sinArg;
}

Example (revised code)

#include <sdlt/sdlt.h>
...
// Describe the kValues struct and its four data members to SDLT so that SDLT
// containers can store it.
SDLT_PRIMITIVE(kValues, Kx, Ky, Kz, PhiMag)
// SDLT one-dimensional container holding `count` elements; it takes the place
// of std::vector<kValues> and keeps the data in a structure-of-arrays layout.
sdlt::soa1d_container<kValues> dataset(count);
...
#include <sdlt/sdlt.h>

// Plain aggregate of four float fields; the struct definition itself is
// unchanged from the original code.
struct kValues {
    float Kx;
    float Ky;
    float Kz;
    float PhiMag;
};
// Describe the kValues struct and its four data members to SDLT so that SDLT
// containers can store it.
SDLT_PRIMITIVE(kValues, Kx, Ky, Kz, PhiMag)

// SDLT one-dimensional container holding `count` elements; it takes the place
// of std::vector<kValues> and keeps the data in a structure-of-arrays layout.
sdlt::soa1d_container<kValues> dataset(count);

// Initialization step: obtain an accessor from the SDLT container and, for
// each of the numK elements, assign the four members from the separate input
// arrays. Members are reached through accessor calls (Kx(), Ky(), ...) rather
// than plain field access.
auto kWriter = dataset.access();
for (k = 0; k < numK; k++) {
    kWriter[k].Kx() = kx[k];
    kWriter[k].Ky() = ky[k];
    kWriter[k].Kz() = kz[k];
    kWriter[k].PhiMag() = phiMag[k];
}

// Calculation step: for the fixed index indexX, walk all numK elements and
// accumulate phi * cos(expArg) into QrSum and phi * sin(expArg) into QiSum.
// The container is only read here, so a const accessor is requested.
auto kVals = dataset.const_access();
// Ask the compiler to vectorize the loop: expArg, cosArg and sinArg are
// per-iteration temporaries (private), while QrSum and QiSum are combined
// across SIMD lanes as sum reductions.
#pragma omp simd private(expArg, cosArg, sinArg) reduction(+:QrSum, QiSum)
for (indexK = 0; indexK < numK; indexK++) {
    // Members are read through accessor calls (Kx(), Ky(), Kz(), PhiMag()).
    expArg = PIx2 * (kVals[indexK].Kx() * x[indexX] +
    kVals[indexK].Ky() * y[indexX] +
    kVals[indexK].Kz() * z[indexX]);
    cosArg = cosf(expArg);
    sinArg = sinf(expArg);
    float phi = kVals[indexK].PhiMag();
    QrSum += phi * cosArg;
    QiSum += phi * sinArg;
}

Read More