Use the Intel short vector math library for vector intrinsics

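Your application calls scalar instead of vectorized versions of math functions. To fix, do all of the following:

- Enable auto-vectorization with the -O2 -ftree-vectorize compiler options.
- Allow math calls to be vectorized with the -funsafe-math-optimizations compiler option.
- Select the Intel short vector math library (SVML) ABI with the -mveclibabi=svml compiler option.
- Link against SVML with -L/opt/intel/lib/intel64 -lm -lsvml -Wl,-rpath=/opt/intel/lib/intel64.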

Example

gcc program.c -O2 -ftree-vectorize -funsafe-math-optimizations -mveclibabi=svml -L/opt/intel/lib/intel64 -lm -lsvml -Wl,-rpath=/opt/intel/lib/intel64
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#define N 100000

/*
 * Fills an array with pseudo-random angles, computes the cosine of every
 * element, and prints the sum of the cosines.
 *
 * The cosine loop is the one meant to be auto-vectorized by the compiler.
 *
 * Returns 0.
 */
int main(void)
{
    /* static storage keeps the two N-element double arrays off the stack */
    static double angles[N], results[N];
    double sum = 0.0;
    int i;

    /* fixed seed: every run works on the same sequence of angles */
    srand(86456);

    for (i = 0; i < N; i++)
    {
        angles[i] = rand();
    }

    // the loop will be auto-vectorized
    for (i = 0; i < N; i++)
    {
        results[i] = cos(angles[i]);
    }

    /*
     * Consume the results: if nothing reads them, the optimizer is free to
     * remove the cosine loop above as dead code.
     */
    for (i = 0; i < N; i++)
    {
        sum += results[i];
    }
    printf("%f\n", sum);

    return 0;
}

Read More