// Compile and run:
//   gcc -Icontrib/simde -mavx -o avx-test avx.c && ./avx-test
// #include <immintrin.h> // native instructions
#include <simde/x86/avx512.h>
#include <stdio.h>
#include <string.h>
/*
 * Demonstrates 256-bit packed subtraction on floats and doubles, first with
 * the _mm256_* intrinsic names and then with the simde_-prefixed names, and
 * prints every lane of each result vector.
 *
 * Returns 0 unconditionally.
 */
int main(void) {
    /* Initialize the two float argument vectors.  _mm256_set_ps takes its
     * arguments from the highest lane down to lane 0, so lane 0 holds the
     * LAST value written here and the printout appears reversed. */
    __m256 evens = _mm256_set_ps(2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 17.0);
    __m256 odds = _mm256_set_ps(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);

    /* Compute the difference between the two vectors */
    __m256 result = _mm256_sub_ps(evens, odds);

    /* Display the elements of the result vector.  The lanes are copied out
     * with memcpy instead of reading the vector through a float pointer
     * cast, so the access is well defined for any vector representation. */
    float f[8];
    memcpy(f, &result, sizeof f);
    printf("float %f %f %f %f %f %f %f %f\n",
           f[0], f[1], f[2], f[3], f[4], f[5], f[6], f[7]);

    /* Same exercise with doubles (4 lanes per 256-bit vector) */
    __m256d evens1 = _mm256_set_pd(2.0, 4.0, 6.0, 8.0);
    __m256d odds1 = _mm256_set_pd(1.0, 3.0, 5.0, 6.0);

    /* Compute the difference between the two vectors */
    __m256d result1 = _mm256_sub_pd(evens1, odds1);

    /* Display the elements of the result vector */
    double d[4];
    memcpy(d, &result1, sizeof d);
    printf("double %f %f %f %f\n",
           d[0], d[1], d[2], d[3]);

    /* Initialize from arrays in memory; loadu places l1[0] in lane 0, so
     * this printout follows array order. */
    double l1[] = { 2.0, 4.0, 6.0, 8.8 };
    double l2[] = { 0.0, 3.0, 5.0, 7.0 };
    __m256d evens3 = _mm256_loadu_pd(l1); /* pd = "packed double": 4 doubles */
    __m256d odds3 = _mm256_loadu_pd(l2);
    __m256d result3 = _mm256_sub_pd(evens3, odds3);

    double d3[4];
    memcpy(d3, &result3, sizeof d3);
    printf("double %f %f %f %f\n",
           d3[0], d3[1], d3[2], d3[3]);

    /* Repeat the load/subtract using the explicit simde_-prefixed API.
     * The operands must be evens4/odds4 (not evens3/odds3) so that the
     * simde_ loads actually feed the simde_ subtraction. */
    simde__m256d evens4 = simde_mm256_loadu_pd(l1);
    simde__m256d odds4 = simde_mm256_loadu_pd(l2);
    simde__m256d result4 = simde_mm256_sub_pd(evens4, odds4);

    double d4[4];
    memcpy(d4, &result4, sizeof d4);
    printf("double %f %f %f %f\n",
           d4[0], d4[1], d4[2], d4[3]);

    return 0;
}