aboutsummaryrefslogtreecommitdiff
path: root/prescheme-nim-local/avx.c
diff options
context:
space:
mode:
Diffstat (limited to 'prescheme-nim-local/avx.c')
-rw-r--r-- prescheme-nim-local/avx.c 48
1 files changed, 48 insertions, 0 deletions
diff --git a/prescheme-nim-local/avx.c b/prescheme-nim-local/avx.c
new file mode 100644
index 0000000..d69dddc
--- /dev/null
+++ b/prescheme-nim-local/avx.c
@@ -0,0 +1,48 @@
+// compile
+// gcc -Icontrib/simde -mavx -o avx-test avx.c && ./avx-test
+
+// #include <immintrin.h> // native instructions
+#include <simde/x86/avx512.h>
+#include <stdio.h>
+
+/* Demo: subtract packed float/double vectors with 256-bit AVX intrinsics and print every lane. */
+int main(void) {
+    /* Initialize the two argument vectors (_mm256_set_* lists the highest lane first) */
+    __m256 evens = _mm256_set_ps(2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 17.0);
+    __m256 odds = _mm256_set_ps(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
+    /* Compute the difference between the two vectors */
+    __m256 result = _mm256_sub_ps(evens, odds);
+    /* Display the result: store to a plain array instead of casting &result to float* */
+    float f[8];
+    _mm256_storeu_ps(f, result);
+    printf("float %f %f %f %f %f %f %f %f\n",
+           f[0], f[1], f[2], f[3], f[4], f[5], f[6], f[7]);
+
+    // try doubles: same subtraction on 4 packed doubles
+    __m256d evens1 = _mm256_set_pd(2.0, 4.0, 6.0, 8.0);
+    __m256d odds1 = _mm256_set_pd(1.0, 3.0, 5.0, 6.0);
+    __m256d result1 = _mm256_sub_pd(evens1, odds1);
+    double d[4];
+    _mm256_storeu_pd(d, result1);
+    printf("double %f %f %f %f\n", d[0], d[1], d[2], d[3]);
+
+    // initialize from RAM: unaligned loads keep array order (element 0 is lane 0)
+    const double l1[] = { 2.0, 4.0, 6.0, 8.8 };
+    const double l2[] = { 0.0, 3.0, 5.0, 7.0 };
+    __m256d evens3 = _mm256_loadu_pd(l1); // pd = "packed double": 4 doubles
+    __m256d odds3 = _mm256_loadu_pd(l2);
+    __m256d result3 = _mm256_sub_pd(evens3, odds3);
+    double d3[4];
+    _mm256_storeu_pd(d3, result3);
+    printf("double %f %f %f %f\n", d3[0], d3[1], d3[2], d3[3]);
+
+    // same load/subtract/store again, this time through the simde_-prefixed names
+    simde__m256d evens4 = simde_mm256_loadu_pd(l1);
+    simde__m256d odds4 = simde_mm256_loadu_pd(l2);
+    simde__m256d result4 = simde_mm256_sub_pd(evens4, odds4); // use the simde-loaded operands
+    double d4[4];
+    simde_mm256_storeu_pd(d4, result4);
+    printf("double %f %f %f %f\n", d4[0], d4[1], d4[2], d4[3]);
+
+    return 0;
+}