NIM
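Compile to C only (the -c / --compileOnly flag generates the C sources without building an executable) with:
nim -c -d:release c example
The generated C file is ~/.cache/nim/example_r/\@mexample.nim.c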
Using the nimsimd package:
https://github.com/guzba/nimsimd/tree/master
# SIMD floating point multiplication
# (the mm_* procs are presumably provided by a nimsimd SSE module; the import
# is not shown in this excerpt)
let
  a = mm_set1_ps(1.0) # Vector of 4 float32 each with value 1.0
  b = mm_set1_ps(2.0) # Vector of 4 float32 each with value 2.0
  c = mm_mul_ps(a, b) # SIMD vector multiplication operator
# Cast the vector to echo as separate float32 values
echo cast[array[4, float32]](c)
This translates to the following C:
/* Argument array handed to echoBinSafe below (slot 0 is filled, count 1 is passed). */
tyArray__nHXaesL0DJZHyVS07ARPRA T1_;
/* Union behind the Nim `cast`: written as __m128 via .source, read back as the array type via .dest. */
union { __m128 source; tyArray__9aTCwbdg1cSQimdVd0OEKVw dest; } LOC2;
/* Echo of a constant; presumably the "TESTING AVX" line of the output -- its Nim source is not shown here. */
echoBinSafe(TM__NqVfYNeiCrBLDU2vGCYO9bA_2, 1);
/* let a, b, c from the Nim snippet, mapped directly onto the SSE intrinsics. */
a__simd_2 = _mm_set1_ps(1.0f);
b__simd_3 = _mm_set1_ps(2.0f);
c__simd_4 = _mm_mul_ps(a__simd_2, b__simd_3);
/* Clear the argument array before filling its first slot. */
nimZeroMem((void*)T1_, sizeof(tyArray__nHXaesL0DJZHyVS07ARPRA));
/* Reinterpret the vector as the array through the union. */
LOC2.source = c__simd_4;
/* dollar___simd_70 is presumably Nim's `$` (stringify) for the array -- confirm in the generated file. */
T1_[0] = dollar___simd_70(LOC2.dest);
echoBinSafe(T1_, 1);
# AVX: element-wise subtraction of two vectors of 4 float64
# The input arrays stay `var` so that their address can be taken below
var l1 = [1.0, 2.1, 3.2, 4.3]
var l2 = [0.0, 1.1, 2.1, 3.3]
# Load each array into a 256-bit register through a pointer to its first
# element (the "u" load variant does not require an aligned address)
let regs1 = mm256_loadu_pd(l1[0].addr)
let regs2 = mm256_loadu_pd(l2[0].addr)
# `result` is kept as the name so it still matches the generated C listing
let result = mm256_sub_pd(regs1, regs2)
# Cast the vector to echo as separate float64 values
echo cast[array[4, float64]](result)
/* 256-bit loads from the address of element 0 of l1 and of l2. */
regs1__simd_124 = _mm256_loadu_pd(((void*) ((&l1__simd_122[(((NI) 0))- 0]))));
regs2__simd_125 = _mm256_loadu_pd(((void*) ((&l2__simd_123[(((NI) 0))- 0]))));
/* Element-wise subtraction regs1 - regs2. */
result__simd_126 = _mm256_sub_pd(regs1__simd_124, regs2__simd_125);
/* Clear the argument array T3_ before filling its first slot. */
nimZeroMem((void*)T3_, sizeof(tyArray__nHXaesL0DJZHyVS07ARPRA));
/* LOC4's declaration is not shown; presumably the same kind of cast union as LOC2 in the first listing. */
LOC4.source = result__simd_126;
/* dollar___simd_192 is presumably Nim's `$` (stringify) for the float64 array -- confirm in the generated file. */
T3_[0] = dollar___simd_192(LOC4.dest);
echoBinSafe(T3_, 1);
Output:
TESTING AVX
[2.0, 2.0, 2.0, 2.0]
[1.0, 1.0, 1.1, 1.0]