Below is the test source, which is then compiled.
The compile options are: Below is the test source.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Fills two arrays with pseudo-random values in [-1000, 999], stores their
 * element-wise sums in a third array, and prints the total of all sums.
 *
 * Returns 0.
 */
int main() {
    int arrRand1[1000];
    int arrRand2[1000];
    int arrSum[1000];
    /* Must start at zero: the loop below accumulates into it with +=, and
     * reading an uninitialized automatic variable is undefined behavior. */
    long int total = 0;

    /* Seed the generator from the current time so each run differs. */
    srand((unsigned int)time(NULL));

    for (int i = 0; i < 1000; i++) {
        /* rand() % 2000 is in [0, 1999]; shifting by 1000 gives [-1000, 999]. */
        arrRand1[i] = rand() % 2000 - 1000;
        arrRand2[i] = rand() % 2000 - 1000;
        arrSum[i] = arrRand1[i] + arrRand2[i];
        total += arrSum[i];
    }

    printf("%ld\n", total);
    return 0;
}
Disassembling the simple program above gives the following output:
0000000000400560 <main>:
400560: d13f83ff sub sp, sp, #0xfe0
400564: d2800000 mov x0, #0x0 // #0
400568: a9007bfd stp x29, x30, [sp]
40056c: 910003fd mov x29, sp
400570: a9025bf5 stp x21, x22, [sp, #32]
400574: 5289ba75 mov w21, #0x4dd3 // #19923
400578: a90153f3 stp x19, x20, [sp, #16]
40057c: 913f83b6 add x22, x29, #0xfe0
400580: f9001bf7 str x23, [sp, #48]
400584: 72a20c55 movk w21, #0x1062, lsl #16
400588: 5280fa14 mov w20, #0x7d0 // #2000
40058c: 910103b7 add x23, x29, #0x40
400590: 97ffffd8 bl 4004f0 <time@plt>
400594: 97ffffeb bl 400540 <srand@plt>
400598: 97ffffde bl 400510 <rand@plt>
40059c: 97ffffdd bl 400510 <rand@plt>
4005a0: 9b357c01 smull x1, w0, w21
4005a4: b84046e2 ldr w2, [x23], #4
4005a8: 9367fc21 asr x1, x1, #39
4005ac: eb1602ff cmp x23, x22
4005b0: 4b807c21 sub w1, w1, w0, asr #31
4005b4: 1b148020 msub w0, w1, w20, w0
4005b8: 510fa000 sub w0, w0, #0x3e8
4005bc: 0b020000 add w0, w0, w2
4005c0: 8b20c273 add x19, x19, w0, sxtw
4005c4: 54fffea1 b.ne 400598 <main+0x38> // b.any
4005c8: aa1303e1 mov x1, x19
4005cc: 90000000 adrp x0, 400000 <_init-0x4b8>
4005d0: 911ee000 add x0, x0, #0x7b8
4005d4: 97ffffdf bl 400550 <printf@plt>
4005d8: a9407bfd ldp x29, x30, [sp]
4005dc: 52800000 mov w0, #0x0 // #0
4005e0: a94153f3 ldp x19, x20, [sp, #16]
4005e4: a9425bf5 ldp x21, x22, [sp, #32]
4005e8: f9401bf7 ldr x23, [sp, #48]
4005ec: 913f83ff add sp, sp, #0xfe0
4005f0: d65f03c0 ret
4005f4: 00000000 .inst 0x00000000 ; undefined
Vectorization performs the same operation on successive data elements. It relies on the instruction set of a SIMD (Single Instruction, Multiple Data) architecture, in which the same operation is performed concurrently on successive data. Naturally, vectorization can deliver higher performance than Single Instruction, Single Data (SISD) execution, which processes a single data element with each instruction.