diff options
author | Rowan Lee <rowan.lee@arm.com> | 2017-09-06 15:43:09 +0100 |
---|---|---|
committer | Pierre Langlois <pierre.langlois@arm.com> | 2017-09-19 11:58:32 +0100 |
commit | 389a2422e6732b971635211b46ab6a899a899de0 (patch) | |
tree | 310a7fb5a8f0215d8d2cc0f89bb5abaa74a7afd1 /examples | |
parent | f9a3bc17461518bd96d7afff61fb4cb5e3d48964 (diff) |
Added aarch32 SIMD example computing the Mandelbrot Set
Change-Id: I0246127a87a645b782951aee8ef6d53e5dd4a392
Diffstat (limited to 'examples')
-rw-r--r-- | examples/aarch32/mandelbrot.cc | 219 |
1 files changed, 219 insertions, 0 deletions
diff --git a/examples/aarch32/mandelbrot.cc b/examples/aarch32/mandelbrot.cc new file mode 100644 index 00000000..331ff229 --- /dev/null +++ b/examples/aarch32/mandelbrot.cc @@ -0,0 +1,219 @@ +// Copyright 2017, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "examples.h" + +#define __ masm-> + +void GenerateMandelBrot(MacroAssembler* masm) { + const QRegister kCReal = q0; + const QRegister kCImag = q1; + + const QRegister kCRealStep = q13; + const QRegister kCImagStep = q14; + + const QRegister kModSqLimit = q15; + + // Save register values. + __ Push(RegisterList(r4, r5, r6)); + + __ Vmov(F32, kCRealStep, 0.125); + __ Vmov(F32, kCImagStep, 0.0625); + + const Register kZero = r2; + __ Mov(kZero, 0); + + const DRegister kStars = d6; + const DRegister kSpaces = d7; + // Output characters - packed 4 characters into 32 bits. + __ Vmov(I8, kStars, '*'); + __ Vmov(I8, kSpaces, ' '); + + const DRegisterLane kNegTwo = DRegisterLane(d7, 1); + __ Vmov(s15, -2.0); + + // Imaginary part of c. + __ Vdup(Untyped32, kCImag, kNegTwo); + + // Max modulus squared. + __ Vmov(F32, kModSqLimit, 4.0); + + // Height of output in characters. + __ Mov(r4, 64); + + // String length will be 129, so need 132 bytes of space. + const uint32_t kStringLength = 132; + + // Make space for our string. + __ Sub(sp, sp, kStringLength); + + // Set up a starting pointer for the string. + const Register kStringPtr = r6; + __ Mov(kStringPtr, sp); + + // Loop over imaginary values of c from -2 to 2, taking + // 64 equally spaced values in the range. + { + Label c_imag_loop; + + __ Bind(&c_imag_loop); + + // Real part of c. + // Store 4 equally spaced values in q0 (kCReal) to use SIMD. + __ Vmov(s0, -2.0); + __ Vmov(s1, -1.96875); + __ Vmov(s2, -1.9375); + __ Vmov(s3, -1.90625); + + // Width of output in terms of sets of 4 characters - twice that + // of height to compensate for ratio of character height to width. + __ Mov(r5, 32); + + const Register kWriteCursor = r3; + // Set a cursor ready to write the next line. + __ Mov(kWriteCursor, kStringPtr); + + // Loop over real values of c from -2 to 2, processing + // 4 different values simultaneously using SIMD. + { + const QRegister kFlags = q2; + const DRegister kLowerFlags = d4; + + Label c_real_loop; + __ Bind(&c_real_loop); + + // Get number of iterations. + __ Add(r1, r0, 1); + + // Perform the iterations of z(n+1) = zn^2 + c using SIMD. + // If the result is that c is in the set, the element of + // kFlags will be 0, else ~0. + { + const QRegister kZReal = q8; + const QRegister kZImag = q9; + + // Real part of z. + __ Vmov(F32, kZReal, 0.0); + + // Imaginary part of z. + __ Vmov(F32, kZImag, 0.0); + + __ Vmov(F32, kFlags, 0.0); + + Label iterative_formula_start, iterative_formula_end; + __ Bind(&iterative_formula_start); + __ Subs(r1, r1, 1); + __ B(le, &iterative_formula_end); + + // z(n+1) = zn^2 + c. + // re(z(n+1)) = re(c) + re(zn)^2 - im(zn)^2. + // im(z(n+1)) = im(c) + 2 * re(zn) * im(zn) + + __ Vmul(F32, q10, kZReal, kZImag); // re(zn) * im(zn) + + __ Vmul(F32, kZReal, kZReal, kZReal); // re(zn)^2 + __ Vadd(F32, kZReal, kCReal, kZReal); // re(c) + re(zn)^2 + __ Vmls(F32, kZReal, kZImag, kZImag); // re(c) + re(zn)^2 - im(zn)^2 + + __ Vmov(F32, kZImag, kCImag); // im(c) + __ Vmls(F32, kZImag, q10, kNegTwo); // im(c) + 2 * re(zn) * im(zn) + + __ Vmul(F32, q10, kZReal, kZReal); // re(z(n+1))^2 + __ Vmla(F32, q10, kZImag, kZImag); // re(z(n+1))^2 + im(z(n+1))^2 + __ Vcgt(F32, q10, q10, kModSqLimit); // |z(n+1)|^2 > 4 ? ~0 : 0 + __ Vorr(F32, kFlags, kFlags, q10); // (~0/0) | above result + + __ B(&iterative_formula_start); + __ Bind(&iterative_formula_end); + } + + // Narrow twice so that each mask is 8 bits, packed into + // a single 32 bit register s4. + // kLowerFlags is the lower half of kFlags, so the second narrow will + // be working on the results of the first to halve the size of each + // representation again. + __ Vmovn(I32, kLowerFlags, kFlags); + __ Vmovn(I16, kLowerFlags, kFlags); + + // '*' if in set, ' ' if not. + __ Vbsl(Untyped32, kLowerFlags, kSpaces, kStars); + + // Add this to the string. + __ Vst1(Untyped32, + NeonRegisterList(kLowerFlags, 0), + AlignedMemOperand(kWriteCursor, k32BitAlign, PostIndex)); + + // Increase real part of c. + __ Vadd(F32, kCReal, kCReal, kCRealStep); + + __ Subs(r5, r5, 1); + __ B(ne, &c_real_loop); + } + + // Put terminating character. + __ Strb(kZero, MemOperand(kWriteCursor)); + + // Print the string. + __ Printf("%s\n", kStringPtr); + + // Increase imaginary part of c. + __ Vadd(F32, kCImag, kCImag, kCImagStep); + + __ Subs(r4, r4, 1); + __ B(ne, &c_imag_loop); + } + // Restore stack pointer. + __ Add(sp, sp, kStringLength); + // Restore register values. + __ Pop(RegisterList(r4, r5, r6)); + __ Bx(lr); +} + +#ifndef TEST_EXAMPLES +int main() { + MacroAssembler masm; + // Generate the code for the example function. + Label mandelbrot; + masm.Bind(&mandelbrot); + GenerateMandelBrot(&masm); + masm.FinalizeCode(); +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH32 + // There is no simulator defined for VIXL AArch32. + printf("This example cannot be simulated\n"); +#else + byte* code = masm.GetBuffer()->GetStartAddress<byte*>(); + uint32_t code_size = masm.GetSizeOfCodeGenerated(); + ExecutableMemory memory(code, code_size); + // Run the example function. + double (*mandelbrot_func)(uint32_t) = + memory.GetEntryPoint<double (*)(uint32_t)>(mandelbrot, + masm.GetInstructionSetInUse()); + uint32_t iterations = 1000; + (*mandelbrot_func)(iterations); +#endif + return 0; +} +#endif // TEST_EXAMPLES |