109 const float* aVector,
111 unsigned int num_points)
113 const unsigned int eighthPoints = num_points / 8;
115 float* cPtr = cVector;
116 const float* aPtr = aVector;
118 const __m256 bVal = _mm256_set1_ps(scalar);
119 for (
unsigned int number = 0; number < eighthPoints; number++) {
120 __m256 aVal = _mm256_loadu_ps(aPtr);
122 __m256 cVal = _mm256_mul_ps(aVal, bVal);
124 _mm256_storeu_ps(cPtr, cVal);
130 for (
unsigned int number = eighthPoints * 8; number < num_points; number++) {
131 *cPtr++ = (*aPtr++) * scalar;
188 const float* aVector,
190 unsigned int num_points)
192 const unsigned int eighthPoints = num_points / 8;
194 float* cPtr = cVector;
195 const float* aPtr = aVector;
197 const __m256 bVal = _mm256_set1_ps(scalar);
198 for (
unsigned int number = 0; number < eighthPoints; number++) {
199 __m256 aVal = _mm256_load_ps(aPtr);
201 __m256 cVal = _mm256_mul_ps(aVal, bVal);
203 _mm256_store_ps(cPtr, cVal);
209 for (
unsigned int number = eighthPoints * 8; number < num_points; number++) {
210 *cPtr++ = (*aPtr++) * scalar;
219 const float* aVector,
221 unsigned int num_points)
223 const unsigned int quarterPoints = num_points / 4;
225 const float* inputPtr = aVector;
226 float* outputPtr = cVector;
228 for (
unsigned int number = 0; number < quarterPoints; number++) {
229 float32x4_t aVal = vld1q_f32(inputPtr);
230 float32x4_t cVal = vmulq_n_f32(aVal, scalar);
231 vst1q_f32(outputPtr, cVal);
236 for (
unsigned int number = quarterPoints * 4; number < num_points; number++) {
237 *outputPtr++ = (*inputPtr++) * scalar;
255 volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);