CORDIC算法与泰勒级数计算三角函数sin,cos的速度与精度对比

CORDIC算法与泰勒级数计算三角函数sin,cos的速度与精度对比如果 CORDIC 算法中不使用移位运算 而是直接使用浮点数乘法 除以浮点常数的除法会被 C 编译器优化为乘法 那么 CORDIC 算法的浮点乘法次数大大超过同精度的泰勒级数 完全失去了加快计算速度的意义

在学习CORDIC(Coordinate Rotation Digital Computer)算法的时候,最主要的疑问是:浮点数怎么做移位运算?如果CORDIC算法中不使用移位运算,而是直接使用浮点数乘法(除以浮点常数的除法会被C++编译器优化为乘法),那么CORDIC算法的浮点乘法次数大大超过同精度的泰勒级数,完全失去了加快计算速度的意义。本文的代码使用整型变量实现CORDIC算法中的移位运算,并与浮点数实现的泰勒级数算法进行速度与精度的对比。在电脑和Arduino nano上进行了测试:两个不同平台上,泰勒级数的速度均大大超过了CORDIC算法。在电脑CPU上,泰勒级数的速度与标准库接近。在Arduino nano上,泰勒级数的速度只有标准库的一半。

int n = (int)(Q * d2_pi);      
Q -= n * pi_2;
return cosSign * x * scale;
return sinSign * y * scale;

这些浮点运算。

注意,在电脑上的代码中,数组angles的类型是浮点数,在Arduino nano上,angles(x1e8)的类型是32bit的整数,就是对应的浮点数乘上10^8再取整。这样做比使用浮点型的angles要快得多。

#include <stdio.h> #include <cstdint> #include <math.h> #include <time.h> #include <bitset> #include <immintrin.h> //#define SINGLE_PRECISION #define FMADD // K=1/(sqrt(1+1)*sqrt(1+1/4)*sqrt(1+1/16)*sqrt(1+1/64)...) 是一个无穷乘积 // K=0.... // 在单精度情形,先将K * 10^9化成整数(要确保不超过2^32 =), // 再进行加减法和移位运算,最后乘上1e-9化为浮点数。双精度情形类似。 #ifdef SINGLE_PRECISION typedef float real; typedef int32_t integer; constexpr int32_t K = ; // 2^32 = constexpr float scale = 1e-9; constexpr uint8_t numIter = 32; //设得更大就会出错 constexpr float pi_2 = 1.4897; // pi/2 constexpr float d2_pi = 0.75814; // 2/pi constexpr float pi_128 = 0.017026; // pi/128 constexpr float d128_pi = 40.521; // 128/pi #else typedef double real; typedef int64_t integer; constexpr int64_t K = ; // 2^64 = constexpr double scale = 1e-18; constexpr uint8_t numIter = 40; constexpr double pi_2 = 1.4897; // pi/2 constexpr double d2_pi = 0.75814; // 2/pi constexpr double pi_128 = 0.017026; // pi/128 constexpr double d128_pi = 40.521; // 128/pi #endif // 如果constexpr int64_t K = ; // 2^64 = // 2^63 =  // constexpr double scale = 1e-19; // // 则Q=1.5时,迭代中会出现x,y超过2^63的情况,导致溢出 // 把int64_t改为uint64_t也不行,Q=1.5时,会出现小数减大数的情况,导致错误。 struct { const char cosine = 0; const char sine = 1; }triangle; // atan(1), atan(1/2), atan(1/4), atan(1/8), atan(1/16), atan(1/32), atan(1/64), ... constexpr real angles[41] = { 0.7448, 0.0806, 0.6864, 0.6761, 0.0957, 0.0268, 0.0477, 0.0001, 0.0067, 0.0079, 0.0009, 0.0005, 0.0009, 0.0004, 0.0000, 0.0000, 0.0000, 0.00000, 0.00000, 0.00000, 0.000000, 0.000000, 0.000000, 0.000000, 0.0000000, 0.0000000, 0.0000000, 0.00000000, 0.00000000, 0.00000000, 0.000000000, 0.000000000, 0.000000000, 0.000000000, 0.000000000058208, 0.000000000029104, 0.000000000014552, 0.000000000007276, 0.000000000003638, 0.000000000001819, 0.000000000000909 }; // cos(0),cos(pi/128),cos(2*pi/128),...,cos(63*pi/128),cos(pi/2) constexpr real cosTable[65] = { 1.0, 0.62042, 0.51724, 0.86902, 0.21969, 0.87100, 0.47810, 0.89412, 0.32304, 0.85286, 0.45440, 0.54398, 0.22088, 0.30367, 0.30208, 0.47390, 0.12867, 0.35307, 0.34433, 0.55153, 0.83550, 0.87115, 0.02721, 0.97071, 0.25452, 0.15837, 0.06449, 0.66063, 0.27370, 0.64846, 0.49592, 0.14670, 0.65476, 0.70669, 0.70184, 0.37769, 0.36455, 0.06268, 0.24335, 0.78453, 0.96023, 0.70973, 0.32217, 0.97841, 0.59978, 0.46066, 0.02822, 0.49899, 0.50898, 0.49883, 0.22201, 0.88916, 0.44623, 0.48984, 0.32640, 0.68698, 0.61283, 0.03014, 0.53617, 0.92163, 0.05608, 0.06675, 0.04181, 0.09123, 0.0, }; real cordic(real Q, char cos_or_sin) { // Q的单位是弧度 short sinSign = 1, cosSign = 1; if (Q < 0) { Q = -Q; sinSign = -1; } int n = (int)(Q * d2_pi); Q -= n * pi_2; // 减去pi/2的整数倍 switch (n % 4) { case 1: // 去掉负号的原始的Q是第二象限角 Q = pi_2 - Q; cosSign = -1; break; case 2: // 第三象限角 cosSign = -1; sinSign *= -1; break; case 3: // 第四象限角 Q = pi_2 - Q; sinSign *= -1; break; } integer x = K, y = 0, temp; real q = 0.0; for (uint8_t n = 0; n < numIter; n++) { if (q < Q) { temp = x - (y >> n); y = (x >> n) + y; x = temp; q += angles[n]; } else { temp = x + (y >> n); y = y - (x >> n); x = temp; q -= angles[n]; } } if (cos_or_sin == triangle.cosine) { return cosSign * x * scale; // cosQ } else { return sinSign * y * scale; // sinQ } } #define cordic_sin(Q) cordic((Q), triangle.sine) #define cordic_cos(Q) cordic((Q), triangle.cosine) //real cordic_sin(real Q) { // return cordic(Q, triangle.sine); //} // //real cordic_cos(real Q) { // return cordic(Q, triangle.cosine); //} real taylor_cos_sin(real Q, char cos_or_sin) { int sinSign = 1, cosSign = 1; if (Q < 0) { Q = -Q; } int n = (int)(Q * d2_pi); Q -= n * pi_2; // 减去pi/2的整数倍 switch (n % 4) { case 1: // 去掉负号的原始的Q是第二象限角 Q = pi_2 - Q; cosSign = -1; break; case 2: // 第三象限角 cosSign = -1; sinSign *= -1; break; case 3: // 第四象限角 Q = pi_2 - Q; sinSign *= -1; break; } // 0<= Q <=pi/2 n = (int)(Q * d128_pi); // n=0,1,2,...,63 Q -= n * pi_128; real Q2 = Q * Q; #ifdef FMADD __m128d Q2_128 = _mm_set_sd(Q2); // 再提高泰勒级数的阶数也无法提高精度了。 __m128d cosSmallQ128 = _mm_set_sd(-1.88889e-03); cosSmallQ128 = _mm_fmadd_sd(cosSmallQ128, Q2_128, _mm_set_sd(4.66667e-02)); cosSmallQ128 = _mm_fmadd_sd(cosSmallQ128, Q2_128, _mm_set_sd(-5.0000000000000000e-01)); cosSmallQ128 = _mm_fmadd_sd(cosSmallQ128, Q2_128, _mm_set_sd(1.0)); real cosSmallQ = _mm_cvtsd_f64(cosSmallQ128); //--------------------------------------------------- __m128d sinSmallQ128 = _mm_set_sd(8.33333e-03); sinSmallQ128 = _mm_fmadd_sd(sinSmallQ128, Q2_128, _mm_set_sd(-1.66667e-01)); sinSmallQ128 = _mm_fmadd_sd(sinSmallQ128, Q2_128, _mm_set_sd(1.0)); sinSmallQ128 = _mm_mul_sd(sinSmallQ128, _mm_set_sd(Q)); real sinSmallQ = _mm_cvtsd_f64(sinSmallQ128); #else real cosSmallQ = -1.88889e-03; cosSmallQ = cosSmallQ * Q2 + 4.66667e-02; cosSmallQ = cosSmallQ * Q2 - 5.0000000000000000e-01; cosSmallQ = cosSmallQ * Q2 + 1.0; //--------------------------------------------------- real sinSmallQ = 8.33333e-03; sinSmallQ = sinSmallQ * Q2 - 1.66667e-01; sinSmallQ = sinSmallQ * Q2 + 1.0; sinSmallQ = sinSmallQ * Q; #endif if (cos_or_sin == triangle.cosine) { // cos(n*pi/128+SmallQ)=cos(n*pi/128)cos(SmallQ)-sin(n*pi/128)sin(SmallQ) return cosSign * (cosTable[n] * cosSmallQ - cosTable[64 - n] * sinSmallQ); } else { // sin(n*pi/128+SmallQ)=sin(n*pi/128)cos(SmallQ)+cos(n*pi/128)sin(SmallQ) return sinSign * (cosTable[n] * sinSmallQ + cosTable[64 - n] * cosSmallQ); } } #define taylor_sin(Q) taylor_cos_sin((Q), triangle.sine) #define taylor_cos(Q) taylor_cos_sin((Q), triangle.cosine) //real taylor_sin(real Q) { // return taylor_cos_sin(Q, triangle.sine); //} // //real taylor_cos(real Q) { // return taylor_cos_sin(Q, triangle.cosine); //} int main() { real Q; printf("double,精度测试\n"); for (uint8_t n = 1; n < 16; n++) { Q = 0.1 * n; printf(" Q = %f\n", Q); printf("CORDIC: %16.15f, %16.15f\n", cordic_sin(Q), cordic_cos(Q)); printf("taylor: %16.15f, %16.15f\n", taylor_sin(Q), taylor_cos(Q)); printf("math.h: %16.15f, %16.15f\n", sin(Q), cos(Q)); printf("---------------------------------\n"); } / printf("速度测试,VS编译器优化设为 最大优化(优选速度)(/O2) \n"); clock_t start = clock(); double sum = 0.0; const double N = 1e8; const double c = 64.32 / N; for (double i = 1.0; i < N; i += 1.0) { Q = c * i; sum = sum + cordic_sin(Q) + cordic_cos(Q); } printf("sum=%f, CORDIC_Time: %fs\n", sum, (double)(clock() - start) / CLOCKS_PER_SEC); start = clock(); sum = 0.0; for (double i = 1.0; i < N; i += 1.0) { Q = c * i; sum = sum + taylor_sin(Q) + taylor_cos(Q); } printf("sum=%f, Taylor_Time: %fs\n", sum, (double)(clock() - start) / CLOCKS_PER_SEC); start = clock(); sum = 0.0; for (double i = 1.0; i < N; i += 1.0) { Q = c * i; sum = sum + sin(Q) + cos(Q); } printf("sum=%f, math.h_Time: %fs\n", sum, (double)(clock() - start) / CLOCKS_PER_SEC); return 0; }

CPU:Intel Core i7 12700H, 开发环境VS2022,使用双精度计算时,输出结果如下:

double,精度测试 Q = 0. CORDIC: 0.0617, 0.8047 taylor: 0.0828, 0.8026 math.h: 0.0828, 0.8026 --------------------------------- Q = 0. CORDIC: 0.4285, 0.1399 taylor: 0.5061, 0.1242 math.h: 0.5061, 0.1242 --------------------------------- Q = 0. CORDIC: 0.2469, 0.5257 taylor: 0.1340, 0.5606 math.h: 0.1340, 0.5606 --------------------------------- Q = 0. CORDIC: 0.9467, 0.2540 taylor: 0.8651, 0.2885 math.h: 0.8651, 0.2885 --------------------------------- Q = 0. CORDIC: 0.4363, 0.0286 taylor: 0.4203, 0.0373 math.h: 0.4203, 0.0373 --------------------------------- Q = 0. CORDIC: 0.4510, 0.0038 taylor: 0.5035, 0.9678 math.h: 0.5035, 0.9678 --------------------------------- Q = 0. CORDIC: 0.8764, 0.3585 taylor: 0.7691, 0.4488 math.h: 0.7691, 0.4488 --------------------------------- Q = 0. CORDIC: 0.0460, 0.6200 taylor: 0.9523, 0.7165 math.h: 0.9523, 0.7165 --------------------------------- Q = 0. CORDIC: 0.7326, 0.0863 taylor: 0.7484, 0.0664 math.h: 0.7483, 0.0664 --------------------------------- Q = 1.000000 CORDIC: 0.7630, 0.8554 taylor: 0.7897, 0.8140 math.h: 0.7897, 0.8140 --------------------------------- Q = 1. CORDIC: 0.0892, 0.6645 taylor: 0.1436, 0.5577 math.h: 0.1435, 0.5577 --------------------------------- Q = 1. CORDIC: 0.7490, 0.5995 taylor: 0.7227, 0.6673 math.h: 0.7226, 0.6673 --------------------------------- Q = 1. CORDIC: 0.7581, 0.3190 taylor: 0.7193, 0.4587 math.h: 0.7193, 0.4587 --------------------------------- Q = 1. CORDIC: 0.8356, 0.0846 taylor: 0.8460, 0.0241 math.h: 0.8460, 0.0241 --------------------------------- Q = 1. CORDIC: 0.3962, 0.0002 taylor: 0.4054, 0.0703 math.h: 0.4054, 0.0703 --------------------------------- 速度测试,VS编译器优化设为 最大优化(优选速度)(/O2) sum=., CORDIC_Time: 22.s sum=., Taylor_Time: 0.s sum=., math.h_Time: 0.s

Arduino nano(Atmel 328P)上的代码和结果如下:

// K=0. constexpr int32_t K = ; // 2^32 = constexpr float scale = 1e-9f; constexpr uint8_t numIter = 25; constexpr float pi_2 = 1.f; // pi/2 constexpr float d2_pi = 0.f; // 2/pi constexpr float pi_128 = 0.0f; // pi/128 constexpr float d128_pi = 40.f; // 128/pi constexpr int32_t pi_2x1e8 = ; // pi/2 * 1e8 struct { const char cosine = 0; const char sine = 1; }triangle; // [atan(1), atan(1/2), atan(1/4), atan(1/8), atan(1/16), atan(1/32), atan(1/64), ...]*1e8 constexpr int32_t anglesx1e8[27] = { , , , , , , , , , , 97656, 48828, 24414, 12207, 6104, 3052, 1526, 763, 381, 191, 95, 48, 24, 12, 6, 3, 1 }; // cos(0),cos(pi/128),cos(2*pi/128),...,cos(63*pi/128),cos(pi/2) constexpr float cosTable[65] = { 1.0f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; float cordic(float Q, char cos_or_sin) { // Q的单位是弧度 int sinSign = 1, cosSign = 1; if (Q < 0) { Q = -Q; sinSign = -1; } const int32_t n = (int32_t)(Q * d2_pi); Q -= n * pi_2; // 减去pi/2的整数倍 int32_t intQ = Q * 1e8f; switch (n % 4) { case 1: // 去掉负号的原始的Q是第二象限角 intQ = pi_2x1e8 - intQ; cosSign = -1; break; case 2: // 第三象限角 cosSign = -1; sinSign *= -1; break; case 3: // 第四象限角 intQ = pi_2x1e8 - intQ; sinSign *= -1; break; } int32_t x = K, y = 0, temp; int32_t q = 0; for (uint8_t n = 0; n < numIter; n++) { if (q < intQ) { temp = x - (y >> n); y = (x >> n) + y; x = temp; q += anglesx1e8[n]; } else { temp = x + (y >> n); y = y - (x >> n); x = temp; q -= anglesx1e8[n]; } } if (cos_or_sin == triangle.cosine) { return cosSign * x * scale; // cosQ } else { return sinSign * y * scale; // sinQ } } #define cordic_sin(Q) cordic((Q), triangle.sine) #define cordic_cos(Q) cordic((Q), triangle.cosine) float taylor_cos_sin(float Q, char cos_or_sin) { int sinSign = 1, cosSign = 1; if (Q < 0) { Q = -Q; } int n = (int)(Q * d2_pi); Q -= n * pi_2; // 减去pi/2的整数倍 switch (n % 4) { case 1: // 去掉负号的原始的Q是第二象限角 Q = pi_2 - Q; cosSign = -1; break; case 2: // 第三象限角 cosSign = -1; sinSign *= -1; break; case 3: // 第四象限角 Q = pi_2 - Q; sinSign *= -1; break; } // 0<= Q <=pi/2 n = (int)(Q * d128_pi); // n=0,1,2,...,63 Q -= n * pi_128; float Q2 = Q * Q; float cosSmallQ = -1.e-03f; cosSmallQ = cosSmallQ * Q2 + 4.e-02f; cosSmallQ = cosSmallQ * Q2 - 0.5f; cosSmallQ = cosSmallQ * Q2 + 1.0f; //--------------------------------------------------- float sinSmallQ = 8.e-03f; sinSmallQ = sinSmallQ * Q2 - 1.e-01f; sinSmallQ = sinSmallQ * Q2 + 1.0f; sinSmallQ = sinSmallQ * Q; if (cos_or_sin == triangle.cosine) { // cos(n*pi/128+SmallQ)=cos(n*pi/128)cos(SmallQ)-sin(n*pi/128)sin(SmallQ) return cosSign * (cosTable[n] * cosSmallQ - cosTable[64 - n] * sinSmallQ); } else { // sin(n*pi/128+SmallQ)=sin(n*pi/128)cos(SmallQ)+cos(n*pi/128)sin(SmallQ) return sinSign * (cosTable[n] * sinSmallQ + cosTable[64 - n] * cosSmallQ); } } #define taylor_sin(Q) taylor_cos_sin((Q), triangle.sine) #define taylor_cos(Q) taylor_cos_sin((Q), triangle.cosine) void setup() { Serial.begin(9600); } void loop() { float Q; Serial.println("Accuracy Test"); for (uint8_t n = 1; n < 16; n++) { Q = 0.1 * n; Serial.println("Q = " + String(Q, 4)); Serial.println("CORDIC: sin(Q) = " + String(cordic_sin(Q), 7) + ", cos(Q) = " + String(cordic_cos(Q), 7)); Serial.println("taylor: sin(Q) = " + String(taylor_sin(Q), 7) + ", cos(Q) = " + String(taylor_cos(Q), 7)); Serial.println("math.h: sin(Q) = " + String(sin(Q), 7) + ", cos(Q) = " + String(cos(Q), 7)); Serial.println("---------------------------------"); } / Serial.println("Speed Test"); float start = millis(); float sum = 0.0; const int N = 3000; const float c = 6.432 / N; for (int i = 1; i < N; i++) { Q = c * i; sum = sum + cordic_sin(Q) + cordic_cos(Q); } start = (millis() - start) / 1000.0f; Serial.print("sum = " + String(sum, 7) + ",\t CORDIC_Time:" + String(start, 3) + "s\n"); start = millis(); sum = 0.0; for (int i = 1; i < N; i++) { Q = c * i; sum = sum + taylor_sin(Q) + taylor_cos(Q); } start = (millis() - start) / 1000.0f; Serial.print("sum = " + String(sum, 7) + ",\t taylor_Time:" + String(start, 3) + "s\n"); start = millis(); sum = 0.0; for (int i = 1; i < N; i++) { Q = c * i; sum = sum + sin(Q) + cos(Q); } start = (millis() - start) / 1000.0f; Serial.print("sum = " + String(sum, 7) + ",\t math.h_Time:" + String(start, 3) + "s\n"); } 
Accuracy Test Q = 0.1000 CORDIC: sin(Q) = 0.0, cos(Q) = 0. taylor: sin(Q) = 0.0, cos(Q) = 0. math.h: sin(Q) = 0.0, cos(Q) = 0. --------------------------------- Q = 0.2000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 0.3000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 0.4000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 0.5000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 0.6000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 0.7000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 0.8000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 0.9000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 1.0000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 1.1000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 1.2000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 1.3000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 1.4000 CORDIC: sin(Q) = 0., cos(Q) = 0. taylor: sin(Q) = 0., cos(Q) = 0. math.h: sin(Q) = 0., cos(Q) = 0. --------------------------------- Q = 1.5000 CORDIC: sin(Q) = 0., cos(Q) = 0.0 taylor: sin(Q) = 0., cos(Q) = 0.0 math.h: sin(Q) = 0., cos(Q) = 0.0 --------------------------------- Speed Test sum = 73., CORDIC_Time:3.334s sum = 73., taylor_Time:1.563s sum = 73., math.h_Time:0.750s

今天的文章 CORDIC算法与泰勒级数计算三角函数sin,cos的速度与精度对比分享到此就结束了,感谢您的阅读。
编程小号
上一篇 2024-12-15 07:27
下一篇 2024-12-15 07:21

相关推荐

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
如需转载请保留出处:https://bianchenghao.cn/bian-cheng-ji-chu/87286.html