From f72f369b2ac174d4603c87e5dc8460e0a34d82b7 Mon Sep 17 00:00:00 2001 From: ZhuangYumin Date: Tue, 31 Oct 2023 12:24:56 +0800 Subject: [PATCH] feat: write new version --- data/CMakeLists.txt | 2 +- include/int2048.h | 6 +- src/CMakeLists.txt | 2 +- src/int2048.cpp | 253 +++++++++++++++++++++++++++----------------- tester/cases/3.py | 4 +- 5 files changed, 160 insertions(+), 107 deletions(-) diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt index 3aea297..0432e9a 100644 --- a/data/CMakeLists.txt +++ b/data/CMakeLists.txt @@ -1,6 +1,6 @@ set(PROJECT_NAME ${CMAKE_PROJECT_NAME}) set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_FLAGS "-O2") +set(CMAKE_CXX_FLAGS "-fsanitize=address -g") set(ENV{MAKEFLAGS} "-j16") include_directories(${PROJECT_SOURCE_DIR}/include) link_directories(${PROJECT_SOURCE_DIR}/src) diff --git a/include/int2048.h b/include/int2048.h index aca88de..b247e9d 100644 --- a/include/int2048.h +++ b/include/int2048.h @@ -65,8 +65,8 @@ root= 6 void ClaimMem(size_t); inline friend int UnsignedCmp(const int2048 &, const int2048 &); - inline friend void UnsignedAdd(int2048 &, const int2048 *); - inline friend void UnsignedMinus(int2048 &, const int2048 *); + inline friend void UnsignedAdd(int2048 &, const int2048 *, bool); + inline friend void UnsignedMinus(int2048 &, const int2048 *, bool); int2048 &add(const int2048 &); friend int2048 add(int2048, const int2048 &); @@ -85,7 +85,7 @@ root= 6 int2048 &operator-=(const int2048 &); friend int2048 operator-(int2048, const int2048 &); - inline friend void UnsignedMultiply(int2048 &, const int2048 *); + inline friend void UnsignedMultiply(int2048 &, const int2048 *, bool); int2048 &Multiply(const int2048 &); friend int2048 Multiply(int2048, const int2048 &); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index accb036..7267cb9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,5 @@ set(PROJECT_NAME ${CMAKE_PROJECT_NAME}) set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_FLAGS "-O2") +set(CMAKE_CXX_FLAGS "-fsanitize=address -g") include_directories(${PROJECT_SOURCE_DIR}/include) add_library(int2048 STATIC int2048.cpp) \ No newline at end of file diff --git a/src/int2048.cpp b/src/int2048.cpp index 9f42b24..ee06e0f 100644 --- a/src/int2048.cpp +++ b/src/int2048.cpp @@ -23,6 +23,7 @@ */ #include "int2048.h" +#include #include #include #include @@ -172,18 +173,34 @@ inline int UnsignedCmp(const int2048 &A, const int2048 &B) { if (A.val[i] != B.val[i]) return A.val[i] < B.val[i] ? -1 : 1; return 0; } +inline void UnsignedMinus(int2048 &, const int2048 *, bool inverse = false); -inline void UnsignedAdd(int2048 &A, const int2048 *const pB) { +inline void UnsignedAdd(int2048 &A, const int2048 *const pB, + bool inverse = false) { if (&A == pB) throw "UnsignedAdd: A and B are the same object"; A.ClaimMem(std::max(A.num_length, pB->num_length) + 2); - for (int i = 0; - i < (std::max(A.num_length, pB->num_length) + int2048::kNum - 1) / - int2048::kNum; - i++) { - if (i < (pB->num_length + int2048::kNum - 1) / int2048::kNum) - A.val[i] += pB->val[i]; - if (i + 1 < A.buf_length) A.val[i + 1] += A.val[i] / int2048::kMod; - A.val[i] %= int2048::kMod; + if (!inverse) { + for (int i = 0; + i < (std::max(A.num_length, pB->num_length) + int2048::kNum - 1) / + int2048::kNum; + i++) { + if (i < (pB->num_length + int2048::kNum - 1) / int2048::kNum) + A.val[i] += pB->val[i]; + if (i + 1 < A.buf_length) A.val[i + 1] += A.val[i] / int2048::kMod; + A.val[i] %= int2048::kMod; + } + } else { + for (int i = (std::max(A.num_length, pB->num_length) + int2048::kNum - 1) / + int2048::kNum - + 1; + i >= 0; i--) { + if (i < (pB->num_length + int2048::kNum - 1) / int2048::kNum) + A.val[i] += pB->val[i]; + if (A.val[i] >= int2048::kMod && i - 1 >= 0) { + A.val[i - 1] += A.val[i] / int2048::kMod; + A.val[i] %= int2048::kMod; + } + } } A.num_length = std::max(A.num_length, pB->num_length); const static int kPow10[9] = {1, 10, 100, 1000, 10000, @@ -237,14 +254,28 @@ int2048 add(int2048 A, const int2048 &B) { return std::move(A.add(B)); } -inline void UnsignedMinus(int2048 &A, const int2048 *const pB) { +inline void UnsignedMinus(int2048 &A, const int2048 *const pB, bool inverse) { if (&A == pB) throw "UnsignedMinus: A and B are the same object"; - for (int i = 0; i < (pB->num_length + int2048::kNum - 1) / int2048::kNum; - i++) { - A.val[i] -= pB->val[i]; - if (A.val[i] < 0) { - A.val[i] += int2048::kMod; - A.val[i + 1]--; + if (!inverse) { + for (int i = 0; i < (pB->num_length + int2048::kNum - 1) / int2048::kNum; + i++) { + A.val[i] -= pB->val[i]; + if (A.val[i] < 0 && i + 1 < A.buf_length) { + A.val[i] += int2048::kMod; + A.val[i + 1]--; + } + } + } else { + int blocks_A = (A.num_length + int2048::kNum - 1) / int2048::kNum; + int blocks_B = (pB->num_length + int2048::kNum - 1) / int2048::kNum; + if (blocks_A < blocks_B) A.ClaimMem(blocks_A * int2048::kNum); + for (int i = (pB->num_length + int2048::kNum - 1) / int2048::kNum - 1; + i >= 0; i--) { + A.val[i] -= pB->val[i]; + if (A.val[i] < 0 && i - 1 >= 0) { + A.val[i] += int2048::kMod; + A.val[i - 1]--; + } } } const static int kPow10[9] = {1, 10, 100, 1000, 10000, @@ -391,7 +422,8 @@ void int2048::NTTTransform(__int128_t *a, int NTT_blocks, for (int i = 0; i < NTT_blocks; i++) (a[i] *= inv) %= int2048::kNTTMod; } } -inline void UnsignedMultiply(int2048 &A, const int2048 *pB) { +inline void UnsignedMultiply(int2048 &A, const int2048 *pB, + bool inverse = false) { if (&A == pB) throw "UnsignedMultiply: A and B are the same object"; int blocks_of_A = ((A.num_length + int2048::kNum - 1) / int2048::kNum); int blocks_of_B = ((pB->num_length + int2048::kNum - 1) / int2048::kNum); @@ -414,12 +446,21 @@ inline void UnsignedMultiply(int2048 &A, const int2048 *pB) { for (int i = 0; i < NTT_blocks; i++) pDC[i] = (pDA[i] * pDB[i]) % int2048::kNTTMod; A.NTTTransform(pDC, NTT_blocks, true); - for (int i = 0; i < NTT_blocks - 1; i++) { - pDC[i + 1] += pDC[i] / int2048::kNTTBlcokBase; - pDC[i] %= int2048::kNTTBlcokBase; + if (!inverse) { + for (int i = 0; i < NTT_blocks - 1; i++) { + pDC[i + 1] += pDC[i] / int2048::kNTTBlcokBase; + pDC[i] %= int2048::kNTTBlcokBase; + } + if (pDC[NTT_blocks - 1] >= int2048::kNTTBlcokBase) + throw "UnsignedMultiply: NTT result overflow"; + } else { + for (int i = NTT_blocks - 1; i > 0; i--) { + if (i - 1 >= 0) pDC[i - 1] += pDC[i] / int2048::kNTTBlcokBase; + pDC[i] %= int2048::kNTTBlcokBase; + } + if (pDC[0] >= int2048::kNTTBlcokBase) + throw "UnsignedMultiply: NTT result overflow"; } - if (pDC[NTT_blocks - 1] >= int2048::kNTTBlcokBase) - throw "UnsignedMultiply: NTT result overflow"; int flag_store = A.flag; A.ClaimMem(NTT_blocks * 4); memset(A.val, 0, A.buf_length * sizeof(int)); @@ -514,89 +555,101 @@ inline void UnsignedDivide(int2048 &A, const int2048 *pB) { A = std::move(int2048(0)); return; } - int2048 x; - /*init x as 10^(L1-L2)*/ - x.ClaimMem(L1 - L2 + 1); - x.num_length = L1 - L2 + 1; - memset(x.val, 0, x.buf_length * sizeof(int)); + /** + * Now pre-process has done. We can start the main algorithm: + * 1. Convert B to scientific counting method and process the index. + * 2. In the state of reversing, calculate 1/B' using Newton-Raphson method. + * 3. Reverse the iterative results again and calculate the answer. + * + * Warning: in reversed mode, num_length has no exact meaning, just operate a + * block as a whole + */ + int2048 origin_A(A); + int pow_A = (L1 + int2048::kNum - 1) / int2048::kNum - 1; + int pow_B = (L2 + int2048::kNum - 1) / int2048::kNum - 1; + // pow_B+1 is the number of blocks (with number) of B' + int2048 inverse_B(*pB); + for (int i = 0; (i << 1) < (pow_B + 1); i++) + std::swap(inverse_B.val[i], inverse_B.val[pow_B - i]); + int2048 x(int2048::kMod); + assert(x.val[1] == 1); + int *store[2]; + store[0] = new int[pow_A + 5](); + store[1] = new int[pow_A + 5](); + int tot = 0; + for (int i = 0; i < pow_A + 1; i++) { + store[0][i] = A.val[i]; + store[1][i] = -1; + } + while (true) { + int2048 invsere_two(2), tmp_x(x); + UnsignedMultiply(tmp_x, &inverse_B, true); + UnsignedMinus(invsere_two, &tmp_x, true); + UnsignedMultiply(x, &invsere_two, true); + /** + * now x is the next x, store[tot] stores last x, store[tot^1] stores the x + * previous to store[x] + */ + int blocks_of_x = (x.num_length + int2048::kNum - 1) / int2048::kNum; + if (blocks_of_x > pow_A + 3) { + x.ClaimMem((pow_A + 3) * int2048::kNum); + blocks_of_x = pow_A + 3; + } + bool pre_same = true, pre_pre_same = true; + for (int i = 0; i < pow_A + 3; i++) { + if (store[tot][i] != (i < blocks_of_x ? x.val[i] : 0)) { + pre_same = false; + break; + } + } + for (int i = 0; i < pow_A + 3; i++) { + if (store[tot ^ 1][i] != (i < blocks_of_x ? x.val[i] : 0)) { + pre_pre_same = false; + break; + } + } + if (pre_pre_same || pre_same) break; + memcpy(store[tot ^ 1], store[tot], (pow_A + 3) * sizeof(int)); + for (int i = 0; i < pow_A + 3; i++) { + if (i < blocks_of_x) + store[tot][i] = x.val[i]; + else + store[tot][i] = 0; + } + tot ^= 1; + } + delete[] store[0]; + delete[] store[1]; + /** + * Now reverse x back. + */ + int blocks_of_x = (x.num_length + int2048::kNum - 1) / int2048::kNum; + int pow_x = blocks_of_x - 1; + for (int i = 0; i < blocks_of_x / 2; i++) + std::swap(x.val[i], x.val[blocks_of_x - i - 1]); + x.num_length = blocks_of_x * int2048::kNum; const static int kPow10[9] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000}; - x.val[(x.num_length - 1) / int2048::kNum] = - kPow10[(x.num_length - 1) % int2048::kNum]; - /*reset x.num_length*/ - while (x.val[(x.num_length - 1) / int2048::kNum] / - kPow10[(x.num_length - 1) % int2048::kNum] == - 0) { + /*Now get the accurate x.num_length for future computing*/ + while (x.num_length > 0 && + x.val[(x.num_length - 1) / int2048::kNum] / + kPow10[(x.num_length - 1) % int2048::kNum] == + 0) x.num_length--; - if (x.num_length == 0) throw "UnsignedMultiply: num_length==0"; + UnsignedMultiply(A, &x); + A.RightMoveBy((pow_B + pow_x) * int2048::kNum); + /*Now we begin to process error*/ + int2048 tmp(*pB),kOne(1); + UnsignedMultiply(tmp, &A); + while (UnsignedCmp(origin_A, tmp) < 0) { + UnsignedMinus(A,&kOne); + UnsignedMinus(tmp, pB); } - /*check the highest number of B*/ - if (pB->val[(pB->num_length - 1) / int2048::kNum] / - kPow10[(pB->num_length - 1) % int2048::kNum] == - 1) { - /* x=5*x */ - int2048 tmp(x); - tmp.add(tmp); - tmp.add(tmp); - x.add(tmp); - } else if (pB->val[(pB->num_length - 1) / int2048::kNum] / - kPow10[(pB->num_length - 1) % int2048::kNum] < - 3) { - /* x=3*x */ - int2048 tmp(x); - tmp.add(tmp); - x.add(tmp); - } else if (pB->val[(pB->num_length - 1) / int2048::kNum] / - kPow10[(pB->num_length - 1) % int2048::kNum] < - 5) { - /* x=2*x */ - x.add(x); + UnsignedMinus(origin_A, &tmp); + while (UnsignedCmp(origin_A, *pB) >= 0) { + UnsignedAdd(A,&kOne); + UnsignedMinus(origin_A, pB); } - int2048 x_pre(x); - int2048 kOne(1); - UnsignedMinus(x_pre, &kOne); - // int cnt = 0; - while (true) { - /** - * x_{n+1}=2*x_n-x_n*x_n*B/(10^L1)) - */ - int2048 tmp = *pB; - UnsignedMultiply(tmp, &x); - UnsignedMultiply(tmp, &x); - // std::cerr << "max length ratio during computing" - // << (double)tmp.num_length / (double)L1 << std::endl; - tmp.RightMoveBy(L1); - int2048 x_next = x; - UnsignedAdd(x_next, &x); - UnsignedMinus(x_next, &tmp); - if (UnsignedCmp(x_next, x) == 0) break; - if (UnsignedCmp(x_next, x_pre) == 0) break; - x_pre = std::move(x); - x = std::move(x_next); - // std::cerr << "length ratio of x after each step" - // << (double)x.num_length / (double)L1 << std::endl; - // cnt++; - } - /*ret=A*x/10^(L1)*/ - UnsignedMultiply(x, &A); - x.RightMoveBy(L1); - /*remain=A -B*ret*/ - int2048 tmp = *pB; - UnsignedMultiply(tmp, &x); - if (UnsignedCmp(A, tmp) < 0) { - x -= 1; - tmp = *pB; - UnsignedMultiply(tmp, &x); - } - UnsignedMinus(A, &tmp); - int2048 remain = std::move(A); - while (UnsignedCmp(remain, *pB) >= 0) { - UnsignedMinus(remain, pB); - UnsignedAdd(x, &kOne); - // cnt++; - } - // std::cerr << cnt << std::endl; - A = std::move(x); } int2048 &int2048::Divide(const int2048 &B) { if (this == &B) { diff --git a/tester/cases/3.py b/tester/cases/3.py index 22f3797..60cab52 100755 --- a/tester/cases/3.py +++ b/tester/cases/3.py @@ -36,7 +36,7 @@ opt_python=[] if True: for i in range(0,10): - val=randint(-10**100,10**100) + val=randint(-10**2,10**2) opt_cpp.append("a_"+str(i)+"=int2048(\""+str(val)+"\");") opt_python.append("a_"+str(i)+"="+str(val)) opt_cpp.append("a_"+str(i)+".print(); puts(\"\");") @@ -80,7 +80,7 @@ for opt in opt_cpp: print(opt,file=sourc_cpp) print(code_cpp_suf,file=sourc_cpp) sourc_cpp.close() -system("g++ /tmp/3.cpp -I /home/happyzym/CSWorkSpace/Proc/BigHomework/BH-int2048-2023/include/ -L /home/happyzym/CSWorkSpace/Proc/BigHomework/BH-int2048-2023/build/src/ -lint2048 -g -o /tmp/3") +system("g++ /tmp/3.cpp -I /home/happyzym/CSWorkSpace/Proc/BigHomework/BH-int2048-2023/include/ -L /home/happyzym/CSWorkSpace/Proc/BigHomework/BH-int2048-2023/build/src/ -lint2048 -fsanitize=address -g -o /tmp/3") system("/tmp/3 > /tmp/3_cpp.out") sourc_python=open("/tmp/3.py","w")