问题
多项式环
R
=
G
F
[
p
d
]
[
x
]
/
(
x
n
?
1
)
R=GF[p^d][x]/(x^n-1)
R=GF[pd][x]/(xn?1),其中有限域
G
F
(
p
d
)
?
Z
p
[
x
]
/
(
m
(
x
)
)
GF(p^d) \cong Z_p[x]/(m(x))
GF(pd)?Zp?[x]/(m(x)),
m
(
x
)
m(x)
m(x)是
Z
p
Z_p
Zp?上
d
d
d次不可约多项式。对于任意
f
,
g
∈
R
f,g \in R
f,g∈R,如何快速计算
f
?
g
f\cdot g
f?g?
NTL
代码
#include <iostream>
#include "tools.h"
#include <NTL/ZZ_p.h> // integers mod p
#include <NTL/ZZ_pX.h> // polynomials over ZZ_p
#include <NTL/ZZ_pE.h> // ring/field extension of ZZ_p
#include <NTL/ZZ_pEX.h> // polynomials over ZZ_pE
#include <NTL/ZZ_pXFactoring.h>
#include <NTL/ZZ_pEXFactoring.h>
using namespace std;
using namespace NTL;
#pragma comment(lib, "NTL")
int main()
{
ZZ p(2); //初始化为2
//群Z_p
ZZ_p::init(p);
//随机生成Z_p[x]中的d次不可约多项式
int d = 149;
ZZ_pX m;
//BuildIrred(m, d);
SetCoeff(m, d);
SetCoeff(m, 10);
SetCoeff(m, 9);
SetCoeff(m, 7);
SetCoeff(m, 0);
//域GF(p^d) = Z_p[x]/m(x)
ZZ_pE::init(m);
//GF(p^d)[x]中的多项式
ZZ_pEX f, g, h, P;
int n = 53;
//P(x)=x^n-1
SetCoeff(P, 0, -1);
SetCoeff(P, n);
// f(x) = x^8 - 1
SetCoeff(f, 8); //将 x^8 系数置为 1
SetCoeff(f, 0, -1); //将 x^0 系数置为 -1
//随机生成n长多项式g(x)
random(g, n-1);
// 环上多项式的运算
h = g * g;
cout << "p = " << p << endl;
cout << "d = " << d << endl;
cout << "n = " << n << endl;
cout << "m(x) = " << m << endl;
cout << "P(x) = " << P << endl;
//cout << "f = " << f << endl;
//cout << "g = " << g << endl;
//cout << "h = " << h << endl;
int loop = 10;
printf("%d rounds, NTL mul: ", loop);
Loop(10, f*g);
printf("%d rounds, NTL mod: ", loop);
Loop(10, h%P);
return 0;
}
结果
p = 2
d = 149
n = 53
m(x) = [1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
P(x) = [[1] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [1]]
10 rounds, NTL mul: 5.20162 s
10 rounds, NTL mod: 17.3201 s
NTT+Barrett
代码
IncompleteNTT
#include "IncompleteNTT.h"
int32 brv(int32 b, int32 l)
{
int32 bb = 0;
for (int32 i = 0; i < l; i++)
{
bb <<= 1;
bb |= (b & 1);
b >>= 1;
}
return bb;
}
//int64 exgcd(int64* x, int64* y, int64 a, int64 b)
//{
// if (b == 0)
// {
// *x = 1;
// *y = 0;
// return a;
// }
// int64 ret = exgcd(x, y, b, a%b);
// int64 tmp = *x;
// *x = *y;
// *y = tmp - (a / b) * (*y);
// return ret;
//}
//int32 deg(Poly& f)
//{
// int32 d = f.size() - 1;
// int64* pf = f.data();
//
// while (pf[d] == 0)
// d--;
// return d;
//}
ostream& operator<<(ostream& cout, Poly& f)
{
int32 d = deg(f);
int64* pf = f.data();
printf("[ ");
for (int32 i = 0; i < d; i++)
printf("%lld, ", pf[i]);
printf("%lld ]", pf[d]);
return cout;
}
ostream& print_vector(int64* f, int32 len)
{
len--;
printf("[ ");
for (int32 i = 0; i < len; i++)
printf("%lld, ", f[i]);
printf("%lld ]", f[len]);
return std::cout;
}
//############################# 分割线 ###############################
void IncompleteNTT::init(int64 p, int64 w, int32 k, int32 h)
{
this->p = p;
this->w = w;
this->k = k;
this->h = h;
this->m = (1L << k);
this->n = this->h * this->m;
int64 minv, pinv;
int64 gcd = exgcd(&minv, &pinv, m, p);
if (gcd != 1)
ErrorInfo("%s\n", "gcd(2^k, p) != 1");
this->factor = minv < 0 ? minv + p : minv;
this->factor_pre = PreCompute(this->factor, p);
this->one_pre = PreComputeMod(p);
this->W.resize(this->m + 1); //重复存储:w^{0} = w^{m}
this->W_pre.resize(this->m + 1);
int64 *pW = W.data();
int64 *pW_pre = W_pre.data();
int64 wi = 1; //单位根的幂次
for (int32 i = 0; i <= this->m; i++)
{
pW[i] = wi;
pW_pre[i] = PreCompute(wi, p);
wi = (wi * w) % p;
}
brvlist.resize(k + 1);
for (int j = 0; j <= k; j++)
{
auto& list = brvlist[j];
int32 len = (1L << j);
for (int i = 0; i < len; i++)
list.push_back(brv(i, j));
}
}
void IncompleteNTT::FwdNTT(NTTRep& F, Poly& f)
{
int32 d = deg(f);
if (d >= this->n)
ErrorInfo("%s\n", "deg(f) > n - 1");
if (F.size() < this->n)
F.resize(this->n);
int64* pF = F.data();
int64* pf = f.data();
FwdNTT(pF, pf, f.size());
}
void IncompleteNTT::FwdNTT(int64* pF, int64* pf, int32 flen)
{
if (pF != pf)
{
int32 nn = min(flen, n);
for (int32 i = 0; i < nn; i++)
pF[i] = BarrettMod(pf[i], one_pre, p);
}
int64* pW = W.data();
int64* pW_pre = W_pre.data();
int64 P = 2 * p;
for (int32 j = 0; j < k; j++)
{
int32 blocknum = 1 << j; //对j层分块
int32 blocksize = this->n >> j; //对j层块大小
int32 offset = blocksize >> 1; //计算第j+1层时蝴蝶的偏移量(块大小的一半)
int32* list = brvlist[j].data();
if (offset >= 4 * h)
for (int32 i = 0; i < blocknum; i++)
{
int64 X, WY;
int32 s = i * blocksize; //块的起始位置
int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
int64 ww_pre = pW_pre[list[i] << (k - j - 1)];
int64* pX = pF + (s);
int64* pY = pF + (s + offset);
for (int32 k = 0; k < offset; k += 4)
{
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
}
}
else if (offset == 2 * h)
for (int32 i = 0; i < blocknum; i++)
{
int64 X, WY;
int32 s = i * blocksize; //块的起始位置
int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
int64 ww_pre = pW_pre[list[i] << (k - j - 1)];
int64* pX = pF + (s);
int64* pY = pF + (s + offset);
for (int32 k = 0; k < h; k++)
{
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
}
}
else
for (int32 i = 0; i < blocknum; i++)
{
int64 X, WY;
int32 s = i * blocksize; //块的起始位置
int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
int64 ww_pre = pW_pre[list[i] << (k - j - 1)];
int64* pX = pF + (s);
int64* pY = pF + (s + offset);
for (int32 k = 0; k < h; k++)
{
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
}
}
}
//迭代结束,结果是m=2^k个h长多项式
//约束范围到[0,2p)
if (k >= 2)
for (int32 i = 0; i < n; i += 4)
{
*pF -= ((P - *pF) >> 63)&P;
pF++;
*pF -= ((P - *pF) >> 63)&P;
pF++;
*pF -= ((P - *pF) >> 63)&P;
pF++;
*pF -= ((P - *pF) >> 63)&P;
pF++;
}
else
for (int32 i = 0; i < n; i++)
{
*pF -= ((P - *pF) >> 63)&P;
pF++;
}
}
void IncompleteNTT::InvNTT(Poly& f, NTTRep& F)
{
int32 d = deg(F);
if (d >= this->n)
ErrorInfo("%s\n", "deg(F) > n - 1");
if (f.size() < this->n)
f.resize(this->n);
int64* pF = F.data();
int64* pf = f.data();
InvNTT(pf, pF);
}
void IncompleteNTT::InvNTT(int64* pf, int64* pF)
{
if (pF != pf)
{
for (int32 i = 0; i < n; i++)
pf[i] = pF[i];
}
int64* pW = W.data();
int64* pW_pre = W_pre.data();
int64 P = 2 * p;
for (int32 j = k - 1; j >= 0; j--)
{
int32 blocknum = 1 << j; //对j+1层分块
int32 blocksize = this->n >> j; //第j+1层块大小
int32 offset = blocksize >> 1; //计算第j层时蝴蝶的偏移量(块大小的一半)
int32* list = brvlist[j].data();
if (offset >= 4 * h)
for (int32 i = 0; i < blocknum; i++)
{
int64 XY, T;
int32 s = i * blocksize; //块的起始位置
int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];
int64* pX = pf + (s);
int64* pY = pf + (s + offset);
for (int32 k = 0; k < offset; k += 4)
{
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
}
}
else if (offset == 2 * h)
for (int32 i = 0; i < blocknum; i++)
{
int64 XY, T;
int32 s = i * blocksize; //块的起始位置
int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];
int64* pX = pf + (s);
int64* pY = pf + (s + offset);
for (int32 k = 0; k < h; k++)
{
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
}
}
else
for (int32 i = 0; i < blocknum; i++)
{
int64 XY, T;
int32 s = i * blocksize; //块的起始位置
int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];
int64* pX = pf + (s);
int64* pY = pf + (s + offset);
for (int32 k = 0; k < h; k++)
{
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
}
}
}
//迭代结束,再整理一下
int64 tmp;
if (k >= 2)
for (int32 i = 0; i < n; i += 4)
{
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
}
else
for (int32 i = 0; i < n; i++)
{
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
}
}
void IncompleteNTT::convolution(int64* FG, int64* F, int64* G, int64 w)
{
for (int32 i = 0; i < h; i++)
{
FG[i] = 0;
int32 s = h + i;
for (int32 j = 0; j <= i; j++)
FG[i] = BarrettMod(FG[i] + G[j] * F[i - j], one_pre, p); //冗余,[0,2p)
for (int32 j = i + 1; j < h; j++)
{
int64 tmp = FG[i] + BarrettMod(w * G[j], one_pre, p) * F[s - j];
FG[i] = BarrettMod(tmp, one_pre, p); //冗余,[0,2p)
}
}
}
void IncompleteNTT::NTTMul(NTTRep& FG, NTTRep& F, NTTRep& G)
{
if (F.size() < n || G.size() < n)
ErrorInfo("%s\n", "F.size < n or G.size < n");
if (FG.size() < n)
FG.resize(n);
int64* pFG = FG.data();
int64* pF = F.data();
int64* pG = G.data();
NTTMul(pFG, pF, pG);
}
void IncompleteNTT::NTTMul(int64* pFG, int64* pF, int64* pG)
{
int64* pW = W.data();
int64 ww;
int64 r0, r1;
int64 P = 2 * p;
int32* list = brvlist[k].data();
if (h == 1)
if (k >= 2)
for (int32 i = 0; i < m; i += 4) //m个h长小多项式
{
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
}
else
for (int32 i = 0; i < m; i++) //m个h长小多项式
{
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
}
else if (h == 2)
if (k >= 2)
for (int32 i = 0; i < m; i += 4) //m个h长小多项式
{
int64 tmp;
//使用冗余表示,[0,2p)
ww = pW[list[i]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}
tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
//使用冗余表示,[0,2p)
ww = pW[list[i + 1]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}
tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
//使用冗余表示,[0,2p)
ww = pW[list[i + 2]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}
tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
//使用冗余表示,[0,2p)
ww = pW[list[i + 3]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}
tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
}
else
for (int32 i = 0; i < m; i++) //m个h长小多项式
{
//使用冗余表示,[0,2p)
ww = pW[list[i]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}
int64 tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
}
else
for (int32 i = 0; i < m; i++) //m个h长小多项式
{
ww = pW[list[i]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}
convolution(pFG, pF, pG, ww);
pFG += h;
pF += h;
pG += h;
}
}
void IncompleteNTT::Param()
{
cout << "p = " << p; pn;
cout << "w = " << w; pn;
cout << "k = " << k; pn;
cout << "h = " << h; pn;
cout << "n = " << n; pn;
cout << "factor = " << factor; pn;
}
//############################# 分割线 ###############################
void NegacyclicNTT::init(int64 p, int64 w, int32 k, int32 h, bool tag)
{
if (tag == 0)
IncompleteNTT::init(p, w, k, 2 * h); //构建父类IncompleteNTT,并初始化
else
IncompleteNTT::init(p, w, k + 1, h); //构建父类IncompleteNTT,并初始化
hh = h;
hm = m >> 1;
hn = n >> 1;
int64 hminv, pinv;
int64 gcd = exgcd(&hminv, &pinv, hm, p);
if (gcd != 1)
ErrorInfo("%s\n", "gcd(2^{k-1}, p) != 1");
this->factor = hminv < 0 ? hminv + p : hminv;
this->factor_pre = PreCompute(this->factor, p);
}
void NegacyclicNTT::FwdNTT(NTTRep& F, Poly& f)
{
int32 d = deg(f);
if (d >= this->hn)
ErrorInfo("%s\n", "deg(f) > hn - 1");
if (F.size() < this->hn)
F.resize(this->hn);
int64* pF = F.data();
int64* pf = f.data();
FwdNTT(pF, pf, f.size());
}
void NegacyclicNTT::FwdNTT(int64* pF, int64* pf, int32 flen)
{
if (pF != pf)
{
int32 nn = min(flen, hn);
for (int32 i = 0; i < nn; i++)
pF[i] = BarrettMod(pf[i], one_pre, p);
}
int64* pW = W.data();
int64* pW_pre = W_pre.data();
int64 P = 2 * p;
for (int32 j = 1; j < k; j++) //从j=1层开始迭代
{
int32 blocknum = 1 << j; //对j层分块
int32 blocksize = this->n >> j; //第j层块大小
int32 offset = blocksize >> 1; //计算第j+1层时蝴蝶的偏移量(块大小的一半)
int32* list = brvlist[j].data();
if (offset >= 4 * hh)
for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
{
int64 X, WY;
int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
int64 ww_pre = pW_pre[list[i] << (k - j - 1)];
int64* pX = pF + (s);
int64* pY = pF + (s + offset);
for (int32 k = 0; k < offset; k += 4)
{
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
}
}
else if (offset == 2 * hh)
for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
{
int64 X, WY;
int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
int64 ww_pre = pW_pre[list[i] << (k - j - 1)];
int64* pX = pF + (s);
int64* pY = pF + (s + offset);
for (int32 k = 0; k < hh; k++)
{
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
}
}
else
for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
{
int64 X, WY;
int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
int64 ww_pre = pW_pre[list[i] << (k - j - 1)];
int64* pX = pF + (s);
int64* pY = pF + (s + offset);
for (int32 k = 0; k < hh; k++)
{
//CT蝴蝶(Harvey,输入输出范围[0,4p))
X = *pX;
X -= ((P - X) >> 63)&P;
WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
*pX = X + WY;
*pY = X + (P - WY);
pX++;
pY++;
}
}
}
//迭代结束,结果是hm=2^{k-1}个h长多项式
//约束范围到[0,2p)
if (k >= 2)
for (int32 i = 0; i < hn; i += 4)
{
*pF -= ((P - *pF) >> 63)&P;
pF++;
*pF -= ((P - *pF) >> 63)&P;
pF++;
*pF -= ((P - *pF) >> 63)&P;
pF++;
*pF -= ((P - *pF) >> 63)&P;
pF++;
}
else
for (int32 i = 0; i < hn; i++)
{
*pF -= ((P - *pF) >> 63)&P;
pF++;
}
}
void NegacyclicNTT::InvNTT(Poly& f, NTTRep& F)
{
int32 d = deg(F);
if (d >= this->hn)
ErrorInfo("%s\n", "deg(F) > hn - 1");
if (f.size() < this->hn)
f.resize(this->hn);
int64* pF = F.data();
int64* pf = f.data();
InvNTT(pf, pF);
}
void NegacyclicNTT::InvNTT(int64* pf, int64* pF)
{
if (pF != pf)
{
for (int32 i = 0; i < hn; i++)
pf[i] = pF[i];
}
int64* pW = W.data();
int64* pW_pre = W_pre.data();
int64 P = 2 * p;
for (int32 j = k - 1; j >= 1; j--) //回到第j=1层
{
int32 blocknum = 1 << j; //对j+1层分块
int32 blocksize = this->n >> j; //第j+1层块大小
int32 offset = blocksize >> 1; //计算第j层时蝴蝶的偏移量(块大小的一半)
int32* list = brvlist[j].data();
if (offset >= 4 * hh)
for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
{
int64 XY, T;
int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];
int64* pX = pf + (s);
int64* pY = pf + (s + offset);
for (int32 k = 0; k < offset; k += 4)
{
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
}
}
else if (offset == 2 * hh)
for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
{
int64 XY, T;
int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];
int64* pX = pf + (s);
int64* pY = pf + (s + offset);
for (int32 k = 0; k < hh; k++)
{
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
}
}
else
for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
{
int64 XY, T;
int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];
int64* pX = pf + (s);
int64* pY = pf + (s + offset);
for (int32 k = 0; k < hh; k++)
{
//GS蝴蝶(Harvey,输入输出范围[0,2p))
XY = *pX + *pY;
XY -= ((P - XY) >> 63)&P;
T = *pX + (P - *pY);
*pX = XY;
*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
pX++;
pY++;
}
}
}
//迭代结束,再整理一下
int64 tmp;
if (k >= 2)
for (int32 i = 0; i < hn; i += 4)
{
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
}
else
for (int32 i = 0; i < hn; i++)
{
tmp = BarrettMulMod(*pf, factor, factor_pre, p);
*pf = tmp - (tmp >= p)*p;
pf++;
}
}
void NegacyclicNTT::NTTMul(NTTRep& FG, NTTRep& F, NTTRep& G)
{
if (F.size() < hn || G.size() < hn)
ErrorInfo("%s\n", "F.size < n or G.size < n");
if (FG.size() < hn)
FG.resize(hn);
int64* pFG = FG.data();
int64* pF = F.data();
int64* pG = G.data();
NTTMul(pFG, pF, pG);
}
void NegacyclicNTT::NTTMul(int64* pFG, int64* pF, int64* pG)
{
int64* pW = W.data();
int64 ww;
int64 r0, r1;
int64 P = 2 * p;
int32* list = brvlist[k].data() + hm;
if (h == 1)
if (k >= 2)
for (int32 i = 0; i < hm; i += 4) //m个h长小多项式
{
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
}
else
for (int32 i = 0; i < hm; i++) //m个h长小多项式
{
//使用冗余表示,[0,2p)
*pFG = BarrettMod(*pF * *pG, one_pre, p);
pFG++;
pF++;
pG++;
}
else if (h == 2)
if (k >= 3)
for (int32 i = 0; i < hm; i += 4) //hm个h长小多项式
{
int64 tmp;
//使用冗余表示,[0,2p)
ww = pW[list[i]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}
tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
//使用冗余表示,[0,2p)
ww = pW[list[i + 1]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}
tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
//使用冗余表示,[0,2p)
ww = pW[list[i + 2]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}
tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
//使用冗余表示,[0,2p)
ww = pW[list[i + 3]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}
tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
}
else
for (int32 i = 0; i < hm; i++) //hm个h长小多项式
{
//使用冗余表示,[0,2p)
ww = pW[list[i]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}
int64 tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
pFG[0] = r0 - (((P - r0) >> 63)&P);
pFG[1] = r1 - (((P - r1) >> 63)&P);
pFG += 2;
pF += 2;
pG += 2;
}
else
for (int32 i = 0; i < hm; i++) //hm个h长小多项式
{
ww = pW[list[i]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}
convolution(pFG, pF, pG, ww);
pFG += h;
pF += h;
pG += h;
}
}
PolyBarrett
#include "PolyBarrett.h"
// h = f / g mod p
void Div(Poly& h, Poly f, Poly g, int64 p)
{
int df = deg(f);
int dg = deg(g);
int64 a = g[dg];
int64 ainv = inv(a, p);
int64* pf = f.data();
int64* pg = g.data();
for (int i = df; i >= dg; i--)
{
int64 b = pf[i];
int64 c = (b * ainv) % p;
h[i - dg] = c;
pf[i] = 0;
if (c == 0)
continue;
int64* ptr = pf + (i - dg);
for (int j = 0; j < dg; j++)
{
ptr[j] -= c * pg[j];
ptr[j] %= p;
ptr[j] = ptr[j] < 0 ? ptr[j] + p : ptr[j];
}
}
}
void PolyBarrett::init(uint64 p, Poly& g, int32 acc, int32 h)
{
this->p = p;
this->dg = deg(g);
this->acc = acc;
this->k = dg + acc;
this->g = g;
this->N = 2 * dg + acc; //因为需要计算f*m
this->H = h;
this->K = ceil(log2(N / h));
this->n = pow(2, K)*H;
uint64 x = (this->n * p*p) >> K; // P > n*p^2
do {
x += 1;
P = (x << K) + 1;
} while (!Miller_Rabin(P, 10)); // 2^k | P-1
int HN = pow(2, K) / 2;
for (W = 2; W < P; W++)
if (pow_mod(W, HN, P) == P - 1) // order(w)=2^K
break;
Poly xk(k + 1);
xk[k] = 1;
this->m.resize(this->acc + 1);
Div(this->m, xk, g, p);
printf("Barrett of Polynomials: acc=%d, dg=%d, p=%lld -> ", acc, dg, p);
printf("P=%lld, W=%lld, K=%d, H=%d.\n", P, W, K, H);
this->NTT.init(P, W, K, H);
this->NTT.FwdNTT(this->M, this->m);
this->NTT.FwdNTT(this->G, this->g);
}
void PolyBarrett::Mod(int64* h, int64* f, int flen)
{
Poly t;
NTTRep T, F(this->n);
NTT.FwdNTT(F.data(), f, flen);
NTT.NTTMul(T, F, M);
NTT.InvNTT(t, T);
int64* pt = t.data();
for (int i = 0; i < this->n - this->k; i++)
{
pt[i] = pt[i + k] % p;
}
for (int i = this->n - this->k; i < this->n; i++)
{
pt[i] = 0;
}
NTT.FwdNTT(T, t);
NTT.NTTMul(T, T, G);
NTT.InvNTT(t, T);
for (int i = 0; i < this->dg; i++)
{
h[i] = (f[i] - pt[i]) % p;
h[i] += h[i] < 0 ? p : 0;
}
}
GFX
#include "GFX.h"
//g = f mod (x^n - 1)
void ModXnSubOne(GFX& g, GFX& f, int n)
{
int fn = f.n;
int fd = f.d;
int fp = f.p;
if (&g != &f)
for (int i = 0; i < fn; i++)
{
int64* p1 = g.rep[i%n].data();
int64* p2 = f.rep[i].data();
for (int j = 0; j < fd; j++)
{
p1[j] += p2[j];
p1[j] %= fp;
}
}
else
for (int i = n; i < fn; i++)
{
int64* p1 = g.rep[i%n].data();
int64* p2 = f.rep[i].data();
for (int j = 0; j < fd; j++)
{
p1[j] += p2[j];
p1[j] %= fp;
p2[j] = 0;
}
}
}
//g = f mod m(x),p
void Modm(int64* g, int64* f, int flen, Poly& m, uint64 p)
{
int df;
for (df = flen - 1; df >= 0; df--)
if (f[df] != 0)
break;
int dm = deg(m);
if (df < dm)
{
for (int i = 0; i <= df; i++)
g[i] = f[i] % p;
return;
}
int64* pf = new int64[df + 1];
for (int i = 0; i < df + 1; i++)
pf[i] = f[i];
int64 a = m[dm];
int64 ainv = inv(a, p);
int64* pm = m.data();
for (int i = df; i >= dm; i--)
{
int64 b = pf[i];
int64 c = (b * ainv) % p;
g[i - dm] = c;
pf[i] = 0;
if (c == 0)
continue;
int64* ptr = pf + (i - dm);
for (int j = 0; j < dm; j++)
{
ptr[j] -= c * pm[j];
ptr[j] %= p;
ptr[j] = ptr[j] < 0 ? ptr[j] + p : ptr[j];
}
}
for (int i = 0; i < dm; i++)
g[i] = pf[i];
delete pf;
}
ostream& operator<<(ostream& cout, GFX& f)
{
int deg1 = f.deg();
printf("[");
for (int i = 0; i <= deg1; i++)
{
int deg2 = deg(f.rep[i]);
int64* p = f.rep[i].data();
printf(" [");
for (int j = 0; j <= deg2; j++)
{
printf(" %lld ", p[j]);
}
printf("] ");
}
printf("]");
return cout;
}
//c = a+b
void GFX_Calculator::Add(GFX&c, GFX& a, GFX&b)
{
int d = a.d;
int n = a.n;
uint64 p = a.p;
for (int i = 0; i < n; i++)
{
int64* pa = a.rep[i].data();
int64* pb = b.rep[i].data();
int64* pc = c.rep[i].data();
for (int j = 0; j < d; j++)
{
pc[j] = pa[j] + pb[j];
pc[j] -= ((p - pc[j]) >> 63)&p; //if c>p then c = c-p
}
}
}
//c = a-b
void GFX_Calculator::Sub(GFX&c, GFX& a, GFX&b)
{
int d = a.d;
int n = a.n;
uint64 p = a.p;
for (int i = 0; i < n; i++)
{
int64* pa = a.rep[i].data();
int64* pb = b.rep[i].data();
int64* pc = c.rep[i].data();
for (int j = 0; j < d; j++)
{
pc[j] = pa[j] - pb[j];
pc[j] += (pc[j] >> 63)&p; //if c<0 then c = c+p
}
}
}
//寻找合适的Z_P,以及2^k次单位根w
void GFX_Calculator::findP(int d, int n, uint64 p, uint64& P, uint64& w, int& k, int h)
{
int N = 2 * ((2 * d)*n);
k = ceil(log2(N / h));
int N2 = pow(2, k); // N2 * h = h*2^k >= N
uint64 x = (N2*h * p*p) >> k; // P > n*p^2
do {
x += 1;
P = (x << k) + 1;
} while (!Miller_Rabin(P, 10)); // 2^k | P-1
int HN2 = N2 >> 1;
for (w = 2; w < P; w++)
if (pow_mod(w, HN2, P) == P - 1) // order(w)=N2
break;
}
//c = a*b,GF(p^d)=Z_p[x]/(m(x))
void GFX_Calculator::Mul(GFX&c, GFX& a, GFX&b)
{
int d = a.d;
int n = a.n;
uint64 p = a.p;
int BlockSize = 2 * d;
int BlockNum = 2 * n;
int Len = BlockSize * BlockNum;
Poly A(Len);
Poly B(Len);
Poly C(Len);
for (int i = 0; i < n; i++)
{
int64* pa = a.rep[i].data();
int64* pb = b.rep[i].data();
int64* pA = A.data() + (2 * d)*i;
int64* pB = B.data() + (2 * d)*i;
for (int j = 0; j < d; j++)
{
pA[j] = pa[j];
pB[j] = pb[j];
}
}
NTTRep A2, B2, C2;
intt.FwdNTT(A2, A);
intt.FwdNTT(B2, B);
intt.NTTMul(C2, A2, B2);
intt.InvNTT(C, C2); //C=A*B
int64* pC = C.data();
for (int i = 0; i < Len; i++)
pC[i] %= p;
/*cout << "A = " << A; pn;
cout << "B = " << B; pn;
cout << "C = " << C; pn;*/
c.zero();
for (int i = 0; i < BlockNum; i++)
{
int64* pC = C.data() + BlockSize * i;
PB.Mod(c.rep[i].data(), pC, BlockSize);
}
}
//c = a*b,GF(p^d)=Z_p[x]/(m(x))
void GFX_Calculator::Mul2(GFX&c, GFX& a, GFX&b)
{
int d = a.d;
int n = a.n;
uint64 p = a.p;
int BlockSize = 2 * d;
int BlockNum = 2 * n;
int Len = BlockSize * BlockNum;
Poly A(Len);
Poly B(Len);
Poly C(Len);
for (int i = 0; i < n; i++)
{
int64* pa = a.rep[i].data();
int64* pb = b.rep[i].data();
int64* pA = A.data() + (2 * d)*i;
int64* pB = B.data() + (2 * d)*i;
for (int j = 0; j < d; j++)
{
pA[j] = pa[j];
pB[j] = pb[j];
}
}
NTTRep A2, B2, C2;
intt.FwdNTT(A2, A);
intt.FwdNTT(B2, B);
intt.NTTMul(C2, A2, B2);
intt.InvNTT(C, C2); //C=A*B
int64* pC = C.data();
for (int i = 0; i < Len; i++)
pC[i] %= p;
/*cout << "A = " << A; pn;
cout << "B = " << B; pn;
cout << "C = " << C; pn;*/
c.zero();
for (int i = 0; i < BlockNum; i++)
{
int64* pC = C.data() + BlockSize * i;
Modm(c.rep[i].data(), pC, BlockSize, m, p);
}
}
结果
GFX Mul
m = [ 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 ]
GFX Mul: d=149,n=53,p=2 -> P=147457, W=22, K=14, H=2.
Barrett of Polynomials: acc=149, dg=149, p=2 -> P=3329, W=17, K=8, H=2.
a = [ [ 1 ] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [ 1 ] ]
b = [ [ 1 0 1 ] [ 1 ] ]
10 rounds
使用Barrett:a*b = [ [ 1 0 1 ] [ 1 ] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [ 1 0 1 ] [ 1 ] ]
0.155162 s
使用长除法:a*b = [ [ 1 0 1 ] [ 1 ] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [ 1 0 1 ] [ 1 ] ]
0.291891 s
用长除法计算:a*b mod (x^n - 1) = [ [ 0 0 1 ] [ 1 ] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [ 1 0 1 ] ]
0.0015902 s
|