1. 程式人生 > 其它 >省選日記 Day-19 - Day-15

省選日記 Day-19 - Day-15

省選日記 Day \(-19\) - Day \(-15\)

Day \(-19\) Mar 15, 2022, Tuesday

今天的模擬賽特別離譜, 三道題裡面兩道多項式.

In my opinion, problem A, C is supposed to be exist in a math book, not in a fucking contest.

B

這個題還稍微親民一些, 我想到了一個無法寫也無法 Hack 的輪廓線, 即使是對的, 寫出來碼量也十分恐怖.

我們先使理論值最大, 然後假設所有額外值都取絕對值, 這樣就會存在一些統計錯誤的額外值, 我們需要思考如何要麼改變選擇使這個額外值合法, 要麼正確統計這個額外值. 毫無疑問, 使答案合法一定會減少某個數, 我們只要想辦法儘可能減少減去的數字. 為了計算這個差值的最小值, 我們使用最小割.

連邊很簡單, 只要從源點開始向選擇的值為正的點連對應理論值的兩倍的邊, 然後從選擇的值為負的點向匯點連理論值兩倍的邊, 最後在相鄰的格子的點之間相互連額外值連線對應額外值絕對值兩倍的邊.

在這個圖上, 如果有從源點到匯點的增廣路, 那麼說明這裡面的每條邊至少有一條是不合法的, 我們需要割掉一條來使得它合法. 所以這個圖的最小割便是我們使答案合法需要減去的最小的值.

int a[1005][1005];
unsigned c[205];
unsigned char b[1005][1005];
int C;
unsigned m, n, P;
unsigned A, B, D, t;
unsigned Cnt(0), Ans(0), Tmp(0);
struct Node;
struct Edge {
  Node* To;
  unsigned Inv, Con;
};
struct Node {
  vector<Edge> E;
  unsigned Frm, Dep;
}N[205];
inline void Clr() {
  memset(b, 0, sizeof(b));
  for (Node* i(N + P + 1); i >= N; --i) i->E.clear();
  n = RD(), m = RD(), P = RD(), Ans = Tmp = 0;
}
inline void Link (Node* x, Node* y, unsigned Val) {
  x->E.push_back({y, y->E.size(), Val});
  y->E.push_back({x, x->E.size() - 1, 0});
}
inline void Judge (int z, unsigned x, unsigned y) {
  if(!x) return;
  unsigned TMul(c[x] * c[y]);
  Link(N + x, N + y, TMul), Link(N + y, N + x, TMul), Ans += TMul;
}
inline char BFS() {
  Node* Que[P + 2], **Hd(Que), **Tl(Que);
  for (Node* i(N + P + 1); i >= N; --i) i->Frm = 0, i->Dep = 0x3f3f3f3f;
  (*(++Hd) = N)->Dep = 0;
  while (Tl != Hd) {
    Node* Cur(*(++Tl));
    for (auto i:Cur->E) if((i.Con) && (i.To->Dep >= 0x3f3f3f3f)) 
      (*(++Hd) = i.To)->Dep = Cur->Dep + 1;
  }
  return N[P + 1].Dep < 0x3f3f3f3f;
}
inline unsigned DFS(Node* x, unsigned Come) {
  if(x == N + P + 1) return Come;
  unsigned Gone(0);
  for (unsigned &i(x->Frm); Come && (i < x->E.size()); ++i) if(x->E[i].Con && (x->E[i].To->Dep > x->Dep)) {
    unsigned Succ(DFS(x->E[i].To, min(Come, x->E[i].Con)));
    Come -= Succ, x->E[i].Con -= Succ, x->E[i].To->E[x->E[i].Inv].Con += Succ, Gone += Succ;
  }
  return Gone;
}
signed main() {
  t = RD();
  for (unsigned T(1); T <= t; ++T){
    Clr();
    for (unsigned i(1); i <= n; ++i) for (unsigned j(1); j <= m; ++j) a[i][j] = RDsg();
    for (unsigned i(1); i <= P; ++i) {
      A = RD(), B = RD(), b[A][B] = i, C = a[A][B] * (int)(c[i] = RD());
      if(a[A][B] > 0) Link(N, N + i, C), Ans += C;
      else Link(N + i, N + P + 1, -C), Ans += -C;
      C = a[A][B];
      Judge(a[A - 1][B], b[A - 1][B], i), Judge(a[A][B - 1], b[A][B - 1], i);
      Judge(a[A + 1][B], b[A + 1][B], i), Judge(a[A][B + 1], b[A][B + 1], i);
    }
    while (BFS()) Tmp += DFS(N, 0x3f3f3f3f);
    printf("%u\n", Ans - (Tmp << 1));
  }
  return Wild_Donkey;
}

Day \(-18\) Mar 16, 2022, Wednesday

今天的題, 我搞出了個 T1 的亂搞 \(100'\), 但是事後叉掉了. 這個問題是等價於傳遞閉包的, 所以我的做法如果是真的, 就可以拿圖靈獎了.

I'm supposed to judge the connection between two points on a DAG.

Calculate the topological order.

Calculate the DFS order.

Compare the orders and make a judgement.

正解當然是 bitset 優化分塊查詢了.

SDOI2013 森林

先考慮靜態的問題:

我們知道, 線段樹一旦可持久化了, 它就變成了好多線段樹. 在這些線段樹上面二分, 可以解決區間第 \(k\) 大問題.

對於樹上的路徑, 可以拆成兩個點到 LCA 的路徑. 為了在兩個路徑上二分, 我們以 DFS 序為時間軸, 建立可持久化權值線段樹. 每個點所在的版本儲存它到根節點路徑上不同權值的出現次數. 我們可以通過差分來完成可持久化權值線段樹的建立.

查詢時, 先找到路徑的 LCA, 然後在兩個點的版本和 LCA 的父親的版本這三個版本的基礎上進行線段樹上二分, 即可找到我們想要的權值.

接下來考慮動態版本. 因為這個東西它只有連邊沒有刪邊, 所以我們可以進行啟發式合併. 直接把較小的樹接到較大的下面去即可. 每個點重構 LCA 的倍增陣列需要 \(O(\log n)\) 複雜度, 總共最多合併 \(O(n\log n)\) 個點, 所以總複雜度 \(O(n\log^2 n)\). 詢問的複雜度是 \(O(\log n)\), 因此總複雜度是 \(O(n\log^2 n)\).

unsigned b[80005], m, n, Q, Top(0);
unsigned A, B, C, D, t;
unsigned Cnt(0), Ans(0), Tmp(0);
char Op[3];
struct Seg {
  Seg *LS, *RS;
  unsigned Val;
  inline void Find(Seg* x, Seg* y, Seg* z, unsigned L, unsigned R) {
    if(L == R) {D = L;return;}
    unsigned Tot(LS->Val + x->LS->Val - y->LS->Val - z->LS->Val), Mid((L + R) >> 1);
    if(C <= Tot) LS->Find(x->LS, y->LS, z->LS, L, Mid);
    else C -= Tot, RS->Find(x->RS, y->RS, z->RS, Mid + 1, R);
  }
}S[10000005], *CntS(S);
inline void Pls(Seg* x, Seg* y, unsigned L, unsigned R) {
  y->Val = x->Val + 1;
  if(L == R) return;
  unsigned Mid((L + R) >> 1);
  if(C <= Mid) Pls(x->LS, y->LS = ++CntS, L, Mid), y->RS = x->RS;
  else Pls(x->RS, y->RS = ++CntS, Mid + 1, R), y->LS = x->LS;
}
struct Node {
  vector <Node*> E;
  Node* Fa[18];
  Seg* Root;
  unsigned Dep, Val, Size;
  inline void Construct() {
    Node* Fa0(Fa[0]);
    Dep = Fa0->Dep + 1, Size = 1, C = Val, Pls(Fa0->Root, Root, 1, Top);
    memset(Fa, 0, sizeof(Fa)), Fa[0] = Fa0;
    for (unsigned i(0); Fa[i]; ++i) Fa[i + 1] = Fa[i]->Fa[i];
    for (auto i:E) if(i != Fa[0]) i->Fa[0] = this, i->Construct(), Size += i->Size;
  }
}N[80005];
signed main() {
  RD(), n = RD(), m = RD(), Q = RD(), N->Root = S;
  for (unsigned i(1); i <= n; ++i) b[i] = N[i].Val = RD(), N[i].Root = ++CntS;
  sort(b + 1, b + n + 1), Top = unique(b + 1, b + n + 1) - b;
  for (unsigned i(1); i <= n; ++i) N[i].Val = lower_bound(b + 1, b + Top, N[i].Val) - b;
  --Top, S->LS = S->RS = S, N->Fa[0] = N;
  for (unsigned i(1); i <= m; ++i) A = RD(), B = RD(), N[A].E.push_back(N + B), N[B].E.push_back(N + A);
  for (unsigned i(1); i <= n; ++i) if(!N[i].Fa[0]) N[i].Fa[0] = N, N[i].Construct();
  for (unsigned i(1); i <= Q; ++i) {
    scanf("%s", Op), A = (RD() ^ Ans), B = (RD() ^ Ans);
    if(Op[0] == 'Q') {
      C = (RD() ^ Ans);
      Node *CurA(N + A), *CurB(N + B);
      if(CurA->Dep < CurB->Dep) swap(CurA, CurB), swap(A, B);
      for (unsigned j(16); ~j; --j) {
        if(CurA->Fa[j] && (CurA->Fa[j]->Dep >= CurB->Dep)) CurA = CurA->Fa[j];
      }
      if(CurA == CurB) S->Find(N[A].Root, N[B].Fa[0]->Root, S, 1, Top);
      else {
        for (unsigned j(16); ~j; --j) if(CurA->Fa[j] != CurB->Fa[j]) CurA = CurA->Fa[j], CurB = CurB->Fa[j];
        N[A].Root->Find(N[B].Root, CurA->Fa[0]->Root, CurA->Fa[1]->Root, 1, Top);
      }
      printf("%u\n", Ans = b[D]);
    } else {
      Node *RtA(N + A), *RtB(N + B);
      for (unsigned j(16); ~j; --j) if((RtA->Fa[j]) && (RtA->Fa[j] != N)) RtA = RtA->Fa[j];
      for (unsigned j(16); ~j; --j) if((RtB->Fa[j]) && (RtB->Fa[j] != N)) RtB = RtB->Fa[j];
      if(RtA->Size < RtB->Size) swap(A, B), swap(RtA, RtB);
      N[B].Fa[0] = N + A, N[B].Construct(), RtA->Size += N[B].Size;
      N[B].E.push_back(N + A), N[A].E.push_back(N + B);
    }
  }
  return Wild_Donkey;
}

Day \(-17\) Mar 17, 2022, Thursday

今天的模擬賽可以說是二分大賽了.

T1 是個大水題, 二分答案即可.

T2 首先是分顏色討論, 接下來求答案的過程就大同小異了. 其實有線性, 原理也很簡單, 就是雙指標掃描保證花費在限制內且長度儘可能大即可. 也有不同的二分的做法, 可以二分最終的長度, 每次用雙指標直接判斷是否存在花費合理的. 也可以列舉中點, 然後二分半徑, 對每個中點求出答案, 取最大值.

ZJOI2017 樹狀陣列

這個題建立的樹狀陣列相當於是維護和查詢字尾和, 所以查詢結果錯誤當且僅當 \(L - 1\)\(R\) 兩個元素不相同的時候.

接下來考慮概率.

一開始我是這樣想的: 這簡單啊, 我維護每個點變成 \(1\) 的概率, 查詢的時候直接查不就好了嗎.

但是如果兩個點被同一次操作改變了概率, 那麼兩個點的概率就不獨立了, 也就是說, 兩個點不能同時被這個操作修改, 所以根據各自的概率不能計算出結果.

對某個詢問有影響的操作有三種情況: 只修改左端點, 只修改右端點, 兩個端點都修改.

前兩種操作可以進行 CDQ 分治, 解決 \(4\) 維偏序問題. 每個元素記錄四個值 \(L, R, Time, Type\). 前兩個值是詢問的 \(L - 1\)\(R\) 兩個點, 修改是記錄兩個端點 \(L\), \(R\), \(Time\) 就是操作的時間, \(Type = 0\) 表示這個操作是修改, 否則是查詢. 第一個型別的修改中對詢問 \(i\) 有貢獻的修改是滿足 \(L_j \leq L_i\), \(R_j < R_i\), \(Time_j < Time_i\), \(Type_j < Type_i\)\(j\). 我們對 \(Time\) 進行分治, 對 \(R\) 排序, 對 \(L\) 用帶回滾的線段樹維護. 由於 \(Type\) 只有兩個可能的取值, 所以可以掃描時直接判斷. 第二個型別的貢獻將上面的過程對稱一下即可.

現在已經算出了兩個貢獻使每個詢問的兩個端點獨立變成 \(1\) 的概率了, 接下來我們需要對每個詢問計算兩個端點被第三個貢獻修改其中一個的概率. 對於每個詢問 \(i\), 產生貢獻的修改為滿足 \(L_j \leq L_i\), \(R_j \geq R_i\), \(Time_j < Time_i\), \(Type_j < Type_i\)\(j\).

這樣做是麻煩且大常數的, 還有一個東西也可以維護三維偏序問題: 二維線段樹. 我們用它對平面上不同的點 \((L, R)\) 進行維護即可.

如果一個操作有 \(a\) 的概率使得 \(L\) 點和 \(R\) 點之一的狀態發生改變, 那麼 \((L, R)\) 不同的概率 \(x\) 就會變成:

\[\begin{aligned} x' &= a(1 - x) + (1 - a)x\\ &= a - ax + x - ax\\ &= (1 - 2a)x + a\\ \end{aligned} \]

如果先後進行改變概率為 \(a\), \(b\) 的兩次操作, 那麼 \(x\) 會變成:

\[\begin{aligned} x' &= (1 - 2b)((1 - 2a)x + a) + b\\ x' &= (1 - 2a)x + a - 2b((1 - 2a)x + a) + b\\ x' &= (1 - 2a - 2b + 4ab)x + a - 2ab + b\\ \end{aligned} \]

容易發現, 交換 \(a\), \(b\) 的順序不會影響結果, 所以我們不必關心操作的順序. 在內層線段樹每個葉子上存這個線段上的值經過操作後會乘多少加多少, 最後直接合並即可.

#define Mn(x) (x)-=(((x)>=Mod)?Mod:0)
const unsigned long long Mod(998244353); 
unsigned Inv[100005];
unsigned long long OpM, OpP, Ans;
unsigned m, n, A, B, C, D, E, F, t;
inline unsigned long long Udt(const unsigned& x) {unsigned Rt(((Mod - x) << 1) + 1); Mn(Rt); return Rt; }
struct Inner {
  unsigned LS, RS, Opt;
}I[41225005], *CntI(I), *Border(I);
inline void PsDw(Inner* x) {
  unsigned long long Tmp(Udt(x->Opt));
  if(!(x->LS)) I[x->LS = ++CntI - I] = {0, 0, 0};
  I[x->LS].Opt = (I[x->LS].Opt * Tmp + x->Opt) % Mod;
  if(!(x->RS)) I[x->RS = ++CntI - I] = {0, 0, 0};
  I[x->RS].Opt = (I[x->RS].Opt * Tmp + x->Opt) % Mod;
  x->Opt = 0;
}
inline void Chg(Inner* x, unsigned L, unsigned R) {
  if((C <= L) && (R <= D)) {
    x->Opt = (x->Opt * OpM + OpP) % Mod;
    return;
  }
  PsDw(x);
  unsigned Mid((L + R) >> 1);
  if(C <= Mid) Chg(I + x->LS, L, Mid);
  if(D > Mid) Chg(I + x->RS, Mid + 1, R);
}
inline void Find(Inner* x, unsigned L, unsigned R) {
  if(!(x->LS)) {Ans = (Ans * Udt(x->Opt) + x->Opt) % Mod; return;}
  PsDw(x);
  unsigned Mid((L + R) >> 1);
  if(B <= Mid) Find(I + x->LS, L, Mid);
  else Find(I + x->RS, Mid + 1, R);
}
struct Node {
  unsigned LS, RS, My;
}N[200005], *CntN(N);
inline void Chg(Node*x, unsigned L, unsigned R) {
  if((A <= L) && (R <= B)) {Chg(I + x->My, 1, n);return;}
  unsigned Mid((L + R) >> 1);
  if(A <= Mid) Chg(N + x->LS, L, Mid);
  if(B > Mid) Chg(N + x->RS, Mid + 1, R);
}
inline void Find(Node*x, unsigned L, unsigned R) {
  Find(I + x->My, 1, n);
  if(L == R) return;
  unsigned Mid((L + R) >> 1);
  if(A <= Mid) Find(N + x->LS, L, Mid);
  else Find(N + x->RS, Mid + 1, R);
}
inline void Build(Node* x, unsigned L, unsigned R) {
  I[x->My = ++CntI - I] = {0, 0, 0};
  if(L == R) return;
  unsigned Mid((L + R) >> 1);
  Build(N + (x->LS = ++CntN - N), L, Mid);
  Build(N + (x->RS = ++CntN - N), Mid + 1, R);
}
signed main() {
  n = RD(), m = RD(), Build(N, 0, n), Inv[1] = 1, *Border = {0, 0, 0};
  for (unsigned i(2); i <= n; ++i) Inv[i] = (Mod - (Mod / i)) * Inv[Mod % i] % Mod;
  for (unsigned i(1); i <= m; ++i) {
    t = RD(), A = RD(), B = RD();
    if(t & 1) {
      OpP = Inv[B - A + 1], OpM = Udt(OpP), E = A, F = B;
      if(F < n) A = E, B = F, C = F + 1, D = n, Chg(N, 1, n);
      if(E > 1) A = 1, B = E - 1, C = E, D = F, Chg(N, 1, n);
      C = A = E, D = B = F, OpP <<= 1, Mn(OpP), OpM = Udt(OpP), Chg(N, 1, n);
      OpP = (OpP * 499122177 % Mod) * (F - E) % Mod, OpM = Udt(OpP), C = E, D = F, Chg(Border, 1, n);
      OpP = 1, OpM = 998244352;
      if(E > 1) C = 1, D = E - 1, Chg(Border, 1, n);
      if(F < n) C = F + 1, D = n, Chg(Border, 1, n);
    }
    else {--A, Ans = 1; if(A) Find(N, 1, n); else Find(Border, 1, n); printf("%llu\n", Ans);}
  }
  return Wild_Donkey;
}

事情遠沒有這麼簡單, 空間卡死了!!!! \(500MB\) 的空間, 人們一般都用了 \(460MB\) 以上. 當然, 作為高貴的指標使用者我一定是寄了.

#define Mn(x) (x)-=(((x)>=Mod)?Mod:0)
const unsigned long long Mod(998244353); 
unsigned Inv[100005];
unsigned long long OpM, OpP, Ans;
unsigned m, n, A, B, C, D, E, F, t;
inline unsigned long long Udt(const unsigned& x) {unsigned Rt(((Mod - x) << 1) + 1); Mn(Rt); return Rt; }
struct Inner {
  unsigned LS, RS, Opt;
}I[41225005], *CntI(I), *Border(I);
inline void PsDw(Inner* x) {
  unsigned long long Tmp(Udt(x->Opt));
  if(!(x->LS)) I[x->LS = ++CntI - I] = {0, 0, 0};
  I[x->LS].Opt = (I[x->LS].Opt * Tmp + x->Opt) % Mod;
  if(!(x->RS)) I[x->RS = ++CntI - I] = {0, 0, 0};
  I[x->RS].Opt = (I[x->RS].Opt * Tmp + x->Opt) % Mod;
  x->Opt = 0;
}
inline void Chg(Inner* x, unsigned L, unsigned R) {
  if((C <= L) && (R <= D)) {
    x->Opt = (x->Opt * OpM + OpP) % Mod;
    return;
  }
  PsDw(x);
  unsigned Mid((L + R) >> 1);
  if(C <= Mid) Chg(I + x->LS, L, Mid);
  if(D > Mid) Chg(I + x->RS, Mid + 1, R);
}
inline void Find(Inner* x, unsigned L, unsigned R) {
  if(!(x->LS)) {Ans = (Ans * Udt(x->Opt) + x->Opt) % Mod; return;}
  PsDw(x);
  unsigned Mid((L + R) >> 1);
  if(B <= Mid) Find(I + x->LS, L, Mid);
  else Find(I + x->RS, Mid + 1, R);
}
struct Node {
  unsigned LS, RS, My;
}N[200005], *CntN(N);
inline void Chg(Node*x, unsigned L, unsigned R) {
  if((A <= L) && (R <= B)) {Chg(I + x->My, 1, n);return;}
  unsigned Mid((L + R) >> 1);
  if(A <= Mid) Chg(N + x->LS, L, Mid);
  if(B > Mid) Chg(N + x->RS, Mid + 1, R);
}
inline void Find(Node*x, unsigned L, unsigned R) {
  Find(I + x->My, 1, n);
  if(L == R) return;
  unsigned Mid((L + R) >> 1);
  if(A <= Mid) Find(N + x->LS, L, Mid);
  else Find(N + x->RS, Mid + 1, R);
}
inline void Build(Node* x, unsigned L, unsigned R) {
  I[x->My = ++CntI - I] = {0, 0, 0};
  if(L == R) return;
  unsigned Mid((L + R) >> 1);
  Build(N + (x->LS = ++CntN - N), L, Mid);
  Build(N + (x->RS = ++CntN - N), Mid + 1, R);
}
signed main() {
  n = RD(), m = RD(), Build(N, 0, n), Inv[1] = 1, *Border = {0, 0, 0};
  for (unsigned i(2); i <= n; ++i) Inv[i] = (Mod - (Mod / i)) * Inv[Mod % i] % Mod;
  for (unsigned i(1); i <= m; ++i) {
    t = RD(), A = RD(), B = RD();
    if(t & 1) {
      OpP = Inv[B - A + 1], OpM = Udt(OpP), E = A, F = B;
      if(F < n) A = E, B = F, C = F + 1, D = n, Chg(N, 1, n);
      if(E > 1) A = 1, B = E - 1, C = E, D = F, Chg(N, 1, n);
      C = A = E, D = B = F, OpP <<= 1, Mn(OpP), OpM = Udt(OpP), Chg(N, 1, n);
      OpP = (OpP * 499122177 % Mod) * (F - E) % Mod, OpM = Udt(OpP), C = E, D = F, Chg(Border, 1, n);
      OpP = 1, OpM = 998244352;
      if(E > 1) C = 1, D = E - 1, Chg(Border, 1, n);
      if(F < n) C = F + 1, D = n, Chg(Border, 1, n);
    }
    else {--A, Ans = 1; if(A) Find(N, 1, n); else Find(Border, 1, n); printf("%llu\n", Ans);}
  }
  return Wild_Donkey;
}

Day \(-16\) Mar 18, 2022, Friday

IOI2018 狼人

想了半天沒思路, 去偷瞄了一眼題解, 看到了 Kruskal 字樣, 看到這道題有標籤 主席樹 便有了點想法.

我們根據點的編號正序倒序建兩棵樹, 在第一棵樹上倍增找到 \(< R_i\) 的極大連通塊 (也就是一個子樹), 在第二棵樹上找對應的 \(> L_i\) 的子樹. 問題轉化為兩個子樹內是否存在相同編號的點. 這個點既可以被起點合法到達, 也可以合法到達終點, 我們就可以在這個點上變身了.

對第一棵樹求 DFS 序, 我們在第二棵樹上同樣編號的節點上儲存這個 DFS 序作為權值. 相當於求一棵子樹是否存在權值在特定區間內的點, 可以用可持久化權值線段樹來查詢.

unsigned Lst[200005], FS[200005], Stack[200005], STop(0);
inline unsigned FindS(unsigned x) {
  while (FS[x] ^ x) Stack[++STop] = x, x = FS[x];
  while (STop) FS[Stack[STop--]] = x;
  return x;
}
unsigned A, B, C, D, t;
unsigned Cnt(0), Ans(0), Tmp(0);
struct Seg {
  Seg *LS, *RS;
  unsigned Val;
}S[8000005], *Ver[200005], *CntS(S);
inline void Build(Seg* x, unsigned L, unsigned R) {
  x->Val = 0, x->LS = x->RS = NULL;
  if(L == R) return;
  unsigned Mid((L + R) >> 1);
  Build(x->LS = ++CntS, L, Mid);
  Build(x->RS = ++CntS, Mid + 1, R);
}
inline void Add(Seg* x, Seg* y, unsigned L, unsigned R){
  y->Val = x->Val + 1;
  if(L == R) return;
  unsigned Mid((L + R) >> 1);
  if(A <= Mid) y->RS = x->RS, Add(x->LS, y->LS = ++CntS, L, Mid);
  else y->LS = x->LS, Add(x->RS, y->RS = ++CntS, Mid + 1, R);
}
inline unsigned Find(Seg* x, Seg* y, unsigned L, unsigned R) {
  if((A <= L) && (R <= B)) return y->Val - x->Val;
  unsigned Mid((L + R) >> 1);
  if((A <= Mid) && (Find(x->LS, y->LS, L, Mid))) return 1;
  if((B > Mid) && (Find(x->RS, y->RS, Mid + 1, R))) return 1;
  return 0;
}
struct Tree {
  vector<Tree*> Son;
  Tree* Fa[18];
  unsigned DFSr, Size;
  inline void GetF() { for (unsigned i(0); Fa[i]; ++i) Fa[i + 1] = Fa[i]->Fa[i]; }
  inline void DFS() {
    DFSr = ++Cnt, Size = 1;
    for (auto i:Son) i->DFS(), Size += i->Size;
  }
}T[400005], *T2;
struct Node {
  vector <Node*> E;
}N[200005];
inline Tree* Get1(Tree*x, unsigned y) {
  for (unsigned i(17); ~i; --i) if((x->Fa[i]) && (x->Fa[i] - T <= y)) x = x->Fa[i];
  return x;
}
inline Tree* Get2(Tree*x, unsigned y) {
  for (unsigned i(17); ~i; --i) if((x->Fa[i]) && (x->Fa[i] - T2 >= y)) x = x->Fa[i];
  return x;
}
unsigned m, n, Q;
signed main() {
  n = RD(), m = RD(), Q = RD(), T2 = T + n, Build(Ver[0] = S, 1, n);
  for (unsigned i(1); i <= m; ++i) A = RD() + 1, B = RD() + 1, N[A].E.push_back(N + B), N[B].E.push_back(N + A);
  for (unsigned i(1); i <= n; ++i) {
    FS[i] = i;
    for (auto j:N[i].E) if((j < (N + i)) && (FindS(j - N) ^ i)) 
      T[FS[j - N]].Fa[0] = T + i, FS[FS[j - N]] = i;
  }
  for (unsigned i(n - 1); i; --i) T[i].GetF();
  for (unsigned i(1); i < n; ++i) T[i].Fa[0]->Son.push_back(T + i);
  Cnt = 0, T[n].DFS();
  for (unsigned i(n); i; --i) {
    FS[i] = i;
    for (auto j:N[i].E) if((j > (N + i)) && (FindS(j - N) ^ i))
      T2[FS[j - N]].Fa[0] = T2 + i, FS[FS[j - N]] = i;
  }
  for (unsigned i(2); i <= n; ++i) T2[i].GetF();
  for (unsigned i(2); i <= n; ++i) T2[i].Fa[0]->Son.push_back(T2 + i);
  Cnt = 0, T2[1].DFS();
  for (unsigned i(1); i <= n; ++i) Lst[T2[i].DFSr] = i;
  for (unsigned i(1); i <= n; ++i) A = T[Lst[i]].DFSr, Add(Ver[i - 1], Ver[i] = ++CntS, 1, n);
  for (unsigned i(1); i <= Q; ++i) {
    A = RD() + 1, B = RD() + 1, C = RD() + 1, D = RD() + 1;
    Tree *Fr(Get2(T2 + A, C)), *To(Get1(T + B, D));
    A = To->DFSr, B = To->DFSr + To->Size - 1;
    printf(Find(Ver[Fr->DFSr - 1], Ver[Fr->DFSr + Fr->Size - 1], 1, n) ? "1\n" : "0\n");
  }
  return Wild_Donkey;
}

好久沒寫這麼爽的 DS 題了, 從開始到結束, 手上的速度始終沒跟上腦子的感覺真是太帶勁了. 樣例很強, 好評.

分治 FFT

給出 \(g\), 求 \(f\) 使得:

\[f_i = \sum_{j = 1}^i f_{i - j}g_j \]

已知 \(f_0 = 1\), 我們設 \(g_0 = 0\), 則

\[f_i = \sum_{j = 0}^i f_{i - j}g_j \]

我們分治計算, 每次計運算元區間 \(f_{[L, R)}\) 內, \(f_{[L, Mid)}\) 對於 \(f_{[Mid, R)}\) 的貢獻. 設區間長度都為 \(2\) 的自然數冪. 對於 \(i \in [Mid, R)\).

\[f_i += \sum_{j = 0}^{Mid - L - 1} g_{i - L - j}f_{L + j} \]

這裡相當於計算 \(g_{[0, R - L)}\)\(f_{[L, Mid)}\) 的卷積, 取結果的區間 \([Mid - L, R - L)\) 加到 \(f_{[Mid, R)}\) 上去.

每次用一個區間計算它對右側的貢獻時, 必須滿足這個區間所有貢獻都被算完了. 我們計算整個式子時相當於建立了一棵完全二叉樹, 一個節點是一個長度為 \(2\) 的自然數次冪的區間, 一個節點的兩個兒子代表的區間平均分開了這個節點的區間. 我們在這個二叉樹上進行中序遍歷, 左兒子回溯時計算它對它兄弟的貢獻, 容易發現一個節點回溯時所有對它的區間的貢獻就被計算完了.

我們需要進行 \(O(2^i)\) 次長度為 \(O(\frac n{2^i})\) 的多項式乘法, 複雜度為 \(O(2^i \times \frac n{2^i} \times (\log n - i)) = O(n (\log n - i))\). \(i \in [0, \log n)\), 因此總複雜度是 \(O(n\log^2 n)\).

我們把一個 \(n\) 項的多項式和一個 \(2n\) 項的多項式相乘, 得到了一個 \(3n - 1\) 項的多項式. 如果我們進行 DFT 時把這個東西當成 \(2n\) 項的多項式, 最後 \(n - 1\) 項就會被加到前面 \(n - 1\) 項去, 不過我們不需要這些位置, 所以可以這樣做.

#define Inv(x) Pow(x,998244351)
#define Mn(x) (x)-=(((x)>=Mod)?Mod:0)
const unsigned long long Mod(998244353);
unsigned long long a[131105], b[131105];
unsigned long long PriR[524300], *PR[20][2], W, Inv[20];
unsigned m(1), Lgm(0), n, CurLen, CurLg;
unsigned A, B, C, D, t;
unsigned Cnt(0), Ans(0);
char Tp(0);
inline unsigned long long Pow(unsigned long long x, unsigned y) {
  unsigned long long Rt(1);
  while (y) { if(y & 1) Rt = Rt * x % Mod; x = x * x % Mod, y >>= 1; }
  return Rt;  
}
inline void DIT(unsigned long long* f) {
  for (unsigned i(1), j(1), l(1); i < CurLen; i <<= 1, ++j, l = ((l << 1) | 1)) {
    for (unsigned k(0); k < CurLen; ++k) if(!(i & k)) {
      unsigned long long TmA(f[k]), TmB(f[k ^ i] * PR[j][Tp][k & l] % Mod);
      f[k] = TmA + TmB, Mn(f[k]);
      f[k ^ i] = Mod + TmA - TmB, Mn(f[k ^ i]);
    }
  }
}
inline void DIF(unsigned long long* f) {
  for (unsigned i(CurLen >> 1), j(CurLg), l((1 << CurLg) - 1); i; i >>= 1, --j, l >>= 1) {
    for (unsigned k(0); k < CurLen; ++k) if(!(i & k)) {
      unsigned long long TmA(f[k]), TmB(f[k ^ i]);
      f[k] = TmA + TmB, Mn(f[k]);
      f[k ^ i] = PR[j][Tp][k & l] * (Mod + TmA - TmB) % Mod;
    }
  }
}
inline void Mul (unsigned long long* f, unsigned long long* g, unsigned long long* fg) {
  //Optimize: Small Block
  if(CurLen <= 32) {
    memset(fg + (CurLen >> 1), 0, CurLen << 2);
    for (unsigned i(CurLen >> 1); i < CurLen; ++i)
      for (unsigned j((CurLen >> 1) - 1); ~j; --j)
        fg[i] = (fg[i] + g[j] * f[i - j]) % Mod;
    return;
  }
  unsigned long long G[CurLen];
  memset(G, 0, CurLen << 3);
  memcpy(G, g, CurLen << 2);
  memcpy(fg, f, CurLen << 3);
  Tp = 0, DIF(G), DIF(fg);
  for (unsigned i(0); i < CurLen; ++i) fg[i] = fg[i] * G[i] % Mod;
  Tp = 1, DIT(fg);
  for (unsigned i(0); i < CurLen; ++i) fg[i] = fg[i] * Inv[CurLg] % Mod;
}
inline void Solve(unsigned L, unsigned Len) {
  if(Len == 1) {CurLg = 1; return;}
  Len >>= 1, Solve(L, Len);
  unsigned long long STm[CurLen = (Len << 1)], *Rt(STm + Len), *G(b + L + Len);
  Mul(a, b + L, STm);
  for (unsigned i(0); i < Len; ++i) G[i] += Rt[i], Mn(G[i]);
  Solve(L + Len, Len);
  ++CurLg;
}
signed main() { 
  n = RD(), a[0] = 0, b[0] = 1;
  while (m < n) m <<= 1, ++Lgm;
  W = Pow(3, 998244352 / m);
  *(PR[Lgm][0] = PriR) = 1,  *(PR[Lgm][1] = PR[Lgm][0] + m) = 1;
  for (unsigned i(1), j(0); i <= m; i <<= 1, ++j) Inv[j] = Inv(i);
  for (unsigned i(1); i < m; ++i) PriR[i] = PriR[i - 1] * W % Mod;
  for (unsigned i(1); i < m; ++i) PR[Lgm][1][i] = PriR[m - i] % Mod;
  for (unsigned i(m >> 1), k(Lgm - 1); ~k; i >>= 1, --k) {
    PR[k][0] = PR[k + 1][1] + (i << 1);
    for (unsigned l(0); l < i; ++l) PR[k][0][l] = PR[k + 1][0][l << 1];
    PR[k][1] = PR[k][0] + i;
    for (unsigned l(0); l < i; ++l) PR[k][1][l] = PR[k + 1][1][l << 1];
  }
  for (unsigned i(1); i < n; ++i) a[i] = RD();
  CurLen = m, Solve(0, m);
  for (unsigned i(0); i < n; ++i) printf("%llu ", b[i]); putchar(0x0A);
  return Wild_Donkey;
}

嘗試了新的寫法, 因為需要做多次 NTT, 並且需要靈活地進行乘法, 所以這次 NTT 寫得很封裝, 所以很好調. 一遍過了. 迴圈內單次取模優化了常數.

Day \(-15\) Mar 19, 2022, Saturday

簡單的數學題

取模意義下, 求

\[\begin{aligned} &\sum_{i = 1}^n \sum_{j = 1}^n ij\gcd(i, j)\\ =& \sum_{d = 1}^n d^3 \sum_{k = 1}^{\lfloor \frac nd \rfloor} \mu(k) k^2 \sum_{i = 1}^{\lfloor \frac n{dk} \rfloor} \sum_{j = 1}^{\lfloor \frac n{dk} \rfloor} ij\\ =& \sum_{d = 1}^n d^3 \sum_{k = 1}^{\lfloor \frac nd \rfloor} \mu(k) k^2 \frac{\lfloor \frac n{dk} \rfloor^2(\lfloor \frac n{dk} \rfloor + 1)^2}4\\ =& \sum_{d = 1}^n d^3 \sum_{k = 1}^{\lfloor \frac nd \rfloor} \mu(k) k^2 \sum_{i = 1}^{\lfloor \frac n{dk} \rfloor} i^3\\ =& \sum_{d = 1}^n d^3 \sum_{k = 1}^{\lfloor \frac nd \rfloor} \mu(k) k^2 \frac{\lfloor \frac{\lfloor \frac nd \rfloor}k \rfloor^2(\lfloor \frac{\lfloor \frac nd \rfloor}k \rfloor + 1)^2}4\\ \end{aligned} \]

法1 \(O(n^{\frac 34})\)

\(G(x)\) 表示

\[\sum_{i = 1}^x \mu(i)i^2\frac{\lfloor \frac{x}i \rfloor^2(\lfloor \frac xi \rfloor + 1)^2}4 \]

則問題轉化為:

\[\sum_{d = 1}^n d^3 G(\lfloor \frac nd \rfloor) \]

如果可以快速計算 \(G\), 便可以整除分塊算出答案.

容易發現 \(\lfloor \frac xi \rfloor\) 可以計算, 設 \(f(i) = \mu(i)i^2\), \(f\) 是積性函式, 可以亞線性篩法篩出來.

考慮到:

\[\begin{aligned} \epsilon(i) &= [i = 1]\\ \epsilon(i) &= i[i = 1]\\ \epsilon(i) &= i \sum_{j | i} \mu(j)\\ \epsilon(i) &= \sum_{j | i} \mu(j)j^2 \big(\frac ij\big) ^2\\ \epsilon &= f * Id_2\\ \end{aligned} \]

這裡 \(*\) 表示狄利克雷卷積. 所以可以使用杜教篩:

\[\begin{aligned} \sum_{i = 1}^n (f * Id_2)(i) &= \sum_{i = 1}^n \sum_{j | i} f(\frac ij) j^2\\ &= \sum_{j = 1}^n j^2 \sum_{i = 1}^{\lfloor \frac nj \rfloor} f(i)\\ &= \sum_{i = 1}^n f(i) + \sum_{j = 2}^n j^2 \sum_{i = 1}^{\lfloor \frac nj \rfloor} f(i)\\ \sum_{i = 1}^n f(i) &= \sum_{i = 1}^n (f * Id_2)(i) - \sum_{j = 2}^n j^2 \sum_{i = 1}^{\lfloor \frac nj \rfloor} f(i)\\ \end{aligned} \]

杜教篩是 \(O(n^{\frac 23})\) 的. 因為 \(G\) 不是積性函式, 所以在求小於等於 \(n^{\frac 23}\) 項的字首和時無法直接遞推. 因此求 \(G\) 的總複雜度是 \(O(n^{\frac 34})\).

算了算大概是 \(3*10^7\), 時限 \(4s\) 的情況下每秒不到 \(10^7\) 應該沒什麼問題吧. 交上去發現只能得 \(80'\), 無論怎麼卡常, 本地兩秒跑完上去還是 TLE, 所以考慮優化.

#define Inv(x) Pow(x,Mod-2)
bitset<5000000> No;
unsigned Prime[1000000], CntP(0), F[5000000], FL[2200], Cub[5000000], CuL[2200], Sq[5000000], SqL[2200], G[100005], GL[100005];
unsigned long long Mod, m, n, p, P, Inv6, Inv4, Ans(0);
inline void Mn(unsigned& x) {x -= (x >= Mod) ? Mod : 0;}
inline unsigned long long Pow(unsigned long long x, unsigned y) {
  unsigned long long Rt(1);
  while (y) { if(y & 1) Rt = Rt * x % Mod; x = x * x % Mod, y >>= 1; }
  return Rt;
}
inline unsigned long long CG(unsigned long long x) {
  unsigned Rt(0);
  for (unsigned long long L, R(0), Cur, Lst(0), Tis; R < x; ) {
    L = R + 1, R = x / (Cur = x / L), Tis = ((R > m) ? FL[n / R] : F[R]);
    Rt = (Rt + ((Cur > m) ? (CuL[n / Cur]) : Cub[Cur]) * (Mod + Tis - Lst)) % Mod;
    Lst = Tis;
  }
  return Rt;
}
unsigned M, A, B, C, D, t;
unsigned Cnt(0), Tmp(0);
signed main() {
  Mod = RD(), n = RDll();
  if(!n) return printf("0\n"), 0;
  if(n == 1) return printf("1\n"), 0;
  if(n == 2) return printf("13\n"), 0;
  m = pow(n, ((double)2)/3), M = n / m, Inv6 = Inv(6), Inv4 = Inv(4);
  p = sqrt(n), P = n / p;
  while ((n / (M + 1)) > m) ++M; while ((n / M) <= m) --M;
  while ((n / (P + 1)) > p) ++P; while ((n / P) <= p) --P;
  F[1] = 1;
  for (unsigned long long i(1); i <= m; ++i) Cub[i] = (Cub[i - 1] + (i * i % Mod) * i) % Mod;
  for (unsigned long long i(1); i <= m; ++i) Sq[i] = (Sq[i - 1] + i * i) % Mod;
  for (unsigned i(1); i <= M; ++i) {
    unsigned long long Nw((n / i) % Mod);
    CuL[i] = ((Nw * (Nw + 2) + 1) % Mod) * ((Inv4 * Nw % Mod) * Nw % Mod) % Mod;
  }
  for (unsigned i(1); i <= M; ++i) {
    unsigned long long Nw((n / i) % Mod);
    SqL[i] = (((Nw * Nw % Mod) * ((Nw << 1) + 3) % Mod) + Nw) * Inv6 % Mod;
  }
  for (unsigned i(2); i <= m; ++i) {
    if(!No[i]) Prime[++CntP] = i, F[i] = (Mod - i) * i % Mod;
    for (unsigned j(1), Cur(i << 1); (Cur <= m) && j <= CntP; ++j, Cur = Prime[j] * i) {
      F[Cur] = (unsigned long long)F[i] * F[Prime[j]] % Mod, No[Cur] = 1;
      if(!(i % Prime[j])) {F[Cur] = 0;break;}
    }
  }
  for (unsigned i(1); i <= m; ++i) F[i] += F[i - 1], Mn(F[i]);
  for (unsigned long long i(M), j(n / M); i; --i) {
    j = n / i, FL[i] = 1;
    for (unsigned long long L, R(1), Cur, Lst(1), Tis; R < j; ) {
      L = R + 1, R = j / (Cur = j / L), Tis = ((R > m) ? (SqL[n / R]) : Sq[R]);
      FL[i] = (FL[i] + ((Cur > m) ? FL[n / Cur] : F[Cur]) * (Mod + Lst - Tis)) % Mod;
      Lst = Tis;
    }
  }
  for (unsigned i(1); i <= p; ++i) G[i] = CG(i);
  for (unsigned i(1); i <= P; ++i) GL[i] = CG(n / i);
  for (unsigned long long L, R(0), Cur, Lst(0), Tis; R < n; ) {
    L = R + 1, R = n / (Cur = n / L), Tis = ((R > m) ? (CuL[n / R]) : Cub[R]);
    Ans = (Ans + (((Cur <= p) ? G[Cur] : GL[n / Cur]) * (Mod + Tis - Lst))) % Mod;
    Lst = Tis;
  }
  printf("%llu\n", Ans);
  return Wild_Donkey;
}

法2 \(O(n^{\frac 23})\)

重新審視我們的式子:

\[\begin{aligned} &\sum_{i = 1}^n \sum_{j = 1}^n ij\gcd(i, j)\\ =&\sum_{i = 1}^n \sum_{j = 1}^n ij \sum_{d | i, d | j} d \sum_{k | \frac id, k | \frac jd} \mu(k)\\ =&\sum_{k = 1}^n \mu(k)k^2 \sum_{d = 1}^{\lfloor \frac nk \rfloor} d^3 \frac{\lfloor \frac n{dk} \rfloor^2(\lfloor \frac n{dk} \rfloor + 1)^2}4\\ =&\sum_{k = 1}^n \mu(k)k^2 \sum_{d = 1}^{\lfloor \frac nk \rfloor} d^3 \sum_{j = 1}^{\lfloor \frac n{dk} \rfloor} j^3\\ =&\sum_{k = 1}^n \mu(k)k^2 \sum_{d = 1}^{\lfloor \frac nk \rfloor} \sum_{j = 1}^{\lfloor \frac n{dk} \rfloor} (dj)^3\\ =&\sum_{k = 1}^n \mu(k)k^2 \sum_{d = 1}^{\lfloor \frac nk \rfloor} d^3 (I*I)(d)\\ \end{aligned} \]

現在 \(g(x) = x^3 (I*I)(x)\) 也成了積性函式.

\[\begin{aligned} (g * (Id_3 \cdot \mu))(x) =& \sum_{i | x} i^3 (I*I)(i)\mu(\frac xi)\big(\frac xi\big)^3\\ =&x^3\sum_{i | x} (I*I)(i)\mu(\frac xi)\\ =&x^3\\ g * (Id_3 \cdot \mu) =& Id_3\\ \end{aligned} \]

發現需要求 \(Id_3 \cdot \mu\) 的字首和, 仍然可以進行杜教篩.

\[\epsilon = (Id_3 \cdot \mu) * Id_3 \]

三遍杜教篩算出需要的字首和最後求答案即可. 時間複雜度 \(O(n^{\frac 23})\).

法3 \(O(n^{\frac 23})\)

法2 這樣做確實可以把時間做到 \(O(n^{\frac 23})\), 但是做三次杜教篩是我們不希望的. 重新審視我們的式子

\[\begin{aligned} Ans =& \sum_{k = 1}^n \mu(k)k^2 \sum_{d = 1}^{\lfloor \frac nk \rfloor} d^3 \sum_{j = 1}^{\lfloor \frac n{dk} \rfloor} j^3\\ =&\sum_{k = 1}^n \mu(k)k^2 \sum_{d = 1}^{\lfloor \frac nk \rfloor} d^3 (I*I)(d)\\ \end{aligned} \]

第一個式子可以整除分塊, 第二個式子可以線性篩, 所以對於 \(g(x)\) 小於等於 \(n^{\frac 23}\) 項的字首和, 我們用第二個式子通過線性篩求出. 對於大於 \(n^{\frac 23}\) 項的字首和, 我們用第一個式子整除分塊地求.

這樣杜教篩就只需要處理 \(f(k) = \mu(k)k^2\) 的字首和即可. \(O(n^{\frac 23})\) 的複雜度可以痛快地通過此題.

#define Inv(x) Pow(x,Mod-2)
bitset<5000000> No;
unsigned Prime[1000000], CtB[5000000], CntP(0), D[5000000], F[5000000], FL[2200], Cub[5000000], CuL[2200], Sq[5000000], SqL[2200], GL[2205];
unsigned long long Mod, m, n, p, P, Inv6, Inv4, Ans(0);
inline void Mn(unsigned& x) {x -= (x >= Mod) ? Mod : 0;}
inline unsigned long long Pow(unsigned long long x, unsigned y) {
  unsigned long long Rt(1);
  while (y) { if(y & 1) Rt = Rt * x % Mod; x = x * x % Mod, y >>= 1; }
  return Rt;
}
inline unsigned long long CG(unsigned long long x) {
  unsigned Rt(0);
  for (unsigned long long L, R(0), Cur, Lst(0), Tis; R < x; ) {
    L = R + 1, R = x / (Cur = x / L), Tis = ((R > m) ? CuL[n / R] : Cub[R]);
    Rt = (Rt + ((Cur > m) ? (CuL[n / Cur]) : Cub[Cur]) * (Mod + Tis - Lst)) % Mod;
    Lst = Tis;
  }
  return Rt;
}
unsigned M, t;
unsigned Cnt(0), Tmp(0);
signed main() {
  Mod = RD(), n = RDll();
  if(!n) return printf("0\n"), 0;
  if(n == 1) return printf("1\n"), 0;
  if(n == 2) return printf("13\n"), 0;
  m = pow(n, ((double)2)/3), M = n / m, Inv6 = Inv(6), Inv4 = Inv(4);
  p = sqrt(n), P = n / p;
  while ((n / (M + 1)) > m) ++M; while ((n / M) <= m) --M;
  while ((n / (P + 1)) > p) ++P; while ((n / P) <= p) --P;
  F[1] = 1, D[1] = 1;
  for (unsigned long long i(1); i <= m; ++i) Cub[i] = (Cub[i - 1] + (i * i % Mod) * i) % Mod;
  for (unsigned long long i(1); i <= m; ++i) Sq[i] = (Sq[i - 1] + i * i) % Mod;
  for (unsigned i(1); i <= M; ++i) {
    unsigned long long Nw((n / i) % Mod);
    CuL[i] = ((Nw * (Nw + 2) + 1) % Mod) * ((Inv4 * Nw % Mod) * Nw % Mod) % Mod;
  }
  for (unsigned i(1); i <= M; ++i) {
    unsigned long long Nw((n / i) % Mod);
    SqL[i] = (((Nw * Nw % Mod) * ((Nw << 1) + 3) % Mod) + Nw) * Inv6 % Mod;
  }
  for (unsigned i(2); i <= m; ++i) {
    if(!No[i]) Prime[++CntP] = i, F[i] = (Mod - i) * i % Mod, D[i] = 2, CtB[i] = 2;
    for (unsigned j(1), Cur(i << 1); (Cur <= m) && j <= CntP; ++j, Cur = Prime[j] * i) {
      F[Cur] = (unsigned long long)F[i] * F[Prime[j]] % Mod, D[Cur] = D[i] * D[Prime[j]], CtB[Cur] = 2, No[Cur] = 1;
      if(!(i % Prime[j])) {F[Cur] = 0, D[Cur] = (D[i] / CtB[i]) * (CtB[Cur] = CtB[i] + 1);break;}
    }
  }
  for (unsigned long long i(1); i <= m; ++i) D[i] = (D[i - 1] + ((D[i] * i % Mod) * i % Mod) * i) % Mod;
  for (unsigned i(1); i <= m; ++i) F[i] += F[i - 1], Mn(F[i]);
  for (unsigned long long i(M), j(n / M); i; --i) {
    j = n / i, FL[i] = 1;
    for (unsigned long long L, R(1), Cur, Lst(1), Tis; R < j; ) {
      L = R + 1, R = j / (Cur = j / L), Tis = ((R > m) ? (SqL[n / R]) : Sq[R]);
      FL[i] = (FL[i] + ((Cur > m) ? FL[n / Cur] : F[Cur]) * (Mod + Lst - Tis)) % Mod;
      Lst = Tis;
    }
  }
  for (unsigned i(1); i <= M; ++i) GL[i] = CG(n / i);
  for (unsigned long long L, R(0), Cur, Lst(0), Tis; R < n; ) {
    L = R + 1, R = n / (Cur = n / L), Tis = ((R > m) ? (FL[n / R]) : F[R]);
    Ans = (Ans + (((Cur <= m) ? D[Cur] : GL[n / Cur]) * (Mod + Tis - Lst))) % Mod;
    Lst = Tis;
  }
  printf("%llu\n", Ans);
  return Wild_Donkey;
}

法4 \(O(n^{\frac 23})\)

簡單看了看題解, 有個挺妙的做法在這裡寫一下.

我們知道有 \(\phi * I = Id\). 因此

\[\begin{aligned} &\sum_{i = 1}^n \sum_{j = 1}^n ij\gcd(i, j)\\ &\sum_{i = 1}^n \sum_{j = 1}^n ij\sum_{d | i, d | j} \phi(d)\\ &\sum_{d = 1}^n d^2\phi(d) \frac{\lfloor \frac nd \rfloor^2(\lfloor \frac nd \rfloor + 1)^2}4\\ \end{aligned} \]

我們可以仍然根據 \(\phi * I = Id\), 用杜教篩求出 \(\phi(d)\) 的字首和然後直接整除分塊.