題解 HDU5728 【PowMod】
前置知識:尤拉函式、莫比烏斯反演、線性求逆元、Dirichlet 前綴和 & 後綴和、擴展尤拉定理(exEuler)、卡常
太菜了,不會亞線性複雜度的篩法做法,就只能靠反演維持生計了。
關於尤拉函式,有結論:\(\varphi(ij) = \frac{\varphi(i) \varphi(j) \gcd(i, j)}{\varphi(\gcd(i, j))}\)。證明略。
\(k = \displaystyle\sum_{i = 1}^m \varphi(in) = \varphi(n) \displaystyle\sum_{i = 1}^m \frac{\varphi(i) \gcd(i, n)}{\varphi(\gcd(i, n))}\)
\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{i = 1}^m [\gcd(i, n) = d] \varphi(i)\)
\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{i = 1}^{\lfloor \frac{m}{d} \rfloor} [\gcd(i, \frac{n}{d}) = 1] \varphi(id)\)
\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{q\ |\ \frac{n}{d}} \mu(q) \sum_{i = 1}^{\lfloor \frac{m}{dq} \rfloor} \varphi(idq)\)
令 \(T = dq\),有:
\(k = \varphi(n) \displaystyle\sum_{T\ |\ n} (\sum_{d\ |\ T} \frac{d}{\varphi(d)} \mu(\frac{T}{d})) \sum_{i = 1}^{\lfloor \frac{m}{T} \rfloor} \varphi(iT)\)
顯然,篩出尤拉函式後,\(f(T) = \displaystyle\sum_{d\ |\ T} \frac{d}{\varphi(d)} \mu(\frac{T}{d})\) 可以通過倒推 Dirichlet 前綴和在 \(O(N \ln \ln N)\) 的時間複雜度內預處理出來。
但是……剩下的部分怎麼辦呢?如果不預處理,每次線上計算還是會 TLE。
那就預處理嘛。設 \(g(n, m) = \displaystyle\sum_{i = 1}^m \varphi(in)\),顯然有轉移方程 \(g(n, m) = g(n, m - 1) + \varphi(nm)\)。
但預處理所有 \(g(n, m)\) 顯然是不現實的,所以要選定閾值 \(K\),對於 \(K\) 以內所有數 \(n\) 預處理出 \(g(n, m)\) 的值。
注意到題目中 \(n\) 為無平方因數的數,所以只需要預處理 \(K\) 以內所有的無平方因數的數 \(n\) 即可。我的程式碼裡取 \(K = 17\)(差不多卡在空間限制左右)。
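最後剩下的無限冪塔部分顯然可以用擴展尤拉定理快速計算:\(k^{k^{k^{\cdots}}} \bmod p = k^{(k^{k^{\cdots}} \bmod \varphi(p)) + \varphi(p)} \bmod p\),遞迴到模數為 \(1\) 時返回 \(0\) 即可。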
但這樣還是會 TLE。怎麼辦呢?卡常。register、inline、取模優化、火車頭、指令集都加上,然後你就可以愉快地 AC 了。當然,我也不能保證這份程式碼時時刻刻都能 AC,因為在第一次 AC 後再怎麼提交也無法 AC 了。建議選擇吃飯或夜深人靜的時候提交。
時間複雜度不會算(
醜陋的程式碼:
// GCC-specific tuning block. Each "optimize" pragma requests an optimisation
// option and each "target" pragma requests an instruction-set / tuning option
// for the code that follows. The accompanying write-up describes this block as
// constant-factor tuning added to fit the time limit.
// NOTE(review): "Ofast" and "-ffast-math" relax floating-point semantics; the
// only floating-point operation visible in this file is the sqrt() call in
// main() — confirm that is still the case before relying on these settings.
#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")
#pragma GCC target("abm")
#pragma GCC target("avx")
#pragma GCC target("f16c")
#pragma GCC target("mmx")
#pragma GCC target("popcnt")
#pragma GCC target("sse")
#pragma GCC target("sse2")
#pragma GCC target("sse3")
#pragma GCC target("sse4")
#pragma GCC target("sse4.1")
#pragma GCC target("sse4.2")
#pragma GCC target("ssse3")
#pragma GCC target("tune=native")
#include <stdio.h>
#include <math.h>
// Limits shared by the whole program.
//   N   : exclusive upper bound of every table index (10^7 + 1)
//   M   : number of entries of number[] in use (indices 1..M)
//   K   : largest multiplier that can own a precomputed row in g_list
//   mod : modulus of the inner sum (10^9 + 7)
const int N = 1e7 + 1;
const int M = 12;
const int K = 17;
const int mod = 1e9 + 7;

// Upper summation bound of the current query; written by main(), read by g().
int m;

// prime[1..cnt] : primes below N, filled by init().
int prime[N];
// phi[i] : Euler's totient of i, filled by init().
int phi[N];
// inv[i] : modular inverse of i modulo mod, filled by init().
int inv[N];
// f[T] : sum over d | T of (d / phi(d)) * mu(T / d), modulo mod, filled by init().
int f[N];
// number[1..M] : the square-free integers not exceeding K; slot 0 is unused.
int number[M + 7] = {0, 1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17};
// g_list[i][j] : (phi(i) + phi(2i) + ... + phi(j*i)) modulo mod, built only
// for the rows i that are flagged in mark[].
int g_list[K + 1][N];

// p[i] is true when i is NOT prime (0 and 1 included), filled by init().
bool p[N];
// mark[i] is true when i appears in number[1..M], filled by init().
bool mark[K + 7];
/**
 * Fills every global lookup table. Must be called once before g(), solve()
 * or the query loop in main() are used.
 *
 * Phases, in order:
 *   1. Linear sieve: p[] (composite flags), prime[1..cnt] and phi[].
 *   2. inv[i] for 1 <= i < N via inv[i] = -(mod / i) * inv[mod % i] (mod mod).
 *   3. f[i] starts as i / phi(i) (mod mod) and is then transformed in place
 *      into f(T) = sum_{d | T} (d / phi(d)) * mu(T / d) by subtracting, for
 *      every prime q, f[j] from f[j * q].
 *   4. For each i listed in number[1..M], g_list[i][j] becomes the running
 *      sum phi(i) + phi(2i) + ... + phi(j * i) (mod mod).
 */
inline void init(){
    // ---- 1. linear sieve for primes and Euler's totient ----
    int cnt = 0;
    p[0] = p[1] = true;
    phi[1] = 1;
    for (int i = 2; i < N; i++){
        if (!p[i]){
            prime[++cnt] = i;
            phi[i] = i - 1;
        }
        for (int j = 1; j <= cnt; j++){
            // Form the product once, in 64 bits, so the bound check itself
            // cannot overflow.
            const long long prod = 1ll * i * prime[j];
            if (prod >= N) break;
            const int t = (int)prod;
            p[t] = true;
            if (i % prime[j] == 0){
                // prime[j] already divides i: the totient only scales by it,
                // and stopping here keeps the sieve linear.
                phi[t] = phi[i] * prime[j];
                break;
            }
            phi[t] = phi[i] * (prime[j] - 1);
        }
    }

    // ---- 2. modular inverses ----
    inv[0] = inv[1] = 1;
    for (int i = 2; i < N; i++){
        inv[i] = mod - 1ll * (mod / i) * inv[mod % i] % mod;
    }

    // ---- 3. f = (id / phi) convolved with mu ----
    for (int i = 1; i < N; i++){
        f[i] = 1ll * i * inv[phi[i]] % mod;
    }
    for (int i = cnt; i >= 1; i--){
        // j runs downwards so f[j] still holds its value from before this
        // prime was processed when it is subtracted from f[j * prime[i]].
        for (int j = (N - 1) / prime[i]; j >= 1; j--){
            const int t = j * prime[i];
            if ((f[t] -= f[j]) < 0) f[t] += mod;
        }
    }

    // ---- 4. running totient sums for the small multipliers ----
    for (int i = 1; i <= M; i++){
        mark[number[i]] = true;
    }
    for (int i = 1; i <= K; i++){
        if (!mark[i]) continue;
        for (int j = 1, k = i; k < N; j++, k += i){
            if ((g_list[i][j] = g_list[i][j - 1] + phi[k]) >= mod) g_list[i][j] -= mod;
        }
    }
}
inline int g(register int k){
if (k <= K) return g_list[k][m / k];
register int ans = 0;
for (register int i = k; i <= m; i += k){
if ((ans += phi[i]) >= mod) ans -= mod;
}
return ans;
}
/**
 * Binary exponentiation: computes x^p modulo mod.
 *
 * @param x    base; intermediate products are formed in 64 bits.
 * @param p    exponent, expected to be non-negative.
 * @param mod  modulus, expected to be positive.
 * @return     x^p reduced modulo mod; in particular 0 whenever mod == 1,
 *             including for p == 0.
 */
static inline int quick_pow(int x, int p, int mod){
    // Start from 1 reduced by the modulus so that an exponent of 0 still
    // yields a properly reduced result when mod == 1.
    int ans = (mod == 1) ? 0 : 1;
    while (p){
        if (p & 1) ans = 1ll * ans * x % mod;
        x = 1ll * x * x % mod;
        p >>= 1;
    }
    return ans;
}
/**
 * Evaluates the infinite power tower n^(n^(n^...)) modulo p.
 *
 * Uses the extended Euler reduction: the exponent is replaced by
 * (tower mod phi(p)) + phi(p), and the tower modulo phi(p) is obtained by
 * recursing with modulus phi[p]. The chain p, phi(p), phi(phi(p)), ... ends
 * at 1, where every value is congruent to 0.
 *
 * @param n  base of the tower.
 * @param p  modulus; used as an index into phi[], so it must be below N.
 *           Values <= 1 return 0 immediately, which also stops the recursion
 *           for a zero or negative modulus instead of indexing phi[] with it.
 * @return   the tower reduced modulo p.
 */
int solve(int n, int p){
    if (p <= 1) return 0;
    const int totient = phi[p];
    return quick_pow(n, solve(n, totient) + totient, p);
}
/**
 * Reads queries "n m p" until input ends and prints one answer per query.
 *
 * For each query it accumulates k = phi(n) * sum_{T | n} f(T) * g(T)
 * (mod mod) by enumerating the divisors of n in pairs (i, n / i), then prints
 * solve(k, p).
 *
 * n and p index phi[] and m bounds the indices used by g(), so all three are
 * assumed to be below N.
 */
int main(){
    int n, p;
    init();
    // Require all three fields: a partial or failed conversion must end the
    // loop rather than be retried forever on the same unread input.
    while (scanf("%d %d %d", &n, &m, &p) == 3){
        int k = 0;
        // Integer bound i * i <= n, evaluated in 64 bits, instead of a
        // floating-point square root.
        for (int i = 1; 1ll * i * i <= n; i++){
            if (n % i != 0) continue;
            const int j = n / i;  // the paired divisor
            k = (int)((k + 1ll * f[i] * g(i)) % mod);
            // Skip the pair when n is a perfect square and i is its root.
            if (j != i) k = (int)((k + 1ll * f[j] * g(j)) % mod);
        }
        printf("%d\n", solve(1ll * k * phi[n] % mod, p));
    }
    return 0;
}