1. 程式人生 > 其它 >題解 HDU5728 【PowMod】

題解 HDU5728 【PowMod】

前置芝士:尤拉函式、莫比烏斯反演、線性求逆元、Dirichlet 前綴和 & 後綴和、擴展尤拉定理(exEuler)、卡常

太菜了,不會亞線性複雜度的篩法做法,就只能靠反演維持生計了。

關於尤拉函式,有結論:\(\varphi(ij) = \frac{\varphi(i) \varphi(j) \gcd(i, j)}{\varphi(\gcd(i, j))}\),證明略。

記 \(k = \displaystyle\sum_{i = 1}^m \varphi(in)\),對每一項套用上述結論,得:

\(k = \varphi(n) \displaystyle\sum_{i = 1}^m \frac{\varphi(i) \gcd(i, n)}{\varphi(\gcd(i, n))}\)

\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{i = 1}^m [\gcd(i, n) = d] \varphi(i)\)

\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{i = 1}^{\lfloor \frac{m}{d} \rfloor} [\gcd(i, \frac{n}{d}) = 1] \varphi(id)\)

\(= \varphi(n) \displaystyle\sum_{d\ |\ n} \frac{d}{\varphi(d)} \sum_{q\ |\ \frac{n}{d}} \mu(q) \sum_{i = 1}^{\lfloor \frac{m}{dq} \rfloor} \varphi(idq)\)

令 \(T = dq\),有:

\(k = \varphi(n) \displaystyle\sum_{T\ |\ n} (\sum_{d\ |\ T} \frac{d}{\varphi(d)} \mu(\frac{T}{d})) \sum_{i = 1}^{\lfloor \frac{m}{T} \rfloor} \varphi(iT)\)

顯然,篩出尤拉函式後,\(f(T) = \displaystyle\sum_{d\ |\ T} \frac{d}{\varphi(d)} \mu(\frac{T}{d})\) 可以通過倒推 Dirichlet 前綴和在 \(O(N \ln \ln N)\) 的時間複雜度內預處理出。

但是……剩下的部分怎麼辦呢?如果不預處理,每次線上計算還是會 TLE。

那就預處理嘛。設 \(g(n, m) = \displaystyle\sum_{i = 1}^m \varphi(in)\),顯然有轉移方程 \(g(n, m) = g(n, m - 1) + \varphi(nm)\)。

但預處理所有 \(g(n, m)\) 顯然是不現實的,所以要選定閾值 \(K\),對於 \(K\) 以內所有數 \(n\) 預處理出 \(g(n, m)\) 的值。

注意到題目中 \(n\) 為無平方因數的數,所以只需要預處理 \(K\) 以內所有的無平方因數的數 \(n\) 即可。我的程式碼裡取 \(K = 17\)(差不多卡在空間限制左右)。

最後剩下的無限冪塔部分顯然可以用擴展尤拉定理快速計算。

但這樣還是會 TLE。怎麼辦呢?卡常。register、inline、取模優化、火車頭、指令集都加上,然後你就可以愉快地 AC 了。當然,我也不能保證這份程式碼時時刻刻都能 AC,因為在第一次 AC 後再怎麼提交也無法 AC 了。建議選擇吃飯或夜深人靜的時候提交。

時間複雜度不會算(

醜陋的程式碼:

#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")

#pragma GCC target("abm")
#pragma GCC target("avx")
#pragma GCC target("f16c")
#pragma GCC target("mmx")
#pragma GCC target("popcnt")
#pragma GCC target("sse")
#pragma GCC target("sse2")
#pragma GCC target("sse3")
#pragma GCC target("sse4")
#pragma GCC target("sse4.1")
#pragma GCC target("sse4.2")
#pragma GCC target("ssse3")
#pragma GCC target("tune=native")

#include <stdio.h>
#include <math.h>

// Compile-time limits.
//   N   - size of every sieve table (valid indices are 0 .. N - 1)
//   M   - number of meaningful entries in number[] (indices 1 .. M)
//   K   - largest stride that may have a precomputed row in g_list
//   mod - modulus used for all accumulated sums
const int N = 10000001;
const int M = 12;
const int K = 17;
const int mod = 1000000007;

// Upper bound of the current query; filled by main() and read by g().
int m;

// prime[1 .. cnt] lists the primes below N; phi[] holds Euler's totient.
int prime[N];
int phi[N];
// inv[i] is the inverse of i modulo mod; f[] is the per-divisor weight built by init().
int inv[N];
int f[N];
// Square-free values not exceeding K; slot 0 is an unused placeholder.
int number[M + 7] = {0, 1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17};
// g_list[i][j]: running sum of phi[i], phi[2i], ..., phi[j * i] modulo mod,
// filled only for the rows i flagged in mark[].
int g_list[K + 1][N];

// p[x] is true when x is NOT prime; mark[x] is true when x appears in number[].
bool p[N];
bool mark[K + 7];

// One-time precomputation of every lookup table used by the queries:
//   1. linear sieve filling p[], prime[] and phi[];
//   2. modular inverses inv[] for all indices below N;
//   3. f[], first set to i / phi(i) (mod mod) and then transformed in place;
//   4. mark[] and the prefix-sum rows of g_list[] for the values in number[].
// Must run before g(), solve() or the query loop touch these tables.
inline void init(){
	// Number of primes found so far; prime[] is filled from index 1.
	register int cnt = 0;
	// p[x] == true means "x is not prime".
	p[0] = p[1] = true;
	phi[1] = 1;
	for (register int i = 2; i < N; i++){
		if (!p[i]){
			prime[++cnt] = i;
			phi[i] = i - 1;
		}
		// Cross off i * prime[j]; the break below stops at the smallest prime
		// factor of i, so each composite is visited once.
		for (register int j = 1; j <= cnt && i * prime[j] < N; j++){
			int t = i * prime[j];
			p[t] = true;
			if (i % prime[j] == 0){
				// prime[j] already divides i: phi is multiplied by the prime itself.
				phi[t] = phi[i] * prime[j];
				break;
			}
			// prime[j] is a new prime factor: phi is multiplied by (prime - 1).
			phi[t] = phi[i] * (prime[j] - 1);
		}
	}
	// Inverses modulo mod via inv[i] = -(mod / i) * inv[mod % i].
	// inv[0] is only a filler value.
	inv[0] = inv[1] = 1;
	for (register int i = 2; i < N; i++){
		inv[i] = mod - 1ll * (mod / i) * inv[mod % i] % mod;
	}
	// Start from f[i] = i / phi(i) taken modulo mod.
	for (register int i = 1; i < N; i++){
		f[i] = 1ll * i * inv[phi[i]] % mod;
	}
	// In-place inverse Dirichlet prefix sum: for every prime, subtract f[j]
	// from f[j * prime]. j runs downwards so that f[j] is read before it is
	// itself updated for the same prime.
	for (register int i = cnt; i >= 1; i--){
		for (register int j = (N - 1) / prime[i]; j >= 1; j--){
			int t = j * prime[i];
			if ((f[t] -= f[j]) < 0) f[t] += mod;
		}
	}
	// Flag the strides listed in number[1 .. M].
	for (register int i = 1; i <= M; i++){
		mark[number[i]] = true;
	}
	// For each flagged stride i, g_list[i][j] = phi[i] + phi[2i] + ... + phi[j * i]
	// (mod mod), for all multiples below N. Rows of unflagged strides stay zero.
	for (register int i = 1; i <= K; i++){
		if (mark[i]){
			for (register int j = 1, k = i; k < N; j++, k += i){
				if ((g_list[i][j] = g_list[i][j - 1] + phi[k]) >= mod) g_list[i][j] -= mod;
			}
		}
	}
}

// Sum of phi over the multiples of k that do not exceed the global m,
// reduced modulo mod.
// Strides up to K are answered from g_list; note that init() only fills the
// rows flagged in mark[], so other strides <= K read a zero row.
inline int g(int k){
	if (k <= K){
		return g_list[k][m / k];
	}
	// Larger strides have few multiples, so add them up directly.
	int total = 0;
	for (int multiple = k; multiple <= m; multiple += k){
		total += phi[multiple];
		if (total >= mod){
			total -= mod;
		}
	}
	return total;
}

// Modular exponentiation by repeated squaring.
//   x   - base
//   p   - exponent; values <= 0 are treated as 0
//   mod - modulus, must be positive (shadows the file-level constant on purpose)
// Returns x^p reduced modulo mod. The accumulator starts at 1 % mod so that
// a modulus of 1 yields 0 even when the exponent is 0.
static inline int quick_pow(int x, int p, int mod){
	// Reduce the base once so every later product stays below mod * mod.
	long long base = x % mod;
	long long result = 1 % mod;
	// "p > 0" rather than "p": an arithmetic right shift of a negative value
	// never reaches zero, which would loop forever.
	for (; p > 0; p >>= 1){
		if (p & 1){
			result = result * base % mod;
		}
		base = base * base % mod;
	}
	return (int)result;
}

// Evaluates the infinite power tower n^n^n^... modulo p.
// The exponent is itself the tower taken modulo phi[p], with phi[p] added
// back (extended Euler theorem); the recursion stops when the modulus is 1.
int solve(int n, int p){
	if (p == 1){
		return 0;
	}
	const int totient = phi[p];
	const int exponent = solve(n, totient) + totient;
	return quick_pow(n, exponent, p);
}

int main(){
	int n, p;
	init();
	while (scanf("%d %d %d", &n, &m, &p) != EOF){
		register int k = 0, t = sqrt(n);
		for (register int i = 1; i <= t; i++){
			if (n % i == 0){
				if ((k += 1ll * f[i] * g(i) % mod) >= mod) k -= mod;
				if (i * i != n && (k += 1ll * f[n / i] * g(n / i) % mod) >= mod) k -= mod;
			}
		}
		printf("%d\n", solve(1ll * k * phi[n] % mod, p));
	}
	return 0;
}