LuoguP1979 華容道 題解
阿新 • • 發佈:2020-10-29
寬搜
思路很簡單,開一個四維陣列used[i][j][k][l]
表示空格在(i, j)、指定棋子在(k, l)的狀態是否走過,每次讓空格上下左右走,遇到指定棋子就與它交換位置,時間複雜度\(O(qn^4)\)
於是,我們得到了一份70分的程式碼:
#include <bits/stdc++.h>
using namespace std;

// Board as read from input; a cell can be entered by the blank only when its
// value is non-zero.
int mp[31][31];

// One BFS state.
struct hehe{
    int x, y, dx, dy, step; // (x, y): tracked piece, (dx, dy): blank, step: moves so far
};

// used[bx][by][px][py] is set once the state "blank at (bx, by), tracked piece
// at (px, py)" has been queued in the current query.
bool used[31][31][31][31];

// Direction offsets; index 0 is a dummy so directions are numbered 1..4.
int mx[] = {0, 0, 1, 0, -1};
int my[] = {0, 1, 0, -1, 0};

// Reads the board and T queries; for each query runs a BFS over
// (blank, piece) states and prints the minimum number of blank moves that
// brings the tracked piece to the target cell, or -1 if it is unreachable.
int main()
{
    int n, m, T;
    cin >> n >> m >> T;
    for(int i = 1; i <= n; i++)
    {
        for(int j = 1; j <= m; j++)
            cin >> mp[i][j];
    }
    while(T--)
    {
        bool fl = 0; // becomes 1 once the target has been reached
        queue<hehe>q;
        memset(used, 0, sizeof(used)); // fresh visited table for every query
        // (qx, qy): blank, (zx, zy): tracked piece, (kx, ky): target cell
        int qx, qy, zx, zy, kx, ky;
        cin >> qx >> qy >> zx >> zy >> kx >> ky;
        hehe now;
        now.dx = qx;
        now.dy = qy;
        now.x = zx;
        now.y = zy;
        now.step = 0;
        used[qx][qy][zx][zy] = 1;
        q.push(now);
        while(q.size())
        {
            now = q.front();
            q.pop();
            int x = now.x, y = now.y, xx = now.dx, yy = now.dy;
            if(x == kx && y == ky)
            {
                // BFS pops states in order of step count, so the first hit is minimal.
                fl = 1;
                cout << now.step << endl;
                break;
            }
            for(int i = 1; i <= 4; i++)
            {
                // Candidate new position of the blank.
                int xxx = xx + mx[i], yyy = yy + my[i];
                if(xxx > n || yyy > m || xxx < 1 || yyy < 1) continue;
                // Piece position after the move: it only changes when the blank
                // slides onto the piece, in which case the two swap places.
                int qwq = x, qaq = y;
                if(x == xxx && y == yyy) qwq = xx, qaq = yy;
                if(mp[xxx][yyy] && !used[xxx][yyy][qwq][qaq])
                {
                    used[xxx][yyy][qwq][qaq] = 1;
                    hehe nxt;
                    nxt.x = qwq;
                    nxt.y = qaq;
                    nxt.dx = xxx;
                    nxt.dy = yyy;
                    nxt.step = now.step + 1;
                    q.push(nxt);
                }
            }
        }
        if(!fl)
        {
            cout << -1 << endl;
        }
    }
}
接下來才是這篇題解的重點:
如何卡常
O2優化+關閉流同步
#pragma GCC optimize(2)
上面的語句是開啟O2優化
(雖然在你谷不讓在程式中自己寫,但是可以在提交時手動開啊)
ios::sync_with_stdio(false);
這句話是關閉cin、cout與C標準輸入輸出(stdio)之間的同步
原理大概是:關閉同步之後,cin、cout可以使用自己獨立的緩衝區,不必每次操作都和scanf、printf所用的stdio緩衝保持一致
這會讓cin、cout的速度大致追上scanf、printf(代價是之後不能再和scanf、printf混用)
加上這兩句話,成功拿到了另外10分:
迴圈展開
定睛一看:嘶,#7#8被優化到了可觀測的TLE之內(1.20s以內)
想想之前看過一個神仙卡常毒瘤題P4604 [WC2017]挑戰
其中一個基本操作就是“迴圈展開”
具體一點:
以下程式碼
for(int i = 1; i <= 4; i++)
{
    // Candidate new position of the blank in direction i.
    int xxx = xx + mx[i], yyy = yy + my[i];
    if(xxx > n || yyy > m || xxx < 1 || yyy < 1) continue;

    // Piece position after the move; the piece and the blank swap places
    // when the blank slides onto the piece.
    int qwq = x, qaq = y;
    if(x == xxx && y == yyy)
    {
        qwq = xx;
        qaq = yy;
    }

    if(mp[xxx][yyy] && !used[xxx][yyy][qwq][qaq])
    {
        used[xxx][yyy][qwq][qaq] = 1;
        hehe nxt;
        nxt.x = qwq;
        nxt.y = qaq;
        nxt.dx = xxx;
        nxt.dy = yyy;
        nxt.step = now.step + 1;
        q.push(nxt);
    }
}
將其迴圈展開後變成這樣:
// Manually unrolled version of the four-direction loop. It is meant to sit
// directly inside the BFS while-loop, which is why the last bounds check can
// simply `continue` to the next queue entry.
int i = 1;
int xxx = xx + mx[i], yyy = yy + my[i];
// Piece position after the move. Declared once up front: the unrolled copies
// below only assign to these, and the declarations must precede the gotos.
int qwq, qaq;

// ---- direction 1 ----
if(xxx > n || yyy > m || xxx < 1 || yyy < 1) goto s1;
qwq = x, qaq = y;
// The blank slid onto the piece: the two swap places.
if(x == xxx && y == yyy) qwq = xx, qaq = yy;
if(mp[xxx][yyy] && !used[xxx][yyy][qwq][qaq])
{
    used[xxx][yyy][qwq][qaq] = 1;
    hehe nxt;
    nxt.x = qwq;
    nxt.y = qaq;
    nxt.dx = xxx;
    nxt.dy = yyy;
    nxt.step = now.step + 1;
    q.push(nxt);
}

// ---- direction 2 ----
s1: i++;
xxx = xx + mx[i], yyy = yy + my[i];
if(xxx > n || yyy > m || xxx < 1 || yyy < 1) goto s2;
qwq = x, qaq = y;
if(x == xxx && y == yyy) qwq = xx, qaq = yy;
if(mp[xxx][yyy] && !used[xxx][yyy][qwq][qaq])
{
    used[xxx][yyy][qwq][qaq] = 1;
    hehe nxt;
    nxt.x = qwq;
    nxt.y = qaq;
    nxt.dx = xxx;
    nxt.dy = yyy;
    nxt.step = now.step + 1;
    q.push(nxt);
}

// ---- direction 3 ----
s2: i++;
xxx = xx + mx[i], yyy = yy + my[i];
if(xxx > n || yyy > m || xxx < 1 || yyy < 1) goto s3;
qwq = x, qaq = y;
if(x == xxx && y == yyy) qwq = xx, qaq = yy;
if(mp[xxx][yyy] && !used[xxx][yyy][qwq][qaq])
{
    used[xxx][yyy][qwq][qaq] = 1;
    hehe nxt;
    nxt.x = qwq;
    nxt.y = qaq;
    nxt.dx = xxx;
    nxt.dy = yyy;
    nxt.step = now.step + 1;
    q.push(nxt);
}

// ---- direction 4 (last one: nothing left to skip to) ----
s3: i++;
xxx = xx + mx[i], yyy = yy + my[i];
if(xxx > n || yyy > m || xxx < 1 || yyy < 1) continue;
qwq = x, qaq = y;
if(x == xxx && y == yyy) qwq = xx, qaq = yy;
if(mp[xxx][yyy] && !used[xxx][yyy][qwq][qaq])
{
    used[xxx][yyy][qwq][qaq] = 1;
    hehe nxt;
    nxt.x = qwq;
    nxt.y = qaq;
    nxt.dx = xxx;
    nxt.dy = yyy;
    nxt.step = now.step + 1;
    q.push(nxt);
}
這樣可以優化常數——因為這個迴圈每次bfs新節點的時候都要做一次,這樣理論上相當於優化了\(10^8\)級別的常數——雖然達不到
但是效果依然很明顯:
好,#7#8已經在0.01級別了!#11#13也優化到可視範圍內了!
優化掉i++和方向陣列(mx、my)
這兩個微不足道的東西足以卡掉零點幾秒的時間,甚至更多。
變成:
int xxx = xx + 1, yyy = yy + 0;
s1:
xxx = xx + 0, yyy = yy + 1;
...
s2:
xxx = xx - 1, yyy = yy + 0;
...
s3:
xxx = xx + 0, yyy = yy - 1;
...
好耶!多過了兩個點!順便把#11#13優化成了個位數的毫秒級!
最後,去掉goto
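苦思冥想了好久,想起在goto身上還有一個常數
於是把邊界判斷去掉——因為mp是全域性陣列,第0行、第0列、第n+1行、第m+1列從來沒有被讀入,始終都是零,走出棋盤的格子會被mp[xxx][yyy]這個判斷自然擋掉
注意:n、m最大為30時,第n+1行、第m+1列的下標是31,所以mp至少要開到[32][32],否則會陣列越界
這樣就把goto優化掉了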
if(xxx > n || yyy > m || xxx < 1 || yyy < 1) goto s1;
...
去掉。
驚險AC。
最終程式碼
#include <bits/stdc++.h>
using namespace std;
// Board as read from input; the blank may move onto a cell only when its
// value is non-zero. The array is one row/column larger than the board on
// every side (board indices are 1..n, 1..m with n, m expected to be <= 30):
// the border is never written, stays zero, and therefore rejects any move off
// the board without an explicit bounds check.
int mp[32][32];

// One BFS state.
struct hehe{
    int x, y;   // position of the tracked piece
    int dx, dy; // position of the blank cell
    int step;   // number of blank moves made to reach this state
};

// used[bx][by][px][py] is set once the state "blank at (bx, by), tracked piece
// at (px, py)" has been queued in the current query. It is only indexed with
// on-board coordinates, because the mp[][] test always runs first.
bool used[31][31][31][31];

// Tries to slide the blank of state `cur` onto cell (nx, ny) and queues the
// resulting state if the cell is enterable and the state has not been seen.
static inline void try_move(queue<hehe>& q, const hehe& cur, int nx, int ny)
{
    // Blocked cell, or the zero border just outside the board.
    if(!mp[nx][ny]) return;

    // The piece only moves when the blank slides onto it: the two swap places.
    int px = cur.x, py = cur.y;
    if(px == nx && py == ny) px = cur.dx, py = cur.dy;

    if(used[nx][ny][px][py]) return;
    used[nx][ny][px][py] = 1;

    hehe nxt;
    nxt.x = px;
    nxt.y = py;
    nxt.dx = nx;
    nxt.dy = ny;
    nxt.step = cur.step + 1;
    q.push(nxt);
}

// Reads the board and T queries; for each query runs a BFS over
// (blank, piece) states and prints the minimum number of blank moves that
// brings the tracked piece to the target cell, or -1 if it is unreachable.
int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0); // no need to flush cout before every read
    int n, m, T;
    cin >> n >> m >> T;
    for(int i = 1; i <= n; i++)
    {
        for(int j = 1; j <= m; j++)
            cin >> mp[i][j];
    }
    while(T--)
    {
        bool fl = 0; // becomes 1 once the target has been reached
        queue<hehe>q;
        memset(used, 0, sizeof(used)); // fresh visited table for every query
        // (qx, qy): blank, (zx, zy): tracked piece, (kx, ky): target cell
        int qx, qy, zx, zy, kx, ky;
        cin >> qx >> qy >> zx >> zy >> kx >> ky;
        hehe now;
        now.dx = qx;
        now.dy = qy;
        now.x = zx;
        now.y = zy;
        now.step = 0;
        used[qx][qy][zx][zy] = 1;
        q.push(now);
        while(q.size())
        {
            now = q.front();
            q.pop();
            if(now.x == kx && now.y == ky)
            {
                // BFS pops states in order of step count, so the first hit is minimal.
                fl = 1;
                cout << now.step << '\n';
                break;
            }
            // The four directions are written out explicitly (no loop counter,
            // no direction table); the helper is small enough to be inlined.
            try_move(q, now, now.dx + 1, now.dy);
            try_move(q, now, now.dx, now.dy + 1);
            try_move(q, now, now.dx - 1, now.dy);
            try_move(q, now, now.dx, now.dy - 1);
        }
        if(!fl)
        {
            cout << -1 << '\n';
        }
    }
    return 0;
}