LeetCode(140) Word Break II
題目如下:
Given a string s and a dictionary of words dict, add spaces in s to construct a sentence where each word is a valid dictionary word.
Return all such possible sentences.
For example, given
s = "catsanddog",
dict = ["cat", "cats", "and", "sand", "dog"].
A solution is ["cats and dog", "cat sand dog"].
分析如下:
題目可以用trie來做,也可以用DP + DFS來做。
這裡先說說DP + DFS的思路。
首先,和Word Break I類似,用DP來判斷輸入string是否能夠被break,並且在其中記錄中間結果。這個中間結果記錄了所有的前驅下標,是下一步DFS的基礎,等下細細講解。
然後,借用word ladder II的求所有路徑的思想,對中間結果進行dfs,從而找到所有的路徑。
現在來講中間結果。
先看下面的例子。
s = "catsand",
dict = ["cat", "cats", "and", "sand", ].
那麼,我希望用一個數組result_node來記錄,假設在最後一點result_node[length - 1] 可以break的話,都可以從哪些點開始break到最後一點。
length(s) = 7
result_node[6] = {2, 3},
因為可以從cats 這裡切分,"catsand" = "cats" + "and"
也可以從cat這裡切分, "catsand" = "cat" + "sand"
也就是說,最後求路的時候,"d"節點(index = 6)的前一個節點是"s"節點(index = 3),或者"t"節點(index = 2);
如果以下標0為開始,以當前下標j為結束的子串正好在dictionary中,那麼就把-1記為當前下標的一個前驅,並計入它的前驅集合中。例如,"catsand"中下標2的"t",正好有"cat"在dictionary中,所以result_node[2] = {-1},同理result_node[3] = {-1};
再看一個比較麻煩的例子。
unordered_set<string> dict = {"a", "aa"};
string s = "aaaa";
result_node[0] = {-1};
result_node[1] = {-1, 0};
result_node[2] = {0, 1};
result_node[3] = {1, 2};
現在,使用DP,從頭到尾把輸入字串掃一遍,並且記錄每個節點的前驅,最後進行DFS並且按照要求的格式把結果輸出就可以了。
容易出錯的地方是,因為記錄了-1這個前驅,所以最後dfs的時候,還需要注意這個特殊值。見程式碼。
我的程式碼:
//30ms
// Word Break II: enumerate every way to split a string into dictionary words.
//
// Approach: a forward DP records, for every index i, the set of "predecessor"
// indices j such that s[0..j] is segmentable and s[j+1..i] is a dictionary
// word (j == -1 means s[0..i] itself is a word). A DFS from the last index
// back to -1 then enumerates every segmentation.
class Solution {
public:
    // Walks the predecessor graph backwards from `start` and appends one entry
    // to `paths` for every chain that reaches the -1 sentinel.
    //
    // result_node: result_node[i] lists the predecessors of index i (-1 = string start).
    // start:       index to continue from, or -1 when a chain is complete.
    // path:        predecessors chosen so far, in back-to-front order (scratch
    //              state; restored to its entry value before returning).
    // paths:       output; each entry holds the end indices of all words except
    //              the last one, in ascending order.
    void dfs(std::vector<std::vector<int> > &result_node, int start,
             std::vector<int> &path, std::vector<std::vector<int> > &paths) {
        if (start == -1) {
            // `path` ends with the -1 sentinel and is in reverse order. Store a
            // reversed copy without the sentinel; `path` itself must stay
            // untouched because the caller still pops its own entry from it.
            paths.emplace_back();
            if (!path.empty()) {
                paths.back().assign(path.rbegin() + 1, path.rend());
            }
            return;
        }
        for (const int prev : result_node[start]) {
            // Push the predecessor index itself (not its position in the list).
            path.push_back(prev);
            dfs(result_node, prev, path, paths);
            path.pop_back();
        }
    }

    // Turns index paths produced by dfs() into space-separated sentences.
    //
    // s:           the original string.
    // paths:       word end indices per segmentation; the index of the last
    //              character of `s` is appended to every path in place.
    // final_paths: output; one sentence is appended per path.
    void format_result(std::string &s, std::vector<std::vector<int> > &paths,
                       std::vector<std::string> &final_paths) {
        const int last = static_cast<int>(s.length()) - 1;
        for (std::vector<int> &cuts : paths) {
            // The final word always ends at the last character of s.
            cuts.push_back(last);
            // The first word has no leading separator.
            std::string sentence = s.substr(0, cuts[0] + 1);
            // Every following word is preceded by a single space.
            for (std::size_t j = 1; j < cuts.size(); ++j) {
                sentence += ' ';
                sentence.append(s, cuts[j - 1] + 1, cuts[j] - cuts[j - 1]);
            }
            final_paths.push_back(sentence);
        }
    }

    // Returns every sentence obtained by inserting spaces into `s` so that each
    // word is contained in `dict`. Returns an empty vector when `s` cannot be
    // segmented or `dict` is empty.
    std::vector<std::string> wordBreak(std::string s, std::unordered_set<std::string> &dict) {
        std::vector<std::string> final_paths;
        if (dict.empty()) return final_paths;
        if (s.empty()) {
            // No last index exists to start the DFS from. The only possible
            // answer is the empty sentence, and only if "" is a dictionary word.
            if (dict.find(s) != dict.end()) final_paths.push_back(s);
            return final_paths;
        }

        // Note: there is deliberately no shortcut for "s itself is a word".
        // Such a string may still have further segmentations (e.g. "aa" with
        // {"a", "aa"}); the whole-word case is covered by the -1 predecessor.
        const int n = static_cast<int>(s.length());
        std::vector<std::vector<int> > result_node(n);
        for (int i = 0; i < n; ++i) {
            if (dict.find(s.substr(0, i + 1)) != dict.end()) {
                result_node[i].push_back(-1);
            }
            for (int j = 0; j < i; ++j) {
                // s[0..j] is segmentable exactly when j has at least one predecessor.
                if (!result_node[j].empty() &&
                    dict.find(s.substr(j + 1, i - j)) != dict.end()) {
                    result_node[i].push_back(j);
                }
            }
        }

        // Skip the DFS entirely when the full string cannot be segmented.
        if (result_node[n - 1].empty()) return final_paths;

        std::vector<std::vector<int> > paths;
        std::vector<int> path;
        dfs(result_node, n - 1, path, paths);
        format_result(s, paths, final_paths);
        return final_paths;
    }
};