[leetcode] Similar String Groups
題目
Two strings X and Y are similar if we can swap two letters (in different positions) of X, so that it equals Y.
For example, “tars” and “rats” are similar (swapping at positions 0 and 2), and “rats” and “arts” are similar, but “star” is not similar to “tars”, “rats”, or “arts”.
Together, these form two connected groups by similarity: {“tars”, “rats”, “arts”} and {“star”}. Notice that “tars” and “arts” are in the same group even though they are not similar. Formally, each group is such that a word is in the group if and only if it is similar to at least one other word in the group.
We are given a list A of strings. Every string in A is an anagram of every other string in A. How many groups are there?
Example 1:
Input: [“tars”,“rats”,“arts”,“star”] Output: 2
Note:
- A.length <= 2000
- A[i].length <= 1000
- A.length * A[i].length <= 20000
- All words in A consist of lowercase letters only.
- All words in A have the same length and are anagrams of each other.
- The judging time limit has been increased for this question.
DFS
思路
先放下如何判定“相似”的問題。題目中,如果兩個字串相似,那麼它們可以被分到同一個組裡。這啟發我們構造一張圖:若兩個字串相似,就在圖中對應的兩個節點之間連一條邊。問題便轉化為求圖中連通分量個數的問題了。
程式碼
/// Counts the connected groups formed by the "similar" relation between
/// strings: two strings are similar when one swap of two positions in the
/// first yields the second.
class Solution {
public:
    /// @brief Returns the number of similarity groups among the strings in A.
    ///
    /// Builds an undirected graph with one node per string and an edge between
    /// every similar pair, then counts its connected components with an
    /// iterative depth-first search.
    ///
    /// @param A Words to group; an empty list yields 0.
    /// @return Number of connected components of the similarity graph.
    int numSimilarGroups(std::vector<std::string>& A) {
        const int count = static_cast<int>(A.size());

        // neighbours[i] lists every j whose string is similar to A[i].
        std::vector<std::vector<int>> neighbours(count);
        for (int i = 0; i < count; ++i) {
            for (int j = i + 1; j < count; ++j) {
                if (isSimilar(A[i], A[j])) {
                    neighbours[i].push_back(j);
                    neighbours[j].push_back(i);
                }
            }
        }

        std::vector<char> visited(count, 0);
        std::vector<int> pending;  // explicit DFS stack, reused across components
        int groupCount = 0;
        for (int start = 0; start < count; ++start) {
            if (visited[start]) {
                continue;
            }
            // Every unvisited node starts a component not seen before.
            ++groupCount;
            visited[start] = 1;
            pending.push_back(start);
            while (!pending.empty()) {
                const int node = pending.back();
                pending.pop_back();
                for (const int next : neighbours[node]) {
                    if (!visited[next]) {
                        visited[next] = 1;
                        pending.push_back(next);
                    }
                }
            }
        }
        return groupCount;
    }

private:
    /// @brief Tells whether swapping two different positions of a yields b.
    ///
    /// Strings that differ in exactly two positions are similar when those two
    /// characters are crosswise equal. Identical strings are similar only when
    /// some character occurs at least twice, because swapping two equal
    /// characters leaves the string unchanged.
    static bool isSimilar(const std::string& a, const std::string& b) {
        if (a.size() != b.size()) {
            return false;
        }
        const std::size_t n = a.size();
        std::size_t first = 0;
        std::size_t second = 0;
        int mismatches = 0;
        for (std::size_t i = 0; i < n; ++i) {
            if (a[i] == b[i]) {
                continue;
            }
            ++mismatches;
            if (mismatches == 1) {
                first = i;
            } else if (mismatches == 2) {
                second = i;
            } else {
                return false;  // a third mismatch cannot be repaired by one swap
            }
        }
        if (mismatches == 2) {
            return a[first] == b[second] && a[second] == b[first];
        }
        if (mismatches == 1) {
            return false;
        }
        // Identical strings: look for any repeated character. The table is
        // indexed by the raw byte value so any input character stays in bounds.
        bool seen[256] = {};
        for (const char c : a) {
            const unsigned char key = static_cast<unsigned char>(c);
            if (seen[key]) {
                return true;
            }
            seen[key] = true;
        }
        return false;
    }
};
並查集
事實上,處理這種圖中有多少分量的問題,一種更優雅的方式是使用並查集,這樣子函式會有下面的形式。
int numSimilarGroups(vector<string>& A) {
disjoint_set ds(A.size());
for (int i = 0; i < A.size(); i++)
for (int j = i + 1; j < A.size(); j++)
if (isSimilar(A[i], A[j]))
ds.join(i, j);
return ds.size();
}
其中並查集的定義為:
// Disjoint-set (union-find) over the integers 0..n-1 that also keeps a running
// count of how many separate sets currently exist.
class disjoint_set {
    std::vector<int> parent;  // parent[i] == i marks i as the representative of its set
    int groups;               // number of sets currently present

public:
    // Creates n singleton sets.
    disjoint_set(int n) {
        makeset(n);
    }

    // Resets the structure to n singleton sets {0}, {1}, ..., {n-1}.
    void makeset(int n) {
        parent.resize(n);
        int label = 0;
        for (int& slot : parent) {
            slot = label++;
        }
        groups = n;
    }

    // Returns the representative of i's set and re-points every element on the
    // walked path directly at that representative.
    int find(int i) {
        int root = i;
        while (parent[root] != root) {
            root = parent[root];
        }
        while (parent[i] != root) {
            const int above = parent[i];
            parent[i] = root;
            i = above;
        }
        return root;
    }

    // Merges the sets containing i and j; does nothing if they already coincide.
    void join(int i, int j) {
        const int rootI = find(i);
        const int rootJ = find(j);
        if (rootI == rootJ) {
            return;
        }
        parent[rootI] = rootJ;
        --groups;
    }

    // Returns the current number of sets.
    int size() {
        return groups;
    }
};
時間複雜度
演算法的複雜度主要部分為演算法中的二重迴圈:共有 $O(N^2)$ 對字串需要比較,每次判定相似需要 $O(L)$,因此兩種解法的複雜度均為 $O(N^2 L)$,其中 $N$ 為字串個數,$L$ 為字串長度。