1. 程式人生 > >leetcode:(187) Repeated DNA Sequence(java)

leetcode:(187) Repeated DNA Sequence(java)

/**
 * Problem:
 *      All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG".
 *      When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
 *      Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
 *      Example:
 *          Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
 *          Output: ["AAAAACCCCC", "CCCCCAAAAA"]
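 * Approach:
 *      Walk the string starting from its first character, taking each run of 10 consecutive characters as one group
 *      and adding it to a set. If the add fails, the set already contains the same 10 characters, so the group is
 *      added to repeated. Finally, repeated is converted to a list and returned.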
 */

import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
 * Solution to LeetCode 187 ("Repeated DNA Sequences"): reports every fixed-length
 * substring that appears more than once in a string.
 */
public class FindRepeatedDnaSequence_187_1021 {
    /** Number of characters in each substring (window) that is compared. */
    private static final int SEQUENCE_LENGTH = 10;

    /**
     * Finds all 10-character substrings that occur more than once in {@code s}.
     * Occurrences are allowed to overlap.
     *
     * @param s the string to scan; {@code null} is treated like an empty string
     * @return a new list containing each repeated substring exactly once, in the
     *         order in which the repetitions are first detected while scanning
     *         left to right; empty when nothing repeats
     */
    public List<String> FindRepeatedDnaSequence(String s) {
        // A repeat needs at least two windows, i.e. more than SEQUENCE_LENGTH characters.
        if (s == null || s.length() <= SEQUENCE_LENGTH) {
            return new ArrayList<>();
        }

        Set<String> seen = new HashSet<>();
        // LinkedHashSet de-duplicates while keeping a deterministic, first-detected order.
        Set<String> repeated = new LinkedHashSet<>();

        for (int start = 0; start + SEQUENCE_LENGTH <= s.length(); start++) {
            String window = s.substring(start, start + SEQUENCE_LENGTH);
            // Set.add returns false when the window was already present.
            if (!seen.add(window)) {
                repeated.add(window);
            }
        }
        return new ArrayList<>(repeated);
    }

    /**
     * Runs the solver on the example input from the problem statement and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT";

        FindRepeatedDnaSequence_187_1021 test = new FindRepeatedDnaSequence_187_1021();
        List<String> result = test.FindRepeatedDnaSequence(s);
        System.out.println(result);
    }
}