1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
class Solution {
public List<String> findRepeatedDnaSequences(String s) {
if (s.length() <= 10) {
return new ArrayList<>();
}
int left = 0, right = 0, hash = 0;
// Initialize the winodw
for (right = 0; right < 10; right++) {
int currDigit = getDigit(s.charAt(right));
hash = hash * 10 + currDigit;
}
Map<Integer, Integer> visited = new HashMap<>();
visited.put(hash, 1);
List<String> ans = new ArrayList<>();
int msbOffset = 1000000000;
while (right < s.length()) {
hash = (hash - msbOffset * (getDigit(s.charAt(left)))) * 10 + getDigit(s.charAt(right));
if (visited.containsKey(hash) && visited.get(hash) == 1) {
ans.add(s.substring(left + 1, right + 1));
}
visited.put(hash, visited.getOrDefault(hash, 0) + 1);
left += 1;
right += 1;
}
return ans;
}

private int getDigit(char c) {
int currDigit = 0;
switch (c) {
case 'A':
currDigit = 1;
break;
case 'C':
currDigit = 2;
break;
case 'G':
currDigit = 3;
break;
case 'T':
currDigit = 4;
break;
default:
currDigit = 0;
break;
}
return currDigit;
}
}

这道题获得的重要思想就是如果问某个substring是否出现多次的时候, 尝试用计算哈希值的方式来去identify不同的substring.