https://leetcode.com/problems/number-of-matching-subsequences/description/
Given string
S
and a dictionary of words words
, find the number of words[i]
that is a subsequence of S
.Example : Input: S = "abcde" words = ["a", "bb", "acd", "ace"] Output: 3 Explanation: There are three words inwords
that are a subsequence ofS
: "a", "acd", "ace".
Note:
- All words in
words
andS
will only consists of lowercase letters. - The length of
S
will be in the range of[1, 50000]
. - The length of
words
will be in the range of[1, 5000]
. - The length of
words[i]
will be in the range of[1, 50]
.
Solution 2: Indexing+ Binary Search
Time complexity: O(S + W * L * log(S))
Space complexity: O(S)
S: length of S
W: number of words
L: length of a word
private List<List<Integer>> pos;
private boolean isMatch(String word) {
int l = -1;
for (char c : word.toCharArray()) {
List<Integer> p = pos.get(c);
int index = Collections.binarySearch(p, l + 1);
if (index < 0) index = -index - 1;
if (index >= p.size()) return false;
l = p.get(index);
}
return true;
}
public int numMatchingSubseq(String S, String[] words) {
pos = new ArrayList<>();
for (int i = 0; i < 128; ++i)
pos.add(new ArrayList<Integer>());
char[] s = S.toCharArray();
for (int i = 0; i < s.length; ++i)
pos.get(s[i]).add(i);
int ans = 0;
for (String word : words)
if (isMatch(word)) ++ans;
return ans;
}
动态规划(Dynamic Programming)
dp[x][c]表示S[x]之后字符c第一次出现的位置
利用O(len(S))的代价,可以预处理出上述dp数组
对于字典words中的单词word,以O(len(word))的代价可以判断该单词是否为S的子序列
综上,时间复杂度为O(len(S) + len(word) * len(words))
public int numMatchingSubseq(String S, String[] words) {
int slen = S.length();
int[] last = new int[26];
int[][] dp = new int[slen + 1][26];
for (int i = 0; i < slen; i++) {
int idx = S.charAt(i) - 'a';
for (int x = last[idx]; x <= i; x++) {
dp[x][idx] = i + 1;
}
last[idx] = i + 1;
}
int cnt = 0;
for (String word: words) {
cnt += check(dp, word);
}
return cnt;
}
public int check(int[][] dp, String word) {
int idx = 0;
int wlen = word.length();
for (int i = 0; i < wlen; i++) {
idx = dp[idx][word.charAt(i) - 'a'];
if (idx == 0) return 0;
}
return 1;
}
2.
第一步:把words[i]按首字母分别加入到相应的List中。
第二步:按字符遍历S,每次取出首字母等于S[i]的List。如果List中的words[i]的长度为1,则是子序列,数量+1。若长度不为1,则去掉第一个字符,将后面的字符串按首字母分别加入到相应的List中去。
public int numMatchingSubseq(String S, String[] words) {
Map<Character, Deque<String>> map = new HashMap<>();
for (char c = 'a'; c <= 'z'; c++) {
map.putIfAbsent(c, new LinkedList<String>());
}
for (String word : words) {
map.get(word.charAt(0)).addLast(word);
}
int count = 0;
for (char c : S.toCharArray()) {
Deque<String> queue = map.get(c);
int size = queue.size();
for (int i = 0; i < size; i++) {
String word = queue.removeFirst();
if (word.length() == 1) {
count++;
} else {
map.get(word.charAt(1)).addLast(word.substring(1));
}
}
}
return count;
}
}
首先定义一个数组
对
index
,长度为words
的长度,index
中index[i]
代表words[i]
接下来应当匹配是否在S中出现的字符位置,初始化都为0
。当words[i]
满足为S子序列时,index[i]=-1
。对
S
的字符从前到后开始遍历,假设当前为S[j],遍历index:- 当
index[i] == -1
时,continue; - 当
S[j] == words[i].charAt(index[i])
时,index[i]++; - 当
index[i] >= words[i].length()
,满足条件,总数加1,并且index[i] = -1
。
需要注意的是,如果上一轮S[j]
时,没有任何words[i].charAt(index[i])
为S[j]
,则当S[j + 1] == S[j]
时直接跳过此轮。
public int numMatchingSubseq(String S, String[] words) {int sum = 0;int[] index = new int[words.length];for (int i = 0; i < words.length; i++) {index[i] = 0;}boolean hasMatch = false;char[] cs = S.toCharArray();for (int m = 0; m < cs.length; m++) {char c = cs[m];if (!hasMatch && m > 0 && cs[m - 1] == c) {continue;}if (hasMatch) hasMatch = false;for (int i = 0; i < words.length; i++) {if (index[i] == -1) {continue;}if (c == words[i].charAt(index[i])) {index[i]++;if (index[i] >= words[i].length()) {sum++;index[i] = -1;hasMatch = true;}}}}return sum;}
Solution 1: Brute Force
Time complexity: O((S + L) * W)
C++ w/o cache TLE
Space complexity: O(1)
C++ w/ cache 155 ms
Space complexity: O(W * L)
int numMatchingSubseq(const string& S, vector<string>& words) {
int ans = 0;
unordered_map<string, bool> m;
for (const string& word : words) {
auto it = m.find(word);
if (it == m.end()) {
bool match = isMatch(word, S);
ans += m[word] = match;
} else {
ans += it->second;
}
}
return ans;
}
private:
bool isMatch(const string& word, const string& S) {
int start = 0;
for (const char c : word) {
bool found = false;
for (int i = start; i < S.length(); ++i)
if (S[i] == c) {
found = true;
start = i + 1;
break;
}
if (!found) return false;
}
return true;
}
public boolean isSubsequence(String s, String t) {
char[] charS = s.toCharArray();
char[] charT = t.toCharArray();
int i = 0;
int j = 0;
while (i < s.length() && j < t.length()) {
if (charS[i] == charT[j]) {
i++;
}
j++;
}
return i == s.length();
}
相似题目在后续工作中写到,如果输入很多S,你想一个一个的检测是否是字符串t的子序列,你应该怎么修改你的代码?
本题应该就是上面写的后续工作。这个题目是上周比赛的第二题。
最开始我是使用上面的代码依次判断是否是子序列,这样提交出现超时,需要对代码进行优化。
优化1:添加hash,统计字符出现的次数,若words[i]字符数量不对,直接返回false。
优化2:添加set,统计已经判断过为子序列的words[i],防止重复判断。
public int numMatchingSubseq(String S, String[] words) {
int ans = 0;
Set<String> subsequenceSet = new HashSet<String>();
int[] alphabet = new int[26];
char[] charS = S.toCharArray();
for (int i = 0; i < charS.length; i++) {
alphabet[charS[i] - 'a']++;
}
for (int i = 0; i < words.length; i++) {
if (subsequenceSet.contains(words[i])) {
ans++;
}else if(isSubsequence(charS, alphabet, words[i])) {
subsequenceSet.add(words[i]);
ans++;
}
}
return ans;
}
private boolean isSubsequence(char[] charS, int[] alphabet, String word) {
char[] charWord = word.toCharArray();
int[] alphWord = new int[26];
for (int i = 0; i < charWord.length; i++) {
alphWord[charWord[i] - 'a']++;
}
for (int i = 0; i < 26; i++) {
if (alphWord[i] > alphabet[i]) {
return false;
}
}
int m = 0;
int n = 0;
while (m < charS.length && n < charWord.length) {
if (charS[m] == charWord[n]) {
n++;
}
m++;
}
return n == charWord.length;
}
public int numMatchingSubseq(String S, String[] words) {
if (S == null || words == null)
return 0;
int count = 0;
Map<String, Boolean> cache = new HashMap<>();
for (String word : words) {
if (cache.containsKey(word)) {
count += cache.get(word) == true ? 1 : 0;
continue;
}
boolean isSubseq = isSubseq(S, word);
cache.put(word, isSubseq);
if (isSubseq) {
count++;
}
}
return count;
}
private boolean isSubseq(String S, String word) {
int wordPos = 0;
for (Character sCh : S.toCharArray()) {
if (sCh == word.charAt(wordPos)) {
wordPos++;
}
if (wordPos == word.length()) {
break;
}
}
return wordPos == word.length();
}