Massive Algorithms: LeetCode 792 - Number of Matching Subsequences

LeetCode 792 - Number of Matching Subsequences

https://leetcode.com/problems/number-of-matching-subsequences/description/

Given string S and a dictionary of words words, find the number of words[i] that is a subsequence of S.

Example :
Input: 
S = "abcde"
words = ["a", "bb", "acd", "ace"]
Output: 3
Explanation: There are three words in words that are a subsequence of S: "a", "acd", "ace".

Note:

All words in words and S will only consists of lowercase letters.
The length of S will be in the range of [1, 50000].
The length of words will be in the range of [1, 5000].
The length of words[i] will be in the range of [1, 50].

http://zxi.mytechroad.com/blog/string/leetcode-792-number-of-matching-subsequences/

Solution 2: Indexing+ Binary Search

Time complexity: O(S + W * L * log(S))

Space complexity: O(S)

S: length of S

W: number of words

L: length of a word

  private List<List<Integer>> pos;

  private boolean isMatch(String word) {

    int l = -1;

    for (char c : word.toCharArray()) {

      List<Integer> p = pos.get(c);

      int index = Collections.binarySearch(p, l + 1);

      if (index < 0) index = -index - 1;

      if (index >= p.size()) return false;

      l = p.get(index);

}

    return true;

}

  public int numMatchingSubseq(String S, String[] words) {

    pos = new ArrayList<>();

    for (int i = 0; i < 128; ++i) 

      pos.add(new ArrayList<Integer>());

    char[] s = S.toCharArray();

    for (int i = 0; i < s.length; ++i)

      pos.get(s[i]).add(i);

    int ans = 0;

    for (String word : words)

      if (isMatch(word)) ++ans;

    return ans;

}

http://bookshadow.com/weblog/2018/03/04/leetcode-number-of-matching-subsequences/

动态规划（Dynamic Programming）

dp[x][c]表示S[x]之后字符c第一次出现的位置

利用O(len(S))的代价，可以预处理出上述dp数组

对于字典words中的单词word，以O(len(word))的代价可以判断该单词是否为S的子序列

综上，时间复杂度为O(len(S) + len(word) * len(words))

public int numMatchingSubseq(String S, String[] words) { int slen = S.length(); int[] last = new int[26]; int[][] dp = new int[slen + 1][26]; for (int i = 0; i < slen; i++) { int idx = S.charAt(i) - 'a'; for (int x = last[idx]; x <= i; x++) { dp[x][idx] = i + 1; } last[idx] = i + 1; } int cnt = 0; for (String word: words) { cnt += check(dp, word); } return cnt; } public int check(int[][] dp, String word) { int idx = 0; int wlen = word.length(); for (int i = 0; i < wlen; i++) { idx = dp[idx][word.charAt(i) - 'a']; if (idx == 0) return 0; } return 1; }

https://zhuanlan.zhihu.com/p/34230881

第一步：把words[i]按首字母分别加入到相应的List中。

第二步：按字符遍历S，每次取出首字母等于S[i]的List。如果List中的words[i]的长度为1，则是子序列，数量+1。若长度不为1，则去掉第一个字符，将后面的字符串按首字母分别加入到相应的List中去。

    public int numMatchingSubseq(String S, String[] words) {
        Map<Character, Deque<String>> map = new HashMap<>();
        for (char c = 'a'; c <= 'z'; c++) {
            map.putIfAbsent(c, new LinkedList<String>());
        }
        for (String word : words) {
            map.get(word.charAt(0)).addLast(word);
        }

        int count = 0;
        for (char c : S.toCharArray()) {
            Deque<String> queue = map.get(c);
            int size = queue.size();
            for (int i = 0; i < size; i++) {
                String word = queue.removeFirst();
                if (word.length() == 1) {
                    count++;
                } else {
                    map.get(word.charAt(1)).addLast(word.substring(1));
                }
            }
        }
        return count;
    }
}

http://blog.jerkybible.com/2018/03/09/LeetCode-792-Number-of-Matching-Subsequences/

首先定义一个数组index，长度为words的长度，index中index[i]代表words[i]接下来应当匹配是否在S中出现的字符位置，初始化都为0。当words[i]满足为S子序列时，index[i]=-1。
对S的字符从前到后开始遍历，假设当前为S[j]，遍历index：

当index[i] == -1时，continue；
当S[j] == words[i].charAt(index[i])时，index[i]++；
当index[i] >= words[i].length()，满足条件，总数加1，并且index[i] = -1。
需要注意的是，如果上一轮S[j]时，没有任何words[i].charAt(index[i])为S[j]，则当S[j + 1] == S[j]时直接跳过此轮。


public int numMatchingSubseq(String S, String[] words) {

  int sum = 0;

  int[] index = new int[words.length];

  for (int i = 0; i < words.length; i++) {

      index[i] = 0;

  }



  boolean hasMatch = false;

  char[] cs = S.toCharArray();

  for (int m = 0; m < cs.length; m++) {

      char c = cs[m];

      if (!hasMatch && m > 0 && cs[m - 1] == c) {

          continue;

      }

      if (hasMatch) hasMatch = false;

      for (int i = 0; i < words.length; i++) {

          if (index[i] == -1) {

              continue;

          }

          if (c == words[i].charAt(index[i])) {

              index[i]++;

              if (index[i] >= words[i].length()) {

                  sum++;

                  index[i] = -1;

                  hasMatch = true;

              }

          }

      }

  }

  return sum;



}

Solution 1: Brute Force

Time complexity: O((S + L) * W)

C++ w/o cache TLE

Space complexity: O(1)

C++ w/ cache 155 ms

Space complexity: O(W * L)

  int numMatchingSubseq(const string& S, vector<string>& words) {    

    int ans = 0;

    unordered_map<string, bool> m;

    for (const string& word : words) {

      auto it = m.find(word);

      if (it == m.end()) {

        bool match = isMatch(word, S);

        ans += m[word] = match;

      } else {

        ans += it->second;

}

}

    return ans;

}

private:

  bool isMatch(const string& word, const string& S) {    

    int start = 0;

    for (const char c : word) {

      bool found = false;

      for (int i = start; i < S.length(); ++i)

        if (S[i] == c) {

          found = true;

          start = i + 1;          

          break;

}

      if (!found) return false;

}

    return true;

}

https://zhuanlan.zhihu.com/p/34230881

public boolean isSubsequence(String s, String t) {
    char[] charS = s.toCharArray();
    char[] charT = t.toCharArray();
    int i = 0;
    int j = 0;
    while (i < s.length() && j < t.length()) {
        if (charS[i] == charT[j]) {
            i++;
        }
        j++;
    }
    return i == s.length();
}


相似题目在后续工作中写到，如果输入很多S，你想一个一个的检测是否是字符串t的子序列，你应该怎么修改你的代码？

本题应该就是上面写的后续工作。这个题目是上周比赛的第二题。

最开始我是使用上面的代码依次判断是否是子序列，这样提交出现超时，需要对代码进行优化。

优化1：添加hash，统计字符出现的次数，若words[i]字符数量不对，直接返回false。

优化2：添加set，统计已经判断过为子序列的words[i]，防止重复判断。


public int numMatchingSubseq(String S, String[] words) {
    int ans = 0;
    Set<String> subsequenceSet = new HashSet<String>();
    int[] alphabet = new int[26];
    char[] charS = S.toCharArray();
    for (int i = 0; i < charS.length; i++) {
        alphabet[charS[i] - 'a']++;
    }
    for (int i = 0; i < words.length; i++) {
        if (subsequenceSet.contains(words[i])) {
            ans++;
        }else if(isSubsequence(charS, alphabet, words[i])) {
            subsequenceSet.add(words[i]);
            ans++;
        }
    }
    return ans;
}

private boolean isSubsequence(char[] charS, int[] alphabet, String word) {
    char[] charWord = word.toCharArray();
    int[] alphWord = new int[26];
    for (int i = 0; i < charWord.length; i++) {
        alphWord[charWord[i] - 'a']++;
    }
    for (int i = 0; i < 26; i++) {
        if (alphWord[i] > alphabet[i]) {
            return false;
        }
    }
    int m = 0;
    int n = 0;
    while (m < charS.length && n < charWord.length) {
        if (charS[m] == charWord[n]) {
            n++;
        }
        m++;
    }

    return n == charWord.length;
}

public int numMatchingSubseq(String S, String[] words) {

if (S == null || words == null)

return 0;

int count = 0;

Map<String, Boolean> cache = new HashMap<>();

for (String word : words) {

if (cache.containsKey(word)) {

count += cache.get(word) == true ? 1 : 0;

continue;

}

boolean isSubseq = isSubseq(S, word);

cache.put(word, isSubseq);

if (isSubseq) {

count++;

}

return count;

}

private boolean isSubseq(String S, String word) {

int wordPos = 0;

for (Character sCh : S.toCharArray()) {

if (sCh == word.charAt(wordPos)) {

wordPos++;

}

if (wordPos == word.length()) {

break;

}

return wordPos == word.length();

}

LeetCode 792 - Number of Matching Subsequences

Labels

Popular Posts