Saturday, November 28, 2015

Airbnb: K Edit Distance

Related: LeetCode 161 - One Edit DistanceLeetCode 72 - Edit Distance
Buttercola: Airbnb: K Edit Distance
Given a list of word and a target word, output all the words for each the edit distance with the target no greater than k.

e.g. [abc, abd, abcd, adc], target "ac", k = 1,

output = [abc, adc]

Naive Solution:
A naive solution would be, for each word in the list, calculate the edit distance with the target word. If it is equal or less than k, output to the result list.

If we assume that the average length of the words is m, and the total number of words in the list is n, the total time complexity is O(n * m^2).

A better solution with a trie:
The problem with the previous solution is if the given list of the words is like ab, abc, abcd, each time we need to repeatedly calculate the edit distance with the target word. If we can combine the prefix of all words together, we can save lots of time.
`  ``public` `List<String> getKEditDistance(String[] words, String target, ``int` `k) {`
`    ``List<String> result = ``new` `ArrayList<>();`
`    `
`    ``if` `(words == ``null` `|| words.length == ``0` `|| `
`        ``target == ``null` `|| target.length() == ``0` `|| `
`        ``k < ``0``) {`
`      ``return` `result;`
`    ``}`
`    `
`    ``Trie trie = ``new` `Trie();`
`    ``for` `(String word : words) {`
`      ``trie.add(word);`
`    ``}`
`    `
`    ``TrieNode root = trie.root;`
`    `
`    ``int``[] prev = ``new` `int``[target.length() + ``1``];`
`    ``for` `(``int` `i = ``0``; i < prev.length; i++) {`
`      ``prev[i] = i;`
`    ``}`
`    `
`    ``getKEditDistanceHelper(``""``, target, k, root, prev, result);`
`    `
`    ``return` `result;`
`  ``}`
`  `
`  ``private` `void` `getKEditDistanceHelper(String curr, String target, ``int` `k, TrieNode root, `
`                                      ``int``[] prevDist, List<String> result) {`
`    ``if` `(root.isLeaf) {`
`      ``if` `(prevDist[target.length()] <= k) {`
`        ``result.add(curr);`
`      ``} ``else` `{`
`        ``return``;`
`      ``}`
`    ``}`
`    `
`    ``for` `(``int` `i = ``0``; i < ``26``; i++) {`
`      ``if` `(root.children[i] == ``null``) {`
`        ``continue``;`
`      ``}`
`      `
`      ``int``[] currDist = ``new` `int``[target.length() + ``1``];`
`      ``currDist[``0``] = curr.length() + ``1``;`
`      ``for` `(``int` `j = ``1``; j < prevDist.length; j++) {`
`        ``if` `(target.charAt(j - ``1``) == (``char``) (i + ``'a'``)) {`
`          ``currDist[j] = prevDist[j - ``1``];`
`        ``} ``else` `{`
`          ``currDist[j] = Math.min(Math.min(prevDist[j - ``1``], prevDist[j]), `
`                                 ``currDist[j - ``1``]) + ``1``;`
`        ``}`
`      ``}`
`      `
`      ``getKEditDistanceHelper(curr + (``char``) (i + ``'a'``), target, k, `
`         ``root.children[i], currDist, result);`
`    ``}`
`  ``}`
`  `
`  ``public` `static` `void` `main(String[] args) {`
`    ``Solution solution = ``new` `Solution();`
`    ``String[] input = ``new` `String[]{``"abcd"``, ``"abc"``, ``"abd"``, ``"ad"``};`
`    ``String target = ``"ac"``;`
`    ``int` `k = ``1``;`
`    `
`    ``List<String> result = solution.getKEditDistance(input, target, k);`
`    `
`    ``for` `(String s : result) {`
`      ``System.out.println(s);`
`    ``}`
`  ``}`
`  `
`  ``class` `TrieNode {`
`    ``TrieNode[] children;`
`    ``boolean` `isLeaf;`
`    `
`    ``public` `TrieNode() {`
`      ``children = ``new` `TrieNode[``26``];`
`      `
`    ``}`
`  ``}`
`  `
`  ``class` `Trie {`
`    ``TrieNode root;`
`    `
`    ``public` `Trie() {`
`      ``root = ``new` `TrieNode();`
`    ``}`
`    `
`    ``// Add a word into trie`
`    ``public` `void` `add(String s) {`
`      ``if` `(s == ``null` `|| s.length() == ``0``) {`
`        ``return``;`
`      ``}`
`      `
`      ``TrieNode p = root;`
`      ``for` `(``int` `i = ``0``; i < s.length(); i++) {`
`        ``char` `c = s.charAt(i);`
`        ``if` `(p.children[c - ``'a'``] == ``null``) {`
`          ``p.children[c - ``'a'``] = ``new` `TrieNode();`
`        ``}`
`        `
`        ``if` `(i == s.length() - ``1``) {`
`          ``p.children[c - ``'a'``].isLeaf = ``true``;`
`        ``}`
`        `
`        ``p = p.children[c - ``'a'``];`
`      ``}`
`    ``}`
`  ``}`
https://robinliu.gitbooks.io/leetcode/content/trie.html

``````     c
/ | \
b  s  t
/
y
``````

``````          c a t
[ 0 1 2 3 ] <- prev_dist
-> c  [ 1 0 1 2 ] <- cur_dist
cb [ 2 1 1 2 ]
cs [ 2 1 1 2 ]
ct [ 2 1 1 1 ]
``````

http://ninefu.github.io/blog/KEditDistance/
``` public static void main(String[] args) {
KEditDistance kEditDistance = new KEditDistance();
TrieNode root = kEditDistance.new TrieNode();
String[] lists = new String[] {"abcd", "abc", "abd","ad"};
String word = "ac";
int k = 1;
List<String> result = kEditDistance.getKEditDistance(lists, word, k, root);
for (String s : result) {
System.out.println(s);
}
}

public List<String> getKEditDistance(String[] lists, String target, int k, TrieNode root) {
List<String> result = new LinkedList<>();
if (lists == null || lists.length == 0 || target == null || target.length() == 0 || k < 0) {
return result;
}
for (String s : lists) {
}
int[] prevDist = new int[target.length() + 1];
for (int i = 0; i < prevDist.length; i++) {
prevDist[i] = i;
}

computeDistance("", prevDist, target, k, root, result);
return result;
}

public void computeDistance(String cur, int[] prevDist, String target, int k, TrieNode node, List<String> result) {
if (node.isWord) {
if (prevDist[target.length()] <= k) {
} else {
return;
}
}

for (int i = 0; i < 26; i++) {
if (node.children[i] == null) {
continue;
}
//   print(node);

int[] curDist = new int[target.length() + 1];
curDist[0] = cur.length() + 1;
for (int j = 1; j < prevDist.length; j++) {
if (target.charAt(j - 1) == (char) (i + 'a')) {
curDist[j] = prevDist[j - 1];
} else {
curDist[j] = Math.min(Math.min(prevDist[j - 1], prevDist[j]), curDist[j - 1]) + 1;
}
}

computeDistance(cur + (char)(i + 'a'), curDist, target, k, node.children[i], result);
}
}

public  void print(TrieNode root) {
TrieNode cur = root;
int level = 0;
Queue<TrieNode> queue = new LinkedList<>();
while (!queue.isEmpty()) {
int size = queue.size();
while (size > 0) {
cur = queue.poll();
System.out.println("level " + level + cur.character);
for (int i = 0; i < cur.children.length; i++) {
if (cur.children[i] != null) {
}
}
size--;
}
level++;
}

}

class TrieNode {
boolean isWord;
TrieNode[] children;
char character;

public TrieNode() {
children = new TrieNode[26];
}

public void add(String s) {
if (s == null || s.length() == 0) {
return;
}
TrieNode cur = this;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (cur.children[c - 'a'] == null) {
cur.children[c - 'a'] = new TrieNode();
}
cur = cur.children[c - 'a'];
cur.character = c;
}
cur.isWord = true;
}
}```
http://elise.co.nf/?p=955

``` public List<String> findKEditDistance(String[] strs, String target, int k) {
List<String> res = new ArrayList<>();
if(strs.length==0) return res;
Trie trie = new Trie();
TrieNode root = trie.buildTrie(strs);
int[] distance = new int[target.length()+1];
for(int i = 0;i<distance.length;i++) distance[i] = i;
helper(target,res,root,distance,k,"");
return res;
}
private void helper(String target, List<String> res, TrieNode root,
int[] distance, int k, String tmp) {
if(root.isWord) {
return;
}

for(int i = 0;i<26;i++) {
if(root.children[i]==null) continue;
int[] curdistance = new int[target.length()+1];
//this +1 is to add the processing character, which is (char)(i+'a')
curdistance[0] = tmp.length()+1;
//use curdistance and distance to represent edit distance for tmp and target
//distance[j] means edit distance between tmp and target[0,j)
for(int j = 1;j<distance.length;j++) {
if(target.charAt(j-1)==(char)(i+'a')) curdistance[j] = distance[j-1];
// if character matches, no insert, delete or replace. so no increasing edit distance
//current edit distance for tmp and target[0,j) is distance[j-1]
else {
//not match, either delete, insert or replace character on tmp
curdistance[j] = Math.min(Math.min(distance[j-1], distance[j]), curdistance[j-1])+1; //+1?
//distance[j-1] means replace tmp.charAt(j-1) with target.charAt(j-1)
//distance[j] means delete tmp.charAt(j-1)
//curdistance[j-1] means add target.charAt(j-1) after tmp.charAt(j-1)
}
}
helper(target,res,root.children[i],curdistance,k,tmp+(char)(i+'a'));
}
}
}
class TrieNode {
public TrieNode[] children = new TrieNode[26];
public boolean isWord = false;
}
class Trie {
public void insert(TrieNode root, String str) {
TrieNode cur = root;
for(int i = 0;i<str.length();i++) {
char c = str.charAt(i);
if(cur.children[c-'a']==null) cur.children[c-'a'] = new TrieNode();
cur = cur.children[c-'a'];
}
cur.isWord = true;
}
public TrieNode buildTrie(String[] strs) {
TrieNode root = new TrieNode();
for(String str:strs) insert(root,str);
return root;
}
}```
http://lhearen.top/2016/10/16/K-Edit-Distance/

http://stevehanov.ca/blog/index.php?id=114