Massive Algorithms: Find the shortest unique prefix

Find the shortest unique prefix - EPI

https://www.geeksforgeeks.org/find-all-shortest-unique-prefixes-to-represent-each-word-in-a-given-list/

Given an array of words, find all shortest unique prefixes to represent each word in the given array. Assume that no word is prefix of another.

Examples:

Input: arr[] = {"zebra", "dog", "duck", "dove"}
Output: dog, dov, du, z
Explanation: dog => dog
             dove = dov 
             duck = du
             z   => zebra

A Simple Solution is to consider every prefix of every word (starting from the shortest to largest), and if a prefix is not prefix of any other string, then print it.

    static class TrieNode 

    { 

        TrieNode[] child = new TrieNode[MAX]; 

        int freq;  // To store frequency 

        TrieNode() { 

            freq =1; 

            for (int i = 0; i < MAX; i++) 

                child[i] = null; 

        } 

    } 

    static TrieNode root; 

    // Method to insert a new string into Trie 

    static void insert(String str) 

    { 

        // Length of the URL 

        int len = str.length(); 

        TrieNode pCrawl = root; 

        // Traversing over the length of given str. 

        for (int level = 0; level<len; level++) 

        { 

            // Get index of child node from current character 

            // in str. 

            int index = str.charAt(level); 

            // Create a new child if not exist already 

            if (pCrawl.child[index] == null) 

                pCrawl.child[index] = new TrieNode(); 

            else

               (pCrawl.child[index].freq)++; 

            // Move to the child 

            pCrawl = pCrawl.child[index]; 

        } 

    } 

    // This function prints unique prefix for every word stored 

    // in Trie. Prefixes one by one are stored in prefix[]. 

    // 'ind' is current index of prefix[] 

    static void findPrefixesUtil(TrieNode root, char[] prefix, 

                          int ind) 

    { 

        // Corner case 

        if (root == null) 

           return; 

        // Base case 

        if (root.freq == 1) 

        { 

           prefix[ind] = '\0'; 

           int i = 0; 

           while(prefix[i] != '\0') 

            System.out.print(prefix[i++]); 

           System.out.print("  "); 

           return; 

        } 

        for (int i=0; i<MAX; i++) 

        { 

           if (root.child[i] != null) 

           { 

               prefix[ind] = (char) i; 

               findPrefixesUtil(root.child[i], prefix, ind+1); 

           } 

        } 

    } 

    // Function to print all prefixes that uniquely 

    // represent all words in arr[0..n-1] 

    static void findPrefixes(String arr[], int n) 

    { 

        // Construct a Trie of all words 

        root = new TrieNode(); 

        root.freq = 0; 

        for (int i = 0; i<n; i++) 

            insert(arr[i]); 

        // Create an array to store all prefixes 

        char[] prefix = new char[MAX_WORD_LEN]; 

        // Print all prefixes using Trie Traversal 

        findPrefixesUtil(root, prefix, 0); 

    }

https://shawnlincoding.wordpress.com/2015/03/25/shortest-unique-prefix-to-represent-word-in-an-array/

看到prefix肯定想到的是trie咯，在trie node里面多加一个field count，表示
这个字符出现多少次
1，insert word into the trie
2, search the word, 找到第一个count为1的node，返回
因为说明这个node下面没有分支了，他就应该是唯一的

    public static final int R = 256;

    private Node root;

    private class Node{

        private int count;

        private boolean isEnd;

        private Node next[] = new Node[R];

        public Node(){

            count = 0;

            isEnd = false;

        }

        public Node(int count, boolean isEnd){

            this.count = count;

            this.isEnd = isEnd;

        }

    }

    public void insert(String str){

        if(root == null) root = new Node();

        Node curr = root;

        for(int i = 0; i < str.length(); i++){

            char c = str.charAt(i);

            if(curr.next[c] == null){

                curr.next[c] = new Node(1, false);

            }else{

                curr.next[c].count++;

            }

            curr = curr.next[c];

        }

        curr.isEnd = true;

    }

    public String shortestPrefix(String str){

        Node curr = root;

        int len = 0;

        for(int i = 0; i < str.length(); i++){

            char c = str.charAt(i);

            curr = curr.next[c];

            len++;

            if(curr.count == 1) break;

        }

        return str.substring(0, len);

    }

    public static void main(String[] args){

        TriePrefix trie = new TriePrefix();

        String[] words = {"by", "sea", "sells", "she", "shells", "shore", "the"};

        String[] words1 = {"zebra", "dog", "duck", "dove"};

        for(String word : words1){

            trie.insert(word);

        }

        String[] res = new String[words1.length];

        for(int i = 0; i < words1.length; i++){

            res[i] = trie.shortestPrefix(words1[i]);

            System.out.println(res[i]);

        }

    }

https://www.geeksforgeeks.org/find-shortest-unique-prefix-every-word-given-list-set-2-using-sorting/

In this post a sorting based approach is discussed. On comparing the string with 2 other most similar strings in the array, we can find its shortest unique prefix. For example, if we sort the array {“zebra”, “dog”, “duck”, “dove”}, we get {“dog”, “dove”, “duck”, “zebra”}. The shortest unique prefix for the string “dove” can be found as:
Compare “dove” to “dog” –> unique prefix for dove is “dov”
Compare “dove” to “duck” –> unique prefix for dove is “do”
Now, the shortest unique prefix for “dove” is the one with greater length between “dov” and “do”. So, it is “dov”.
The shortest unique prefix for first and last string can be found by comparing them with only 1 most similar neighbor on right and left, respectively.
We can sort the array of strings and keep on doing this for every string of the array.

If we want to output the prefixes as the order of strings in the input array, we can store the string and its corresponding index in the hashmap. While adding the prefix to result array, we can get the index of the corresponding string from hashmap and add the prefix to that index.

    public String[] uniquePrefix(String[] a) 

{

        int size = a.length; 

        /* create an array to store the results */

        String[] res = new String[size]; 

        Arrays.fill(res, ""); 

        /* hashmap to store the indexes */

        HashMap<String, Integer> hm = 

                        new HashMap<String, Integer>(); 

        for (int i = 0; i < size; i++) 

            hm.put(a[i], i); 

        /* sort the array of strings */

        Arrays.sort(a); 

        /* compare the first string with its only right neighbor */

        int j = 0; 

        while (j < Math.min(a[0].length()-1, a[1].length()-1)) 

{

            if (a[0].charAt(j) == a[1].charAt(j)) 

                j++; 

            else

                break; 

}

        /* get the index of a[0] from HashMap */

        res[ hm.get(a[0]) ] = a[0].substring(0, j+1); 

        /* Store the unique prefix of a[1] from its 

          left neighbor */

        String temp_prefix= a[1].substring(0, j+1); 

        for (int i = 1; i < size-1; i++) 

{

            /* compute prefix of a[i] unique from its right neighbor */

            j = 0; 

            while (j < Math.min(a[i].length()-1, a[i+1].length()-1)) 

{

                if (a[i].charAt(j) == a[i+1].charAt(j) ) 

                    j++; 

                else

                    break; 

}

            String new_prefix = a[i].substring(0, j+1); 

            /* compare the new prefix with previous prefix */

            if (temp_prefix.length() > new_prefix.length() ) 

                res[ hm.get(a[i]) ] = temp_prefix; 

            else

                res[ hm.get(a[i]) ] = new_prefix; 

            /* store the prefix of a[i+1] unique from its 

               left neighbour */

            temp_prefix = a[i+1].substring(0, j+1); 

}

        /* compute the unique prefix for the last string 

           in sorted array */

        String sec_last = a[size-2]; 

        String last = a[size-1]; 

        j = 0; 

        while (j < Math.min( sec_last.length()-1, last.length()-1)) 

{

            if (sec_last.charAt(j) == last.charAt(j) ) 

                j++; 

            else

                break; 

}

        res[ hm.get(last) ] = last.substring(0, j+1); 

        return res; 

}

http://www.codebytes.in/2014/10/finding-shortest-prefixes-for-strings.html
[We are using two arrays, the one given and the one we'll be using for the prefixes]

1. Make another array for prefixes. Store first characters of original strings in this array.
2. For all the prefixes to the left of it (in the prefix array), check whether this prefix has been used somewhere.
3. If it has been used, check whether you can add on character to the previous duplicate prefix, if not, add one character to the one that is being checked for duplicates.
4. Follow the same procedure for this new updated prefix (whether it was at some previous location in the array or the current one that was being checked, which is now updated)[Recursion]

    public static void checkPrefix(String[] strings, String[] pre, String s, int index){
        
        //System.out.println(Arrays.toString(pre));
        for(int i=index-1;i>=0;--i){
            if(s.matches(pre[i])){ 

                if(s.length()==strings[i].length()){
                    //System.out.println("Can't update the previous one, need to update this one");
                    pre[index] = strings[index].substring(0, s.length()+1);
                    checkPrefix(strings, pre, pre[index], index);
                    return;
                }
                else if(s.length() < strings[i].length()){ 
                    //System.out.println("Can update the previous one");
                    pre[i] = strings[i].substring(0, s.length()+1);
                    checkPrefix(strings, pre, pre[i], i);
                    return;
                } 
            }
        }
    }
    
    public static void findPrefixes(String[] strings){
        System.out.println();
        String[] pre = new String[strings.length];
         
        for(int i=0;i<pre.length;++i){
            if(i>0){
                if(strings[i].matches(strings[i-1])){
                    System.out.println("Duplicate string - Error!"); continue; 
                }
            }
            pre[i]=Character.toString(strings[i].charAt(0)); 
            checkPrefix(strings, pre, pre[i], i);
        } 
        System.out.println(Arrays.toString(strings));
        System.out.println(Arrays.toString(pre));
    }

ShortestUniquePrefix.java

private static String randString(int len) {
Random r = new Random();
StringBuilder ret = new StringBuilder(len);
while (len-- > 0) {
ret.append((char) (r.nextInt(26) + 'a'));
}
return ret.toString();
}

public static String findShortestPrefix(String s, Set<String> D) {
// Builds a trie according to given dictionary D.
Trie T = new Trie();
for (String word : D) {
T.insert(word);
}
return T.getShortestUniquePrefix(s);
}

public static class Trie {
private static class TrieNode {
private boolean isString = false;
private Map<Character, TrieNode> leaves = new HashMap<>();

public boolean getIsString() {
return isString;
}

public void setIsString(boolean string) {
isString = string;
}

public Map<Character, TrieNode> getLeaves() {
return leaves;
}
}

private TrieNode root = new TrieNode();

public boolean insert(String s) {
TrieNode p = root;
for (char c : s.toCharArray()) {
if (!p.getLeaves().containsKey(c)) {
p.getLeaves().put(c, new TrieNode());
}
p = p.getLeaves().get(c);
}

// s already existed in this trie.
if (p.getIsString()) {
return false;
} else { // p.getIsString() == false
p.setIsString(true); // Inserts s into this trie.
return true;
}
}

public String getShortestUniquePrefix(String s) {
TrieNode p = root;
StringBuilder prefix = new StringBuilder();
for (char c : s.toCharArray()) {
prefix.append(c);
if (!p.getLeaves().containsKey(c)) {
return prefix.toString();
}
p = p.getLeaves().get(c);
}
return "";
}
}

Find the shortest unique prefix - EPI

Labels

Popular Posts