Massive Algorithms: Group Contacts

Group Contacts | tech::interview

Group Contacts | tech::interview
* Given a lot of accounts registered under facebook, each user can have multiple
* email addresses registered under the same account.
* Example, John has john@gmail.com and john@yahoo.com registered under user john.
* Somehow people tend to have multiple accounts with different username but same
* email addresses.
* Find a way to group those accounts.

有这么一个class Contact，里面有一个string的name，和一个vector 装着email address，是这个Contact有的address，用一个list装着是因为一个人有可能有多个email，现在给你vector，比如
{
{ "John", {"john@gmail.com"} },
{ "Mary", {"mary@gmail.com"} },
{ "John", {"john@yahoo.com"} },
{ "John", {"john@gmail.com", "john@yahoo.com", "john@hotmail.com"} },
{ "Bob", {"bob@gmail.com"} }
}

现在我们知道如果email address相同的话，那么就说明是同一个人，现在要做的是根据这些email address，把同一个contact给group起来，生成一个vector<vector<Contact>>

http://www.cnblogs.com/EdwardLiu/p/6569384.html

有一些账号，账号里面有一个或多个email， 如果两个账号有共同的email，则认为这两个账号是同一个人，找出哪些账号是同一个人
输入是这样的:数字是用户，字母是邮箱，有很多人有多个邮箱，找出相同的用户
  1- {x,y,z}
  2-{x} 
  3-{a,b}
  4-{y, z}
  5-{b}
  6-{m}
  7-{t,b}

账号之间如果有公共email就连边，我觉得要用hashMap, 存（email, user） pair

如果当前user的某个email在hashMap里出现过，当时是user1, 就看user和user1是否connected，否的话就要union,根据Union Find

Union Find Based on Quick Union

 3 class UnionFind{
 4     int[] ids;
 5     int count;
 6     public UnionFind(int num) {
 7         ids = new int[num];
 8         Arrays.fill(ids, -1);
 9         count = 0;
10     }
11     
12     public int find(int id) {
13         while (id != ids[id]) id = ids[id];
14         return id;
15     }
16     
17     public void union(int n1, int n2) {
18         int root1 = find(n1);
19         int root2 = find(n2);
20         ids[root2] = root1;
21     }
22     
23     public boolean isConnected(int n1, int n2) {
24         return find(n1) == find(n2);
25     }
26 }
27 
28 public class EmailUser {
29     public int commonUser(HashMap<Integer, List<Character>> map) {
30         int size = map.size();
31         UnionFind uf = new UnionFind(10);
32         HashMap<Character, Integer> emailToUser = new HashMap<Character, Integer>();
33         for (int user : map.keySet()) {
34             uf.ids[user] = user;
35             uf.count++;
36             List<Character> emailList = map.get(user);
37             for (Character eachEmail : emailList) {
38                 if (!emailToUser.containsKey(eachEmail)) {
39                     emailToUser.put(eachEmail, user);
40                 }
41                 else {
42                     int oriUser = emailToUser.get(eachEmail);
43                     if (!uf.isConnected(user, oriUser)) {
44                         uf.union(oriUser, user);
45                         uf.count--;
46                     }
47                 }
48             }
49         }
50         for (int elem : uf.ids)
51             System.out.print(elem);
52         return uf.count;
53     }

https://robinliu.gitbooks.io/leetcode/content/union_find.html

class Contact {
public:
    string name;
    vector<string> emails;
};
class UnionFind {
    vector<int> father, ranks;
public:
    UnionFind(int num_nodes) {
        for (int i = 0; i < num_nodes; i++) {
            father.push_back(i);
            ranks.push_back(0);
        }
    }
    int find(int x) {
        if (x == father[x]) return x;
        return father[x] = find(father[x]);
    }
    void do_union(int x, int y) {
        x = find(x);
        y = find(y);
        if (x == y) return;
        if (ranks[x] < ranks[y]) {
            father[x] = y;
        } else {
            father[y] = x;
            if (ranks[x] == ranks[y]) {
                ranks[x]++;
            }
        }
    }
};
vector<vector<Contact>> group_contacts(vector<Contact>& input) {
    unordered_map<string,vector<int>> email_record;

    int n = (int)input.size();
    for (int k = 0; k < input.size(); k++) {
        for (auto email : input[k].emails) {
            email_record[email].push_back(k);
        }
    }
    UnionFind uf(n);
    for (auto p : email_record) {
        for (int i = 0; i < p.second.size() - 1; i++) {
            uf.do_union(p.second[i], p.second[i + 1]);
        }
    }
    unordered_map<int,vector<int>> groups;
    for (int i = 0; i < n; i++) groups[uf.find(i)].push_back(i);

    vector<vector<Contact>> ret;
    for (auto& p : groups) {
        vector<Contact> vs;
        for (auto& c : p.second) vs.push_back(input[c]);
        ret.push_back(vs);
    }
    return ret;
}

http://www.geeksforgeeks.org/find-same-contacts-in-a-list-of-contacts/

Given a list of contacts containing username, email and phone number in any order. Identify the same contacts (i.e., same person having many different contacts) and output the same contacts together.

Notes:
1) A contact can store its three fields in any order, i.e., phone number can appear before username or username can appear before phone number.

2) Two contacts are same if they have either same username or email or phone number.

O(N^2)The idea is to first create a graph of contacts using given array. In the graph, there is an edge between vertex i to vertex j if they both have either same username or same email or same phone number. Once the graph is constructed, the task reduces to finding connected components in an undirected graph. We can find connected components either by doing DFS or BFS starting from every unvisited vertex.

// Structure for storing contact details.

struct contact

{

    string field1, field2, field3;

};

// A utility function to fill entries in adjacency matrix

// representation of graph

void buildGraph(contact arr[], int n, int *mat[])

{

    // Initialize the adjacency matrix

    for (int i=0; i<n; i++)

       for (int j=0; j<n; j++)

           mat[i][j] = 0;

    // Traverse through all contacts

    for (int i = 0; i < n; i++) {

        // Add mat from i to j and vice versa, if possible.

        // Since length of each contact field is at max some

        // constant. (say 30) so body execution of this for

        // loop takes constant time.

        for (int j = i+1; j < n; j++)

            if (arr[i].field1 == arr[j].field1 ||

                arr[i].field1 == arr[j].field2 ||

                arr[i].field1 == arr[j].field3 ||

                arr[i].field2 == arr[j].field1 ||

                arr[i].field2 == arr[j].field2 ||

                arr[i].field2 == arr[j].field3 ||

                arr[i].field3 == arr[j].field1 ||

                arr[i].field3 == arr[j].field2 ||

                arr[i].field3 == arr[j].field3)

{

                mat[i][j] = 1;

                mat[j][i] = 1;

                break;

}

}

}

// A recuesive function to perform DFS with vertex i as source

void DFSvisit(int i, int *mat[], bool visited[], vector<int>& sol, int n)

{

    visited[i] = true;

    sol.push_back(i);

    for (int j = 0; j < n; j++)

        if (mat[i][j] && !visited[j])

            DFSvisit(j, mat, visited, sol, n);

}

// Finds similar contacrs in an array of contacts

void findSameContacts(contact arr[], int n)

{

    // vector for storing the solution

    vector<int> sol;

    // Declare 2D adjaceny matrix for mats

    int **mat = new int*[n];

    for (int i = 0; i < n; i++)

        mat[i] = new int[n];

    // visited array to keep track of visited nodes

    bool visited[n];

    memset(visited, 0, sizeof(visited));

    // Fill adjacency matrix

    buildGraph(arr, n, mat);

    // Since, we made a graph with contacts as nodes with fields as links.

    // two nodes are linked if they represent the same person.

    // so, total number of connected components and nodes in each component

    // will be our answer.

    for (int i = 0; i < n; i++)

{

        if (!visited[i])

{

            DFSvisit(i, mat, visited, sol, n);

            // Add delimeter to separate nodes of one component from other.

            sol.push_back(-1);

}

}

    // Print the solution

    for (int i = 0; i < sol.size(); i++)

        if (sol[i] == -1) cout << endl;

        else cout << sol[i] << " ";

}

http://ideone.com/7vKTLx
class Contact {
public:
string name;
vector<string> emails;
};
class UnionFind {
vector<int> father, ranks;
public:
UnionFind(int num_nodes) {
for (int i = 0; i < num_nodes; i++) {
father.push_back(i);
ranks.push_back(0);
}
}
int find(int x) {
if (x == father[x]) return x;
return father[x] = find(father[x]);
}
void do_union(int x, int y) {
x = find(x);
y = find(y);
if (x == y) return;
if (ranks[x] < ranks[y]) {
father[x] = y;
} else {
father[y] = x;
if (ranks[x] == ranks[y]) {
ranks[x]++;
}
}
}
};
vector<vector<Contact>> group_contacts(vector<Contact>& input) {
unordered_map<string,vector<int>> email_record;

int n = (int)input.size();
for (int k = 0; k < input.size(); k++) {
for (auto email : input[k].emails) {
email_record[email].push_back(k);
}
}
UnionFind uf(n);
for (auto p : email_record) {
for (int i = 0; i < p.second.size() - 1; i++) {
uf.do_union(p.second[i], p.second[i + 1]);
}
}
unordered_map<int,vector<int>> groups;
for (int i = 0; i < n; i++) groups[uf.find(i)].push_back(i);

vector<vector<Contact>> ret;
for (auto& p : groups) {
vector<Contact> vs;
for (auto& c : p.second) vs.push_back(input[c]);
ret.push_back(vs);
}
return ret;
}
https://github.com/monkeylyf/interviewjam/blob/master/graph/facebook_Group_Users_With_Same_Email_Address.java
public ArrayList<ArrayList<User>> groupUserWithSameEmail(ArrayList<User> input) {
ArrayList<ArrayList<User>> groupedUser = new ArrayList<ArrayList<User>>();
System.out.println(input);
// Create node with one name and one email address.
ArrayList<User> nodes = new ArrayList<User>();

int index = 0;

// Map index of nodes to original input.
HashMap<Integer, Integer> nodeIdxToOriginal = new HashMap<Integer, Integer>();
for (int i = 0; i < input.size(); ++i) {
User user = input.get(i);
for (String email : user.emails) {
nodes.add(new User(user.name, new String[] { email }));
nodeIdxToOriginal.put(index++, i);
}
}

int n = nodes.size();
boolean[][] adjacencyMatrix = new boolean[n][n];
HashMap<String, Integer> emailToNodeIdx = new HashMap<String, Integer>();

int i = 0;

// Create adjacency matrix to represent the graph.
for (User user : input) {
int startIdx = i;
int endIdx = i + user.emails.length - 1;

// Connecting nodes with the email address seen before.
for (String email : user.emails) {
if (emailToNodeIdx.containsKey(email)) {
int idx = emailToNodeIdx.get(email);
adjacencyMatrix[idx][i] = true;
adjacencyMatrix[i][idx] = true;
} else {
emailToNodeIdx.put(email, i);
}
++i;
}

// Connecting all email registered under same user name.
for (int j = startIdx + 1; j <= endIdx; ++j) {
// System.out.println("Connecting " + startIdx + " " + j);
adjacencyMatrix[startIdx][j] = true;
adjacencyMatrix[j][startIdx] = true;
}
i = endIdx + 1;
}

// printMatrix(adjacencyMatrix);

// Mapping groupId to user in original input.
HashSet<Integer> used = new HashSet<Integer>();

for (ArrayList<Integer> arr : dfs(adjacencyMatrix, n, nodes)) {
ArrayList<User> group = new ArrayList<User>();
for (int idx : arr) {
int originalId = nodeIdxToOriginal.get(idx);
if (!used.contains(originalId)) {
// If a user has not been added to the group, add it.
group.add(input.get(originalId));
used.add(originalId);
}
}
groupedUser.add(group);
}

return groupedUser;
}

/* DFS logic.*/
public ArrayList<ArrayList<Integer>> dfs(boolean[][] adjacencyMatrix, int n,
ArrayList<User> nodes) {
ArrayList<ArrayList<Integer>> groupedUserID = new ArrayList<ArrayList<Integer>>();

boolean[] visited = new boolean[n];

for (int i = 0; i < n; ++i) {
if (!visited[i]) {
ArrayList<Integer> acc = new ArrayList<Integer>();
dfsUtil(i, adjacencyMatrix, visited, nodes, acc);
groupedUserID.add(acc);
}
}

return groupedUserID;
}

private void dfsUtil(int i, boolean[][] adjacencyMatrix, boolean[] visited,
ArrayList<User> nodes, ArrayList<Integer> acc) {
visited[i] = true;
acc.add(i);
for (int next = 0; next < visited.length; ++next) {
if (next != i && !visited[next] && adjacencyMatrix[i][next]) {
dfsUtil(next, adjacencyMatrix, visited, nodes, acc);
}
}
}

class User {

String name;
String[] emails;

User(String name, String[] emails) {
this.name = name;
this.emails = emails;
}

public String toString() {
StringBuilder sb = new StringBuilder(this.name);
sb.append(":");
for (String email : this.emails) {
sb.append(" ");
sb.append(email);
}

return sb.toString();
}
}

X. Extended
https://warmland.gitbooks.io/algorithm/content/interview/snapchat-_group_record_using_union-find.html

// 题目是手机上的通讯录，每条记录只有(name, number)这种pair,有些记录名字重复，有些记录号码重复，让我返回一个list>， // 将所有记录按人分组。比较tricky的点在于(ABC,123), (ABC, 456), (BCD, 456) // 三条记录，第一条和第三条也属于同一个人。要求时间复杂度尽量小

uf算法我是明白的，但是我想的时候的困惑在于

1.ufMap里面存放什么，是map还是map还是别的什么

2.在group的时候，既要对name进行uf group又要对number进行group怎么操作

解决方案是这样的（既union-find部分的思路）

1.首先ufMap里面存<Record, Record>，初始化把所有entry过一遍，都存入，使它最初的root节点都是它自己

2. find部分。

　　1）首先找到这个record的root节点

　　2）然后把从这个record开始直到root节点所有的节点的root节点都更新成root

　　3）返回root

3. union部分，没什么特别的

　　1）对record1,record2找到root1,root2

　　2）如果root1 != root2，那么就设置其中一个是另外一个的root节点

完成union-find的部分，整个算法的整体思路是。

1.建立一个nameMap<nameString, Record>和一个numMap<number, Record>

2.对于所有entry走一遍，

　　1) 如果nameMap里面已经存在当前entry的name，那么就说明要union；否则就把[nameString, entry]加入map

　　2）如果numMap里面已经存在当前entry的number，那么就说明要union；否则就把[number, entry]加入map
3. 所以此刻ufMap里面已经有已经对于name和number group之后的结果，现在就需要把结果保存下来。

　　所以可以先建一个Map<String, List<Record>>整理好，再根据这个map得到List<List<Record>>

实际代码如下：

public class GroupRecord {
    static class Record {
        String name;
        int number;
        public Record(String name, int number) {
            this.name = name;
            this.number = number;
        }

        public String toString() {
            return "(" + name + "," + number + ")";
        }
    }

    public static List<List<Record>> group(List<Record> records) {
        Map<Record, Record> ufMap = new HashMap<Record, Record>();
        Map<String, Record> nameMap = new HashMap<>();
        Map<Integer, Record> numMap = new HashMap<>();
        for(Record entry: records) {
            ufMap.put(entry, entry);
        }
        for(Record entry: records) {
            if(nameMap.containsKey(entry.name)) {
                union(ufMap, entry, nameMap.get(entry.name));
            } else {
                nameMap.put(entry.name, entry);
            }
            if(numMap.containsKey(entry.number)) {
                union(ufMap, entry, numMap.get(entry.number));
            } else {
                numMap.put(entry.number, entry);
            }
        }
        Map<String, List<Record>> groupMap = new HashMap<>();
        for(Record entry: records) {
            Record root = find(ufMap, entry);
            List<Record> list = groupMap.get(root.name);
            if(list == null) {
                list = new ArrayList<Record>();
            }
            list.add(entry);
            groupMap.put(root.name, list);
        }
        List<List<Record>> res = new ArrayList<List<Record>>(groupMap.values());
        return res;
    }

    private static Record find(Map<Record, Record> ufMap, Record record) {
        Record root = record;
        while(!ufMap.get(root).equals(root)) {
            root = ufMap.get(root);
        }
        Record cur = record;
        while(!cur.equals(root)) {
            ufMap.put(record, root);
            cur = ufMap.get(cur);
        }
        return root;
    }

    private static void union(Map<Record, Record> ufMap, Record r1, Record r2) {
        Record root1 = find(ufMap, r1);
        Record root2 = find(ufMap, r2);
        if(!root1.equals(root2)) {
            ufMap.put(root1, root2);
        }
    }

    public static void main(String[] args) {
        List<List<Record>> res = new ArrayList<List<Record>>();
        ArrayList<Record> records = new ArrayList<>();
        records.add(new Record("ABC",123));
        records.add(new Record("ABC",456));
        records.add(new Record("BCD",456));

        records.add(new Record("AD",342));
        records.add(new Record("AddD",11));
        records.add(new Record("DFX",34));
        records.add(new Record("AD",34));
        records.add(new Record("AD",11));

        res = group(records);
        System.out.println(res);
    }
}

Group Contacts | tech::interview

Labels

Popular Posts