https://leetcode.com/problems/encode-and-decode-tinyurl/
https://mikecoder.github.io/oj-code/2017/03/04/EncodeandDecodeTinyURL/
短链接的维护,但不限制内部如何生成短链接。就是Hash表的问题,我直接使用的
https://discuss.leetcode.com/topic/81633/easy-solution-in-java-5-line-code
https://discuss.leetcode.com/topic/81620/maybe-base64
Your solution, while terse, totally misses the point of the question. Encoding is valuable as long as you can send back a "shorter" URL; your method seems to generate much longer URLs than the supplied input.
X. http://blog.csdn.net/jmspan/article/details/51749521
http://www.jianshu.com/p/4b39053a7a24
https://github.com/zxqiu/leetcode-lintcode/blob/master/system%20design/Consistent_Hashing_II.java
public class Solution {
static int[] hashToMachine;
static int shardsPerMachine;
static int maxShards;
static int availableShards;
// @param n a positive integer
// @param k a positive integer
// @return a Solution object
public static Solution create(int n, int k) {
Solution solution = new Solution();
solution.maxShards = n;
solution.shardsPerMachine = k;
solution.availableShards = n;
solution.hashToMachine = new int[n];
Arrays.fill(hashToMachine, -1);
return solution;
}
// @param machine_id an integer
// @return a list of shard ids
public List<Integer> addMachine(int machine_id) {
if (shardsPerMachine > availableShards) {
return new ArrayList<Integer>();
}
List<Integer> shards = randomNumber();
for (Integer i : shards) {
hashToMachine[i] = machine_id;
}
availableShards -= shardsPerMachine;
Collections.sort(shards);
return shards;
}
// @param hashcode an integer
// @return a machine id
public int getMachineIdByHashCode(int hashcode) {
int ret = hashcode % maxShards;
while (hashToMachine[ret] == -1) {
ret = (ret + 1) % maxShards;
}
return hashToMachine[ret];
}
private static List<Integer> randomNumber() {
Set<Integer> dupCheck = new HashSet<Integer>();
List<Integer> ret = new ArrayList<Integer>();
Random r = new Random();
while (ret.size() < shardsPerMachine) {
int candidate = r.nextInt(maxShards);
if (dupCheck.contains(candidate) || hashToMachine[candidate] != -1) {
continue;
}
ret.add(candidate);
dupCheck.add(candidate);
}
return ret;
}
TinyURL is a URL shortening service where you enter a URL such as
https://leetcode.com/problems/design-tinyurl
and it returns a short URL such as http://tinyurl.com/4e9iAk
.
Design the
X.encode
and decode
methods for the TinyURL service. There is no restriction on how your encode/decode algorithm should work. You just need to ensure that a URL can be encoded to a tiny URL and the tiny URL can be decoded to the original URL.https://mikecoder.github.io/oj-code/2017/03/04/EncodeandDecodeTinyURL/
map<string, string> urls;
string hash(string url) {
long long hash = 0;
for (int i = 0; i < (int)url.length(); i++) {
hash = hash * 10 + url[i];
}
return to_string(hash);
}
string encode(string longUrl) {
string key = hash(longUrl);
urls.insert(pair<string, string>(key, longUrl));
return key;
}
string decode(string shortUrl) {
return urls.find(shortUrl)->second;
}
http://xiadong.info/2017/03/leetcode-535-encode-and-decode-tinyurl/短链接的维护,但不限制内部如何生成短链接。就是Hash表的问题,我直接使用的
unordered_map
容器所提供的Hash函数对原链接进行处理,得到一个数值,然后将该数值转换为62进制字符串(10个数字+大小写字母各26个),该字符串作为短链接的后缀部分。
class Solution {
string tinyUrlPrefix = "http://tinyurl.com/";
unordered_map<string, string> urls;
public:
// Encodes a URL to a shortened URL.
string encode(string longUrl) {
auto hashFunc = urls.hash_function();
size_t key = hashFunc(longUrl);
string shortUrl = tinyUrlPrefix + convertToSixtyTwoBase(key);
urls[shortUrl] = longUrl;
return shortUrl;
}
// Decodes a shortened URL to its original URL.
string decode(string shortUrl) {
return urls[shortUrl];
}
string convertToSixtyTwoBase (size_t key) {
string str;
while (key > 0) {
int mod = key % 62;
if (mod < 10) str.push_back(mod + '0');
else if (mod < 36) str.push_back(mod - 10 + 'a');
else str.push_back(mod - 36 + 'A');
key /= 62;
}
return str;
}
};
https://discuss.leetcode.com/topic/82273/three-different-approaches-in-java
Approach 1- Using simple counter
public class Codec {
Map<Integer, String> map = new HashMap<>();
int i=0;
public String encode(String longUrl) {
map.put(i,longUrl);
return "http://tinyurl.com/"+i++;
}
public String decode(String shortUrl) {
return map.get(Integer.parseInt(shortUrl.replace("http://tinyurl.com/", "")));
}
}
https://discuss.leetcode.com/topic/81637/two-solutions-and-thoughtshttps://discuss.leetcode.com/topic/81633/easy-solution-in-java-5-line-code
My first solution produces short URLs like
http://tinyurl.com/0
, http://tinyurl.com/1
, etc, in that order.class Codec:
def __init__(self):
self.urls = []
def encode(self, longUrl):
self.urls.append(longUrl)
return 'http://tinyurl.com/' + str(len(self.urls) - 1)
def decode(self, shortUrl):
return self.urls[int(shortUrl.split('/')[-1])]
Using increasing numbers as codes like that is simple but has some disadvantages, which the below solution fixes:
- If I'm asked to encode the same long URL several times, it will get several entries. That wastes codes and memory.
- People can find out how many URLs have already been encoded. Not sure I want them to know.
- People might try to get special numbers by spamming me with repeated requests shortly before their desired number comes up.
- Only using digits means the codes can grow unnecessarily large. Only offers a million codes with length 6 (or smaller). Using six digits or lower or upper case letters would offer (10+26*2)6 = 56,800,235,584 codes with length 6.
The following solution doesn't have these problems. It produces short URLs like
http://tinyurl.com/KtLa2U
, using a random code of six digits or letters. If a long URL is already known, the existing short URL is used and no new entry is generated.class Codec:
alphabet = string.ascii_letters + '0123456789'
def __init__(self):
self.url2code = {}
self.code2url = {}
def encode(self, longUrl):
while longUrl not in self.url2code:
code = ''.join(random.choice(Codec.alphabet) for _ in range(6))
if code not in self.code2url:
self.code2url[code] = longUrl
self.url2code[longUrl] = code
return 'http://tinyurl.com/' + self.url2code[longUrl]
def decode(self, shortUrl):
return self.code2url[shortUrl[-6:]]
It's possible that a randomly generated code has already been generated before. In that case, another random code is generated instead. Repeat until we have a code that's not already in use. How long can this take? Well, even if we get up to using half of the code space, which is a whopping 626/2 = 28,400,117,792 entries, then each code has a 50% chance of not having appeared yet. So the expected/average number of attempts is 2, and for example only one in a billion URLs takes more than 30 attempts. And if we ever get to an even larger number of entries and this does become a problem, then we can just use length 7. We'd need to anyway, as we'd be running out of available codes.
Map<String, String> index = new HashMap<String, String>();
Map<String, String> revIndex = new HashMap<String, String>();
static String BASE_HOST = "http://tinyurl.com/";
// Encodes a URL to a shortened URL.
public String encode(String longUrl) {
if (revIndex.containsKey(longUrl)) return BASE_HOST + revIndex.get(longUrl);
String charSet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
String key = null;
do {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 6; i++) {
int r = (int) (Math.random() * charSet.length());
sb.append(charSet.charAt(r));
}
key = sb.toString();
} while (index.containsKey(key));
index.put(key, longUrl);
revIndex.put(longUrl, key);
return BASE_HOST + key;
}
// Decodes a shortened URL to its original URL.
public String decode(String shortUrl) {
return index.get(shortUrl.replace(BASE_HOST, ""));
}
https://discuss.leetcode.com/topic/81620/maybe-base64
Your solution, while terse, totally misses the point of the question. Encoding is valuable as long as you can send back a "shorter" URL; your method seems to generate much longer URLs than the supplied input.
import java.nio.charset.StandardCharsets;
import java.util.Base64;
public class Codec {
public String encode(String longUrl) {
return Base64.getUrlEncoder().encodeToString(longUrl.getBytes(StandardCharsets.UTF_8));
}
public String decode(String shortUrl) {
return new String(Base64.getUrlDecoder().decode(shortUrl));
}
}
X. http://blog.csdn.net/jmspan/article/details/51740066
一般的数据库进行horizontal shard的方法是指,把 id 对 数据库服务器总数 n 取模,然后来得到他在哪台机器上。这种方法的缺点是,当数据继续增加,我们需要增加数据库服务器,将 n 变为 n+1 时,几乎所有的数据都要移动,这就造成了不 consistent。为了减少这种 naive 的 hash方法(%n) 带来的缺陷,出现了一种新的hash算法:一致性哈希的算法——Consistent Hashing。这种算法有很多种实现方式,这里我们来实现一种简单的 Consistent Hashing。
- 将 id 对 360 取模,假如一开始有3台机器,那么让3台机器分别负责0~119, 120~239, 240~359 的三个部分。那么模出来是多少,查一下在哪个区间,就去哪台机器。
- 当机器从 n 台变为 n+1 台了以后,我们从n个区间中,找到最大的一个区间,然后一分为二,把一半给第n+1台机器。
- 比如从3台变4台的时候,我们找到了第3个区间0~119是当前最大的一个区间,那么我们把0~119分为0~59和60~119两个部分。0~59仍然给第1台机器,60~119给第4台机器。
- 然后接着从4台变5台,我们找到最大的区间是第3个区间120~239,一分为二之后,变为 120~179, 180~239。
假设一开始所有的数据都在一台机器上,请问加到第 n 台机器的时候,区间的分布情况和对应的机器编号分别是多少?
Notice
你可以假设 n <= 360. 同时我们约定,当最大区间出现多个时,我们拆分编号较小的那台机器。
比如0~119, 120~239区间的大小都是120,但是前一台机器的编号是1,后一台机器的编号是2, 所以我们拆分0~119这个区间。
比如0~119, 120~239区间的大小都是120,但是前一台机器的编号是1,后一台机器的编号是2, 所以我们拆分0~119这个区间。
Clarification
If the maximal interval is [x, y], and it belongs to machine id z, when you add a new machine with id n, you should divide [x, y, z] into two intervals:
[x, (x + y) / 2, z]
and [(x + y) / 2 + 1, y, n]
Example
for n =
1
, return[
[0,359,1]
]
represent 0~359 belongs to machine 1.
for n =
2
, return[
[0,179,1],
[180,359,2]
]
for n =
3
, return[
[0,89,1]
[90,179,3],
[180,359,2]
]
for n =
4
, return[
[0,89,1],
[90,179,3],
[180,269,2],
[270,359,4]
]
for n =
5
, return[
[0,44,1],
[45,89,5],
[90,179,3],
[180,269,2],
[270,359,4]
]
方法:使用堆来维护区间。
- public List<List<Integer>> consistentHashing(int n) {
- // Write your code here
- PriorityQueue<Range> heap = new PriorityQueue<>(16,
- new Comparator<Range>() {
- @Override
- public int compare(Range r1, Range r2) {
- if (r1.to - r1.from > r2.to - r2.from) return -1;
- if (r1.to - r1.from < r2.to - r2.from) return 1;
- return r1.id - r2.id;
- }
- }
- );
- heap.offer(new Range(1, 0, 359));
- for(int i = 2; i <= n; i++) {
- Range range = heap.poll();
- Range range1 = new Range(range.id, range.from, (range.from+range.to)/2);
- Range range2 = new Range(i, (range.from+range.to)/2+1, range.to);
- heap.offer(range1);
- heap.offer(range2);
- }
- Range[] ranges = heap.toArray(new Range[0]);
- List<List<Integer>> results = new ArrayList<>(ranges.length);
- for(int i = 0; i < ranges.length; i++) {
- List<Integer> result = new ArrayList<>(3);
- result.add(ranges[i].from);
- result.add(ranges[i].to);
- result.add(ranges[i].id);
- results.add(result);
- }
- return results;
- }
- class Range {
- int id;
- int from, to;
- Range(int id, int from, int to) {
- this.id = id;
- this.from = from;
- this.to = to;
- }
vector<vector<int>> consistentHashing(int n) {
// Write your code here
vector<vector<int>> ret;
vector<int> range;
range.push_back(0);
range.push_back(359);
range.push_back(1);
ret.push_back(range);
for(int i = 2; i <= n; i++) {
int maxRange = INT_MIN;
int index = -1;
for(int j = 0; j < ret.size(); j++) {
if (maxRange < ret[j][1] - ret[j][0]) {
maxRange = ret[j][1] - ret[j][0];
index = j;
}
}
int mid = (ret[index][1] + ret[index][0]) / 2;
int end = ret[index][1];
ret[index][1] = mid;
range[0] = mid + 1;
range[1] = end;
range[2] = i;
ret.push_back(range);
}
return ret;
}
X. http://blog.csdn.net/jmspan/article/details/51749521
在 Consistent Hashing I 中我们介绍了一个比较简单的一致性哈希算法,这个简单的版本有两个缺陷:
- 增加一台机器之后,数据全部从其中一台机器过来,这一台机器的读负载过大,对正常的服务会造成影响。
- 当增加到3台机器的时候,每台服务器的负载量不均衡,为1:1:2。
为了解决这个问题,引入了 micro-shards 的概念,一个更好的算法是这样:
- 将 360° 的区间分得更细。从 0~359 变为一个 0 ~ n-1 的区间,将这个区间首尾相接,连成一个圆。
- 当加入一台新的机器的时候,随机选择在圆周中撒 k 个点,代表这台机器的 k 个 micro-shards。
- 每个数据在圆周上也对应一个点,这个点通过一个 hash function 来计算。
- 一个数据该属于那台机器负责管理,是按照该数据对应的圆周上的点在圆上顺时针碰到的第一个 micro-shard 点所属的机器来决定。
n 和 k在真实的 NoSQL 数据库中一般是 2^64 和 1000。
请实现这种引入了 micro-shard 的 consistent hashing 的方法。主要实现如下的三个函数:
- create(int n, int k)
- addMachine(int machine_id) // add a new machine, return a list of shard ids.
- getMachineIdByHashCode(int hashcode) // return machine id
Notice
当 n 为 2^64 时,在这个区间内随机基本不会出现重复。
但是为了方便测试您程序的正确性,n 在数据中可能会比较小,所以你必须保证你生成的 k 个随机数不会出现重复。
LintCode并不会判断你addMachine的返回结果的正确性(因为是随机数),只会根据您返回的addMachine的结果判断你getMachineIdByHashCode结果的正确性。
但是为了方便测试您程序的正确性,n 在数据中可能会比较小,所以你必须保证你生成的 k 个随机数不会出现重复。
LintCode并不会判断你addMachine的返回结果的正确性(因为是随机数),只会根据您返回的addMachine的结果判断你getMachineIdByHashCode结果的正确性。
create(100, 3)
addMachine(1)
>> [3, 41, 90] => 三个随机数
getMachineIdByHashCode(4)
>> 1
addMachine(2)
>> [11, 55, 83]
getMachineIdByHashCode(61)
>> 2
getMachineIdByHashCode(91)
>> 1
- private TreeMap<Integer, Integer> tm = new TreeMap<>();
- private int[] nums;
- private int size = 0;
- private int k;
- public Solution(int n, int k) {
- this.nums = new int[n];
- for(int i = 0; i < n; i++) this.nums[i] = i;
- Random random = new Random();
- for(int i = 0; i < n; i++) {
- int j = random.nextInt(i + 1);
- int t = nums[i];
- nums[i] = nums[j];
- nums[j] = t;
- }
- this.k = k;
- }
- // @param n a positive integer
- // @param k a positive integer
- // @return a Solution object
- public static Solution create(int n, int k) {
- // Write your code here
- return new Solution(n, k);
- }
- // @param machine_id an integer
- // @return a list of shard ids
- public List<Integer> addMachine(int machine_id) {
- // Write your code here
- List<Integer> ids = new ArrayList<>();
- for(int i = 0; i < this.k; i++) {
- int id = this.nums[size++ % this.nums.length];
- ids.add(id);
- this.tm.put(id, machine_id);
- }
- return ids;
- }
- // @param hashcode an integer
- // @return a machine id
- public int getMachineIdByHashCode(int hashcode) {
- // Write your code here
- if (tm.isEmpty()) return 0;
- Integer ceiling = tm.ceilingKey(hashcode);
- if (ceiling != null) return tm.get(ceiling);
- return tm.get(tm.firstKey());
- }
http://www.jianshu.com/p/4b39053a7a24
https://github.com/zxqiu/leetcode-lintcode/blob/master/system%20design/Consistent_Hashing_II.java
public class Solution {
static int[] hashToMachine;
static int shardsPerMachine;
static int maxShards;
static int availableShards;
// @param n a positive integer
// @param k a positive integer
// @return a Solution object
public static Solution create(int n, int k) {
Solution solution = new Solution();
solution.maxShards = n;
solution.shardsPerMachine = k;
solution.availableShards = n;
solution.hashToMachine = new int[n];
Arrays.fill(hashToMachine, -1);
return solution;
}
// @param machine_id an integer
// @return a list of shard ids
public List<Integer> addMachine(int machine_id) {
if (shardsPerMachine > availableShards) {
return new ArrayList<Integer>();
}
List<Integer> shards = randomNumber();
for (Integer i : shards) {
hashToMachine[i] = machine_id;
}
availableShards -= shardsPerMachine;
Collections.sort(shards);
return shards;
}
// @param hashcode an integer
// @return a machine id
public int getMachineIdByHashCode(int hashcode) {
int ret = hashcode % maxShards;
while (hashToMachine[ret] == -1) {
ret = (ret + 1) % maxShards;
}
return hashToMachine[ret];
}
private static List<Integer> randomNumber() {
Set<Integer> dupCheck = new HashSet<Integer>();
List<Integer> ret = new ArrayList<Integer>();
Random r = new Random();
while (ret.size() < shardsPerMachine) {
int candidate = r.nextInt(maxShards);
if (dupCheck.contains(candidate) || hashToMachine[candidate] != -1) {
continue;
}
ret.add(candidate);
dupCheck.add(candidate);
}
return ret;
}