### LeetCode 471 - Encode String with Shortest Length

Related: LeetCode 394 - Decode String
Given a non-empty string, encode the string such that its encoded length is the shortest.
The encoding rule is: `k[encoded_string]`, where the encoded_string inside the square brackets is being repeated exactly k times.
Note:
1. k will be a positive integer and encoded string will not be empty or have extra space.
2. You may assume that the input string contains only lowercase English letters. The string's length is at most 160.
3. If an encoding process does not make the string shorter, then do not encode it. If there are several solutions, return any of them is fine.
Example 1:
```Input: "aaa"
Output: "aaa"
Explanation: There is no way to encode it such that it is shorter than the input string, so we do not encode it.
```
Example 2:
```Input: "aaaaa"
Output: "5[a]"
Explanation: "5[a]" is shorter than "aaaaa" by 1 character.
```
Example 3:
```Input: "aaaaaaaaaa"
Output: "10[a]"
Explanation: "a9[a]" or "9[a]a" are also valid solutions, both of them have the same length = 5, which is the same as "10[a]".
```
Example 4:
```Input: "aabcaabcd"
Output: "2[aabc]d"
Explanation: "aabc" occurs twice, so one answer can be "2[aabc]d".
```
Example 5:
```Input: "abbbabbbcabbbabbbc"
Output: "2[2[abbb]c]"
Explanation: "abbbabbbc" occurs twice, but "abbbabbbc" can also be encoded to "2[abbb]c", so one answer can be "2[2[abbb]c]".```
https://chenboxie.wordpress.com/2018/07/08/leetcode-471-encode-string-with-shortest-length/

• dp[i][j] = min(dp[i][k] + dp[k + 1][j]), where 1 <= k < j, ‘+’ means concatenate here
• dp[i][j] = min(dp[i][j], compress(s[i:j])), compress is a function to find if we can shorten input string by find a repeating pattern in itself

2. 但仅仅使用第一个递推是不够的，例如这个例子，abcabcabc, 用第一个递推公式可以得到, dp(abcabcabc) = dp(abcabc) + dp(abc) = 2[abc] + abc = 2[abc]abc，永远无法得到3[abc]

3. 假设自身的字符串为sub, 在对自身进行压缩的过程中，如果自身是符合压缩条件的，压缩必须写成：
sub = sub.length() / index + “[” + dp[i][i + index – 1] + “]”;

sub = “abcdbcd abcdbcd”，不能写成2[abcdbcd], 而是写成2[a2[bcd]]

1. 区间DP的辨别和运用，dp[i][j] = max/min(dp[i][k] + dp[k + 1][j])
2. 像s=abcabc这样的字符串，在O(n)时间内进行压缩的方法，就是在s + s中寻找s的第二个起始位置，如果第二个起始位置小于s的长度的话，说明s包含重复字符串。

https://discuss.leetcode.com/topic/71963/accepted-solution-in-java
We will form 2-D array of Strings.
dp[i][j] = string from index i to index j in encoded form.
We can write the following formula as:-
dp[i][j] = min(dp[i][j], dp[i][k] + dp[k+1][j]) or if we can find some pattern in string from i to j which will result in more less length.
Time Complexity = O(n^3)
Time Complexity is O(N^4), replaceAll() is O(N)
``````    String[][] dp = new String[s.length()][s.length()];

for(int l=0;l<s.length();l++) {
for(int i=0;i<s.length()-l;i++) {
int j = i+l;
String substr = s.substring(i, j+1);
// Checking if string length < 5. In that case, we know that encoding will not help.
if(j - i < 4) {
dp[i][j] = substr;
} else {
dp[i][j] = substr;
// Loop for trying all results that we get after dividing the strings into 2 and combine the   results of 2 substrings
for(int k = i; k<j;k++) {
if((dp[i][k] + dp[k+1][j]).length() < dp[i][j].length()){
dp[i][j] = dp[i][k] + dp[k+1][j];
}
}

// Loop for checking if string can itself found some pattern in it which could be repeated.
for(int k=0;k<substr.length();k++) {
String repeatStr = substr.substring(0, k+1);
if(repeatStr != null
&& substr.length()%repeatStr.length() == 0
&& substr.replaceAll(repeatStr, "").length() == 0) {
String ss = substr.length()/repeatStr.length() + "[" + dp[i][i+k] + "]";
if(ss.length() < dp[i][j].length()) {
dp[i][j] = ss;
}
}
}
}
}
}

return dp[0][s.length()-1];
}``````

X.
http://www.cnblogs.com/grandyang/p/6194403.html

```    string encode(string s) {
int n = s.size();
vector<vector<string>> dp(n, vector<string>(n, ""));
for (int step = 1; step <= n; ++step) {
for (int i = 0; i + step - 1 < n; ++i) {
int j = i + step - 1;
dp[i][j] = s.substr(i, step);
for (int k = i; k < j; ++k) {
string left = dp[i][k], right = dp[k + 1][j];
if (left.size() + right.size() < dp[i][j].size()) {
dp[i][j] = left + right;
}
}
string t = s.substr(i, j - i + 1), replace = "";
auto pos = (t + t).find(t, 1);
if (pos >= t.size()) replace = t;
else replace = to_string(t.size() / pos) + '[' + dp[i][i + pos - 1] + ']';
if (replace.size() < dp[i][j].size()) dp[i][j] = replace;
}
}
return dp[0][n - 1];
}
```

```    string encode(string s) {
int n = s.size();
vector<vector<string>> dp(n, vector<string>(n, ""));
for (int step = 1; step <= n; ++step) {
for (int i = 0; i + step - 1 < n; ++i) {
int j = i + step - 1;
dp[i][j] = s.substr(i, step);
string t = s.substr(i, j - i + 1), replace = "";
auto pos = (t + t).find(t, 1);
if (pos < t.size()) {
replace = to_string(t.size() / pos) + "[" + dp[i][i + pos - 1] + "]";
if (replace.size() < dp[i][j].size()) dp[i][j] = replace;
continue;
}
for (int k = i; k < j; ++k) {
string left = dp[i][k], right = dp[k + 1][j];
if (left.size() + right.size() < dp[i][j].size()) {
dp[i][j] = left + right;
}
}
}
}
return dp[0][n - 1];
}
```

https://segmentfault.com/a/1190000008341304
dpi表示s[i, j]最短的压缩结果，subproblem里面枚举切分点k，分别得到dpi和dpk+1求和，找到长度最短的。

``````    public String encode(String s) {
int n = s.length();
String[][] dp = new String[n][n];
for(int i = 0; i < n; i++) dp[i][i] = "" + s.charAt(i);
// j - i
for(int len = 1; len < n; len++) {
for(int i = 0; i + len < n; i++) {
int j = i + len;
// enumerate seperate k
for(int k = i; k < j; k++) {
int left = dp[i][k].length();
int right = dp[k+1][j].length();
// update shortest encoded string within (i, j)
if(dp[i][j] == null || left + right < dp[i][j].length()) {
dp[i][j] = dp[i][k] + dp[k+1][j];
}
}
// update string within (i, j), encode abcabc
String sub = s.substring(i, j + 1);
int index = (sub + sub).indexOf(sub, 1);
if(index < sub.length()) {
sub = (sub.length() / index) + "[" + dp[i][i+index-1] + "]";
}
if(dp[i][j] == null || dp[i][j].length() > sub.length()) {
dp[i][j] = sub;
}
}
}

return dp[0][n-1];
}``````
http://www.cnblogs.com/EdwardLiu/p/6213476.html
Initially I think of 1D DP, dp[i] stands for the shortest string of first i characters, then:
dp[i] = minLen{dp[k] + encode(substring(k+1, i))}
then I realize that the second part encode(substring(k+1, i)) is actually the same with our dp problem. So it turns out the transfer function is
dp[i] = minLen{dp[k] + dp(substring(k+1, i))}
then 1D is not enough, I introduce the second dimension, which indicates the end. dp[i][j] is the shortest encoded string from i to j
But the hardest part of this problem is how to generate dp[i][j] from dp[i][k] and dp[k+1][j]
I‘ve thought about the cases like:
dp[i][k] = 3[abc] 　　dp[k+1][j] = 2[abc], 　　then dp[i][j] = 5[abc]
dp[i][k] = 3[abc] 　　dp[k+1][j] = xyz, 　　then dp[i][j] = 3[abc]xyz
dp[i][k] = aabc　　　dp[k+1][j] = aabc, 　　then dp[i][j] = 2[aabc]
No idea what to implement this conveniently, so refer to idea
The idea is to firstly concantenate dp[i][k] and dp[k+1][j] directly to construct dp[i][j], and then check if there exist possible repeat patterns in the original substring s.substring(i, j+1) that could further shorten dp[i][j]

replaceAll function is really clever

```利用字典dp记录字符串的最优编码串

def __init__(self): self.dp = dict() def encode(self, s): """ :type s: str :rtype: str """ size = len(s) if size <= 1: return s if s in self.dp: return self.dp[s] ans = s for p in range(1, size + 1): left, right = s[:p], s[p:] t = self.solve(left) + self.encode(right) if len(t) < len(ans): ans = t self.dp[s] = ans return ans def solve(self, s): ans = s size = len(s) for x in range(1, size / 2 + 1): if size % x or s[:x] * (size / x) != s: continue y = str(size / x) + '[' + self.encode(s[:x]) + ']' if len(y) < len(ans): ans = y return ans

https://yijiajin.github.io/leetcode/2017/01/02/LC471.-Encode-String-with-Shortest-Length/

public String encode(String s) { // dp solution, for each substring of s, check whether it can be encode // dp[i][j] means substing of [i, j] after encode if(s == null || s.length() <= 4) return s; int len = s.length(); String[][] dp = new String[len][len]; for(int sublen = 0; sublen < len; sublen++){ for(int begin = 0; begin < len - sublen; begin++){ int end = begin + sublen; String sub = s.substring(begin, end + 1); dp[begin][end] = sub; if(sublen < 4) continue; // sub.length = sublen + 1 // get shortest substring combination for(int i = begin; i < end; i++){ if(dp[begin][i].length() + dp[i+1][end].length() < dp[begin][end].length()) dp[begin][end] = dp[begin][i] + dp[i+1][end]; } // if repeat, using kmp to encode then compare with shortest combination String pattern = kmp(sub); //System.out.println(pattern); // no repeat, keep origional -> shorest combination  if(pattern.length() == sub.length()) continue; // find repeat, repeat should also be encoded String encode = sub.length()/pattern.length() + "[" + dp[begin][begin+pattern.length()-1] + "]"; if(encode.length() < dp[begin][end].length()) dp[begin][end] = encode; } // for begin  } // for sublen return dp[0][len - 1]; } // code // kmp method to find repeat pattern in one String private String kmp(String str){ int len = str.length(); int[] next = new int[len]; char[] arr = str.toCharArray(); Arrays.fill(next, -1); int i = 0, j = 1; for(; j < len; j++){ i = next[j - 1]; while(arr[i + 1] != arr[j] && i >= 0) i = next[i]; if(arr[i + 1] == arr[j]) next[j] = i + 1; else next[j] = -1; } //System.out.println(Arrays.toString(next)); // if repeat, it must before the 0 int patternLen = len - next[len - 1] - 1; if(patternLen == 0) return str; if(patternLen != len && len % patternLen == 0) { return str.substring(0, patternLen); } else { return str; } } // kmp