## Thursday, April 20, 2017

### Substring with highest frequency length product - GeeksforGeeks

Given a string of lowercase alphabetic characters, find the substring whose product of length and frequency (the number of times it occurs in the string) is the maximum among all possible substrings.

An efficient solution starts by constructing the longest common prefix (LCP) array of the sorted suffixes. If lcp[i] is K, then the i-th and (i+1)-th suffixes share a prefix of length K, i.e. there is a substring of length K that occurs at least twice. In the same way, if three consecutive lcp values are (K, K-2, K+1), then the four suffixes they connect share a prefix of length K-2 (the smallest of the three values), so there is a substring of length K-2 that occurs at least four times in the string.
From this observation, the answer corresponds to a contiguous range of the lcp array that maximizes (smallest element in the range) × (number of elements in the range + 1): the smallest element is the length of the repeated substring, and a range of m consecutive lcp values connects m + 1 suffixes, which is the frequency of that substring. This reformulated problem can be solved in the same way as the largest rectangle in a histogram problem, with every width increased by one.
In the code below, the LCP array is constructed with Kasai’s algorithm.
// One suffix of the text together with the two keys it is sorted by.
struct suffix
{
    // Position in the original text where this suffix starts
    int index;

    // rank[0]: rank of the leading part of the suffix
    // rank[1]: rank of the part that follows it (-1 when there is none)
    int rank[2];
};

// Ordering predicate handed to sort() for suffixes.
// Suffixes are ordered by rank[0]; ties are broken by rank[1].
// Returns 1 when a must come before b, 0 otherwise.
int cmp(struct suffix a, struct suffix b)
{
    // Primary keys differ: they alone decide the order
    if (a.rank[0] != b.rank[0])
        return (a.rank[0] < b.rank[0]) ? 1 : 0;

    // Primary keys tie: fall back to the secondary key
    return (a.rank[1] < b.rank[1]) ? 1 : 0;
}

`// This is the main function that takes a string`
`// 'txt' of size n as an argument, builds and`
`// return the suffix array for the given string`
`vector<``int``> buildSuffixArray(string txt, ``int` `n)`
`{`
`    ``// A structure to store suffixes and their indexes`
`    ``struct` `suffix suffixes[n];`

`    ``// Store suffixes and their indexes in an array`
`    ``// of structures. The structure is needed to sort`
`    ``// the suffixes alphabatically and maintain their`
`    ``// old indexes while sorting`
`    ``for` `(``int` `i = 0; i < n; i++)`
`    ``{`
`        ``suffixes[i].index = i;`
`        ``suffixes[i].rank[0] = txt[i] - ``'a'``;`
`        ``suffixes[i].rank[1] = ((i+1) < n)? (txt[i + 1] - ``'a'``): -1;`
`    ``}`

`    ``// Sort the suffixes using the comparison function`
`    ``// defined above.`
`    ``sort(suffixes, suffixes+n, cmp);`

`    ``// At his point, all suffixes are sorted according to first`
`    ``// 2 characters.  Let us sort suffixes according to first 4`
`    ``// characters, then first 8 and so on`
`    ``// This array is needed to get the index in suffixes[]`
`    ``// from original index.  This mapping is needed to get`
`    ``// next suffix.`
`    ``int` `ind[n];`
`    ``for` `(``int` `k = 4; k < 2*n; k = k*2)`
`    ``{`
`        ``// Assigning rank and index values to first suffix`
`        ``int` `rank = 0;`
`        ``int` `prev_rank = suffixes[0].rank[0];`
`        ``suffixes[0].rank[0] = rank;`
`        ``ind[suffixes[0].index] = 0;`

`        ``// Assigning rank to suffixes`
`        ``for` `(``int` `i = 1; i < n; i++)`
`        ``{`
`            ``// If first rank and next ranks are same as`
`            ``// that of previous suffix in array, assign`
`            ``// the same new rank to this suffix`
`            ``if` `(suffixes[i].rank[0] == prev_rank &&`
`                    ``suffixes[i].rank[1] == suffixes[i-1].rank[1])`
`            ``{`
`                ``prev_rank = suffixes[i].rank[0];`
`                ``suffixes[i].rank[0] = rank;`
`            ``}`
`            ``else` `// Otherwise increment rank and assign`
`            ``{`
`                ``prev_rank = suffixes[i].rank[0];`
`                ``suffixes[i].rank[0] = ++rank;`
`            ``}`
`            ``ind[suffixes[i].index] = i;`
`        ``}`

`        ``// Assign next rank to every suffix`
`        ``for` `(``int` `i = 0; i < n; i++)`
`        ``{`
`            ``int` `nextindex = suffixes[i].index + k/2;`
`            ``suffixes[i].rank[1] = (nextindex < n)?`
`                 ``suffixes[ind[nextindex]].rank[0]: -1;`
`        ``}`

`        ``// Sort the suffixes according to first k characters`
`        ``sort(suffixes, suffixes+n, cmp);`
`    ``}`

`    ``// Store indexes of all sorted suffixes in the suffix array`
`    ``vector<``int``>suffixArr;`
`    ``for` `(``int` `i = 0; i < n; i++)`
`        ``suffixArr.push_back(suffixes[i].index);`

`    ``// Return the suffix array`
`    ``return`  `suffixArr;`
`}`

/* Builds the LCP array for 'txt' from its suffix array using Kasai's
   algorithm.

   Entry p of the result is the length of the longest common prefix of
   the suffixes at positions p and p+1 of suffixArr. The entry for the
   last position has no successor and stays 0. */
std::vector<int> kasai(std::string txt, std::vector<int> suffixArr)
{
    const int n = suffixArr.size();

    // Result, zero-initialised
    std::vector<int> lcp(n, 0);

    // Inverse of the suffix array: where[s] is the position of the
    // suffix starting at text index s, e.g. suffixArr[0] == 5 gives
    // where[5] == 0.
    std::vector<int> where(n, 0);
    for (int pos = 0; pos < n; ++pos)
        where[suffixArr[pos]] = pos;

    // Number of characters already known to match, carried over from
    // the previous text position
    int matched = 0;

    // Walk the suffixes in text order (not in sorted order)
    for (int start = 0; start < n; ++start)
    {
        const int pos = where[start];

        if (pos != n - 1)
        {
            // Start of the suffix that follows this one in sorted order
            const int neighbour = suffixArr[pos + 1];

            // Resume matching after the characters already known to
            // agree instead of starting from scratch
            while (start + matched < n && neighbour + matched < n &&
                   txt[start + matched] == txt[neighbour + matched])
                ++matched;

            lcp[pos] = matched;

            // The next suffix drops the first character, so at most
            // one matched character is lost
            if (matched > 0)
                --matched;
        }
        else
        {
            // Last suffix in sorted order: nothing to compare with,
            // so restart the match counter
            matched = 0;
        }
    }

    return lcp;
}

`//  method to get LCP array`
`vector<``int``> getLCPArray(string str)`
`{`
`    ``vector<``int``>suffixArr = buildSuffixArray(str, str.length());`
`    ``return` `kasai(str, suffixArr);`
`}`

`// The main function to find the maximum rectangular`
`// area under given histogram with n bars`
`int` `getMaxArea(``int` `hist[], ``int` `n)`
`{`
`    ``// Create an empty stack. The stack holds indexes`
`    ``// of hist[] array. The bars stored in stack are`
`    ``// always in increasing order of their heights.`
`    ``stack<``int``> s;`

`    ``int` `max_area = 0; ``// Initalize max area`
`    ``int` `tp;  ``// To store top of stack`

`    ``// To store area with top bar as the smallest bar`
`    ``int` `area_with_top;`

`    ``// Run through all bars of given histogram`
`    ``int` `i = 0;`
`    ``while` `(i < n)`
`    ``{`
`        ``// If this bar is higher than the bar on`
`        ``// top stack, push it to stack`
`        ``if` `(s.empty() || hist[s.top()] <= hist[i])`
`            ``s.push(i++);`

`        ``// If this bar is lower than top of stack,`
`        ``// then calculate area of rectangle with`
`        ``// stack top as the smallest (or minimum`
`        ``// height) bar. 'i' is 'right index' for`
`        ``// the top and element before top in stack`
`        ``// is 'left index'`
`        ``else`
`        ``{`
`            ``tp = s.top();  ``// store the top index`
`            ``s.pop();  ``// pop the top`

`            ``// Calculate the area with hist[tp]`
`            ``// stack as smallest bar`
`            ``area_with_top = hist[tp] * (s.empty() ?`
`                           ``(i + 1) : i - s.top());`

`            ``// update max area, if needed`
`            ``if` `(max_area < area_with_top)`
`                ``max_area = area_with_top;`
`        ``}`
`    ``}`

`    ``// Now pop the remaining bars from stack`
`    ``// and calculate area with every`
`    ``// popped bar as the smallest bar`
`    ``while` `(s.empty() == ``false``)`
`    ``{`
`        ``tp = s.top();`
`        ``s.pop();`
`        ``area_with_top = hist[tp] * (s.empty() ?`
`                        ``(i + 1) : i - s.top());`

`        ``if` `(max_area < area_with_top)`
`            ``max_area = area_with_top;`
`    ``}`

`    ``return` `max_area;`
`}`

`// Returns maximum product of frequency and length`
`// of a substring.`
`int` `maxProductOfFreqLength(string str)`
`{`
`    ``//  get LCP array by Kasai's algorithm`
`    ``vector<``int``> lcp = getLCPArray(str);`

`    ``int` `hist[lcp.size()];`

`    ``//  copy lcp array into hist array`
`    ``for` `(``int` `i = 0; i < lcp.size(); i++)`
`        ``hist[i] = lcp[i];`

`    ``//  get the maximum area under lcp histogram`
`    ``int` `substrMaxValue = getMaxArea(hist, lcp.size());`

`    ``// if string length itself is greater than`
`    ``// histogram area, then return that`
`    ``if` `(str.length() > substrMaxValue)`
`        ``return` `str.length();`
`    ``else`
`        ``return` `substrMaxValue;`
`}`