Buttercola: Airbnb: CSV Parser
/*
Understand the problem:
For this problem, there are several cases to consider:
1. An unquoted comma is a field separator; transform it to |
2. If a comma is inside a quoted field, don't treat it as a separator. Remove the surrounding quotes and print the entire token, comma included. e.g. "San Francisco, CA" => San Francisco, CA
3. If there is a doubled quote ("") inside a quoted field, remove one of the pair. e.g. "Alexandra ""Alex""" => Alexandra "Alex".
Note that """Alexandra Alex""" becomes "Alexandra Alex" because we first remove the outer-most quotes, and then remove one quote from each doubled quote.
It's very complex to build a real CSV parser - it also needs to consider whitespace, embedded newlines, etc.
airbnb面试题汇总
给定一个CSV文件,格式是 “some_name|some_address|some_phone|some_job”
要求输出Json format “{name:some_name, address:some_address,phone:some_phone, job:some_job}”
输入内容中有些特殊符号要注意处理
/*
John,Smith,john.smith@gmail.com,Los Angeles,1
Jane,Roberts,janer@msn.com,"San Francisco, CA",0
"Alexandra ""Alex""",Menendez,alex.menendez@gmail.com,Miami,1
"""Alexandra Alex"""
Jane,Roberts,janer@msn.com,"San Francisco, CA",0
"Alexandra ""Alex""",Menendez,alex.menendez@gmail.com,Miami,1
"""Alexandra Alex"""
John|Smith|john.smith@gmail.com|Los Angeles|1
Jane|Roberts|janer@msn.com|San Francisco, CA|0
Alexandra "Alex"|Menendez|alex.menendez@gmail.com|Miami|1
"Alexandra Alex"
*/
Jane|Roberts|janer@msn.com|San Francisco, CA|0
Alexandra "Alex"|Menendez|alex.menendez@gmail.com|Miami|1
"Alexandra Alex"
*/
For this problem, there are several cases to consider:
1. An unquoted comma is a field separator; transform it to |
2. If a comma is inside a quoted field, don't treat it as a separator. Remove the surrounding quotes and print the entire token, comma included. e.g. "San Francisco, CA" => San Francisco, CA
3. If there is a doubled quote ("") inside a quoted field, remove one of the pair. e.g. "Alexandra ""Alex""" => Alexandra "Alex".
Note that """Alexandra Alex""" becomes "Alexandra Alex" because we first remove the outer-most quotes, and then remove one quote from each doubled quote.
It's very complex to build a real CSV parser - it also needs to consider whitespace, embedded newlines, etc.
/**
 * Converts one CSV record into a pipe-delimited string.
 *
 * <p>Rules applied while scanning the record:
 * <ul>
 *   <li>An unquoted comma ends the current field.</li>
 *   <li>A double quote toggles quoted mode; the quote itself is not emitted and
 *       commas inside quotes are kept as data.</li>
 *   <li>Inside quotes, two consecutive quotes ({@code ""}) produce one literal quote.</li>
 * </ul>
 *
 * <p>Every field is emitted, including empty ones, so {@code a,b,} yields {@code a|b|}.
 * Text that directly follows a closing quote is appended to the same field instead of
 * being discarded. An unterminated quote is tolerated: the rest of the input becomes
 * the content of the last field.
 *
 * @param s the CSV record; may be {@code null} or empty
 * @return the fields joined with {@code '|'}, or {@code ""} when {@code s} is null or empty
 */
public static String parseCSV(String s) {
    if (s == null || s.length() == 0) {
        return "";
    }

    List<String> result = new ArrayList<>();
    // The buffer never leaves this method, so the unsynchronized builder is sufficient.
    StringBuilder sb = new StringBuilder();
    boolean inQuote = false;

    for (int i = 0; i < s.length(); i++) {
        char c = s.charAt(i);
        if (inQuote) {
            if (c != '"') {
                sb.append(c);
            } else if (i + 1 < s.length() && s.charAt(i + 1) == '"') {
                // Escaped quote: emit one quote and skip the second character of the pair.
                sb.append('"');
                i++;
            } else {
                // Closing quote: only leave quoted mode. The separator that normally
                // follows is handled by the next iteration, so a non-comma character
                // after the quote is no longer silently dropped.
                inQuote = false;
            }
        } else if (c == '"') {
            inQuote = true;
        } else if (c == ',') {
            result.add(sb.toString());
            sb.setLength(0);
        } else {
            sb.append(c);
        }
    }

    // Always flush the last field, even when it is empty; otherwise a trailing
    // comma (or a trailing "") would lose a column.
    result.add(sb.toString());
    return printStr(result);
}
/**
 * Joins the given fields into a single string, placing {@code '|'} between
 * neighbouring fields (no leading or trailing separator).
 *
 * @param input the fields to join; may be {@code null}
 * @return the joined string, or {@code ""} when {@code input} is null or empty
 */
private static String printStr(List<String> input) {
    // String.join rejects a null collection, so the guard has to stay explicit.
    boolean nothingToJoin = (input == null) || input.isEmpty();
    return nothingToJoin ? "" : String.join("|", input);
}
airbnb面试题汇总
给定一个CSV文件,格式是 “some_name|some_address|some_phone|some_job”
要求输出Json format “{name:some_name, address:some_address,phone:some_phone, job:some_job}”
输入内容中有些特殊符号要注意处理
/// @brief Splits one CSV record into its fields.
///
/// Scanning rules:
///  - an unquoted comma ends the current field;
///  - a double quote toggles quoted mode (the quote itself is not emitted) and
///    commas inside quotes are kept as data;
///  - inside quotes, two consecutive quotes ("") produce one literal quote.
///
/// Every field is returned, including empty ones, so `a,b,` yields three
/// fields. Characters that directly follow a closing quote are appended to
/// the same field rather than discarded. An unterminated quote is tolerated:
/// the remainder of the input becomes the content of the last field.
///
/// @param s The CSV record to split.
/// @return The fields in order; an empty vector when @p s is empty.
std::vector<std::string> parseCSV(const std::string& s) {
    std::vector<std::string> fields;
    if (s.empty()) {
        return fields;
    }

    std::string current;
    bool inQuote = false;
    for (std::size_t i = 0; i < s.size(); ++i) {
        const char c = s[i];
        if (inQuote) {
            if (c != '"') {
                current += c;
            } else if (i + 1 < s.size() && s[i + 1] == '"') {
                // Escaped quote: emit one quote and skip the second one.
                current += '"';
                ++i;
            } else {
                // Closing quote: just leave quoted mode. The separator that
                // usually follows is processed by the next iteration, so a
                // non-comma character after the quote is not swallowed.
                inQuote = false;
            }
        } else if (c == '"') {
            inQuote = true;
        } else if (c == ',') {
            fields.push_back(current);
            current.clear();
        } else {
            current += c;
        }
    }

    // Always flush the last field, even when empty; otherwise a trailing
    // comma would silently lose a column.
    fields.push_back(current);
    return fields;
}
int main() {
string ss[] = {"John,Smith,john.smith@gmail.com,Los Angeles,1", "\"Alexandra \"\"Alex\"\"\",Menendez,alex.menendez@gmail.com,Miami,1"};
for(auto s : ss) {
auto parsed = parseCSV(s);
for (int i = 0; i < parsed.size() - 1; ++i)
cout << parsed[i] << "|";
cout<<parsed[parsed.size() - 1]<<endl;
}
return 0;
}
https://github.com/allaboutjst/airbnb/blob/master/src/main/java/csv_parser/CSVParser.java public String parseCSV(String str) {
List<String> res = new ArrayList<>();
boolean inQuote = false;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < str.length(); i++) {
if (inQuote) {
if (str.charAt(i) == '\"') {
if (i < str.length() - 1 && str.charAt(i + 1) == '\"') {
sb.append("\"");
i++;
} else {
inQuote = false;
}
} else {
sb.append(str.charAt(i));
}
} else {
if (str.charAt(i) == '\"') {
inQuote = true;
} else if (str.charAt(i) == ',') {
res.add(sb.toString());
sb.setLength(0);
} else {
sb.append(str.charAt(i));
}
}
}
if (sb.length() > 0) {
res.add(sb.toString());
}
return String.join("|", res);
}