Buttercola: Airbnb: CSV Parser
/*
Understand the problem:
For this problem, there are several cases to consider:
1. For comma, transform to |
2. If a comma is inside a quoted field, don't treat it as a separator. Remove the enclosing quotes and print the entire token, comma included. e.g. "San Francisco, CA" => San Francisco, CA
3. If there are double quotes, remove one. e.g. "Alexandra ""Alex""" => Alexandra "Alex".
Note that """Alexandra Alex""" becomes "Alexandra Alex" because we first remove the outer-most quote, and then remove one quote of the double quote.
It's very complex to build a real CSV parser - it would also need to consider whitespace, etc.
airbnb面试题汇总
给定一个CSV文件,格式是 “some_name|some_address|some_phone|some_job”
要求输出Json format “{name:some_name, address:some_address,phone:some_phone, job:some_job}”
输入内容中有些特殊符号要注意处理
/*
John,Smith,john.smith@gmail.com,Los Angeles,1
Jane,Roberts,janer@msn.com,"San Francisco, CA",0
"Alexandra ""Alex""",Menendez,alex.menendez@gmail.com,Miami,1
"""Alexandra Alex"""
Jane,Roberts,janer@msn.com,"San Francisco, CA",0
"Alexandra ""Alex""",Menendez,alex.menendez@gmail.com,Miami,1
"""Alexandra Alex"""
John|Smith|john.smith@gmail.com|Los Angeles|1
Jane|Roberts|janer@msn.com|San Francisco, CA|0
Alexandra "Alex"|Menendez|alex.menendez@gmail.com|Miami|1
"Alexandra Alex"
*/
Jane|Roberts|janer@msn.com|San Francisco, CA|0
Alexandra "Alex"|Menendez|alex.menendez@gmail.com|Miami|1
"Alexandra Alex"
*/
For this problem, there are several cases to consider:
1. For comma, transform to |
2. If a comma is inside a quoted field, don't treat it as a separator. Remove the enclosing quotes and print the entire token, comma included. e.g. "San Francisco, CA" => San Francisco, CA
3. If there are double quotes, remove one. e.g. "Alexandra ""Alex""" => Alexandra "Alex".
Note that """Alexandra Alex""" becomes "Alexandra Alex" because we first remove the outer-most quote, and then remove one quote of the double quote.
It's very complex to build a real CSV parser - it would also need to consider whitespace, etc.
public static String parseCSV(String s) { List<String> result = new ArrayList<>(); if (s == null || s.length() == 0) { return ""; } boolean inQuote = false; StringBuffer sb = new StringBuffer(); for (int i = 0; i < s.length(); i++) { if (inQuote) { if (s.charAt(i) == '"') { if (i == s.length() - 1) { result.add(sb.toString()); // change the 2 lines to break return printStr(result); // } else if (s.charAt(i + 1) == '"') { sb.append('"'); i++; } else { result.add(sb.toString()); sb.setLength(0); inQuote = false; i++; } } else { sb.append(s.charAt(i)); } } else { if (s.charAt(i) == '"') { inQuote = true; } else if (s.charAt(i) == ',') { result.add(sb.toString()); sb.setLength(0); } else { sb.append(s.charAt(i)); } } } if (sb.length() > 0) { result.add(sb.toString()); } return printStr(result); } private static String printStr(List<String> input) { if (input == null || input.size() == 0) { return ""; } StringBuffer sb = new StringBuffer(); for (int i = 0; i < input.size(); i++) { sb.append(input.get(i)); if (i == input.size() - 1) { break; } sb.append("|"); } return sb.toString(); }airbnb面试题汇总
给定一个CSV文件,格式是 “some_name|some_address|some_phone|some_job”
要求输出Json format “{name:some_name, address:some_address,phone:some_phone, job:some_job}”
输入内容中有些特殊符号要注意处理
/**
 * Splits one line of CSV text into its fields.
 *
 * Rules applied:
 *  - an unquoted comma ends the current field;
 *  - a double quote toggles quoted mode and is not copied to the output;
 *  - inside quoted mode, two consecutive double quotes produce one literal
 *    double quote, and commas are ordinary characters.
 *
 * Every comma yields a field boundary, so a trailing comma produces a trailing
 * empty field ("a,b," gives three fields) and the column count is preserved.
 *
 * @param s one CSV line
 * @return the fields in order; an empty vector when s is empty
 */
std::vector<std::string> parseCSV(const std::string& s) {
    std::vector<std::string> fields;
    if (s.empty()) {
        return fields;
    }

    std::string current;
    bool inQuote = false;

    for (std::size_t i = 0; i < s.size(); ++i) {
        const char c = s[i];
        if (inQuote) {
            if (c != '"') {
                current += c;
            } else if (i + 1 < s.size() && s[i + 1] == '"') {
                // Escaped quote ("") inside a quoted field: keep one, skip the other.
                current += '"';
                ++i;
            } else {
                // Closing quote. The field itself is finished by the next comma
                // (or end of input), so no character is skipped blindly here.
                inQuote = false;
            }
        } else if (c == '"') {
            inQuote = true;
        } else if (c == ',') {
            fields.push_back(current);
            current.clear();
        } else {
            current += c;
        }
    }

    // The last field is always emitted, even when it is empty.
    fields.push_back(current);
    return fields;
}
int main() {
string ss[] = {"John,Smith,john.smith@gmail.com,Los Angeles,1", "\"Alexandra \"\"Alex\"\"\",Menendez,alex.menendez@gmail.com,Miami,1"};
for(auto s : ss) {
auto parsed = parseCSV(s);
for (int i = 0; i < parsed.size() - 1; ++i)
cout << parsed[i] << "|";
cout<<parsed[parsed.size() - 1]<<endl;
}
return 0;
}
https://github.com/allaboutjst/airbnb/blob/master/src/main/java/csv_parser/CSVParser.java public String parseCSV(String str) {
List<String> res = new ArrayList<>();
boolean inQuote = false;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < str.length(); i++) {
if (inQuote) {
if (str.charAt(i) == '\"') {
if (i < str.length() - 1 && str.charAt(i + 1) == '\"') {
sb.append("\"");
i++;
} else {
inQuote = false;
}
} else {
sb.append(str.charAt(i));
}
} else {
if (str.charAt(i) == '\"') {
inQuote = true;
} else if (str.charAt(i) == ',') {
res.add(sb.toString());
sb.setLength(0);
} else {
sb.append(str.charAt(i));
}
}
}
if (sb.length() > 0) {
res.add(sb.toString());
}
return String.join("|", res);
}