1、工具类
package test;

import lombok.extern.slf4j.Slf4j;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Small helper for reading and writing CSV data.
 *
 * <p>Fields are separated by commas. A field may be wrapped in double quotes, in which case it
 * may contain commas and line breaks, and a literal double quote is written as two double
 * quotes ({@code ""}).
 */
@Slf4j
public class CSVFileUtil {
    /**
     * Default character encoding used when reading CSV files.
     */
    private static final String ENCODE = "UTF-8";

    /**
     * Byte-order mark as it appears after decoding; some editors write it at the start of a file.
     */
    private static final char BOM = '\uFEFF';

    /**
     * Reads a CSV file into a list of records using UTF-8.
     *
     * @param fileName path of the file to read
     * @return one string per CSV record; never {@code null}
     */
    public static List<String> getLines(String fileName) {
        return getLines(fileName, ENCODE);
    }

    /**
     * Reads a CSV file into a list of records.
     *
     * <p>A record normally corresponds to one physical line. When a line contains an odd number
     * of double quotes, a quoted field is still open, so the following physical lines are joined
     * to it (separated by {@code '\n'}) until the quotes balance.
     *
     * <p>This method never throws: any failure is logged and the records read so far are
     * returned. If the file ends while a quoted field is still open, a warning is logged and the
     * incomplete record is returned as the last element.
     *
     * @param fileName path of the file to read
     * @param encode   name of the character encoding of the file
     * @return one string per CSV record; never {@code null}, possibly empty or partial on failure
     */
    public static List<String> getLines(String fileName, String encode) {
        List<String> lines = new ArrayList<>();
        try (FileInputStream fis = new FileInputStream(fileName);
             InputStreamReader isr = new InputStreamReader(fis, encode);
             BufferedReader br = new BufferedReader(isr)) {
            readRecords(br, lines, fileName);
        } catch (IOException | RuntimeException e) {
            // Runtime exceptions (e.g. a null argument) are caught too so that the method keeps
            // its "log and return what we have" contract.
            log.error("Read CSV file failure :{}", fileName, e);
        }
        return lines;
    }

    /**
     * Reads all records from {@code reader} and appends them to {@code lines}.
     *
     * @param reader   source of physical lines
     * @param lines    destination list; filled incrementally so partial results survive a failure
     * @param fileName file name, used only in log messages
     * @throws IOException if reading from {@code reader} fails
     */
    private static void readRecords(BufferedReader reader, List<String> lines, String fileName)
            throws IOException {
        boolean firstLine = true;
        String line;
        while ((line = reader.readLine()) != null) {
            if (firstLine) {
                firstLine = false;
                // Drop a leading byte-order mark so it does not become part of the first field.
                if (!line.isEmpty() && line.charAt(0) == BOM) {
                    line = line.substring(1);
                }
            }
            StringBuilder record = new StringBuilder(line);
            // An odd number of quotes means a quoted field continues on the next physical line.
            boolean quoteOpen = countChar(line, '"', 0) % 2 == 1;
            while (quoteOpen) {
                String continuation = reader.readLine();
                if (continuation == null) {
                    log.warn("Unterminated quoted field at end of CSV file: {}", fileName);
                    break;
                }
                // readLine() strips the line terminator; put a line break back into the field.
                record.append('\n').append(continuation);
                if (countChar(continuation, '"', 0) % 2 == 1) {
                    quoteOpen = false;
                }
            }
            lines.add(record.toString());
        }
    }

    /**
     * Splits one CSV record into its fields.
     *
     * @param source the CSV record
     * @return the unquoted field values; an empty array for a {@code null} or empty record
     */
    public static String[] fromCSVLine(String source) {
        return fromCSVLine(source, 0);
    }

    /**
     * Splits one CSV record into its fields, returning an array of at least {@code size}
     * elements. Positions beyond the fields actually present are {@code null}.
     *
     * @param source the CSV record
     * @param size   minimum length of the returned array
     * @return the unquoted field values, padded with {@code null} up to {@code size}
     */
    public static String[] fromCSVLine(String source, int size) {
        List<String> fields = fromCSVLineToArray(source);
        String[] arr = new String[Math.max(size, fields.size())];
        return fields.toArray(arr);
    }

    /**
     * Splits one CSV record into a list of fields.
     *
     * @param source the CSV record
     * @return a new mutable list of unquoted field values; empty for a {@code null} or empty
     *         record
     */
    public static List<String> fromCSVLineToArray(String source) {
        List<String> fields = new ArrayList<>();
        if (source == null || source.isEmpty()) {
            return fields;
        }
        int length = source.length();
        int position = 0;
        while (position < length) {
            int comma = nextComma(source, position);
            fields.add(nextToken(source, position, comma));
            position = comma + 1;
            // The record ended with a comma, so there is one more (empty) trailing field.
            if (position == length) {
                fields.add("");
            }
        }
        return fields;
    }

    /**
     * Converts an array of strings into one CSV record (used when writing CSV output).
     * Every value is wrapped in double quotes.
     *
     * @param arr the field values; {@code null} elements are written as empty fields
     * @return the CSV record, or an empty string when {@code arr} is {@code null}
     */
    public static String toCSVLine(String[] arr) {
        if (arr == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < arr.length; i++) {
            if (i > 0) {
                sb.append(',');
            }
            sb.append(addQuote(arr[i]));
        }
        return sb.toString();
    }

    /**
     * Converts CSV records into maps, using the fields of the first record as keys and the
     * fields of each following record as values.
     *
     * <p>The header is parsed with the same CSV rules as the data rows. A row with fewer fields
     * than the header gets {@code null} for the missing columns; extra fields are ignored.
     * {@code null} and empty rows are skipped.
     *
     * @param list the CSV records, header first
     * @return one map per data row; empty when {@code list} is {@code null} or empty
     */
    public static List<Map<String, String>> parseList(List<String> list) {
        List<Map<String, String>> resultList = new ArrayList<>();
        if (list == null || list.isEmpty()) {
            return resultList;
        }
        String[] fields = fromCSVLine(list.get(0));
        for (int i = 1; i < list.size(); i++) {
            String valueLine = list.get(i);
            if (valueLine == null || valueLine.isEmpty()) {
                continue;
            }
            // Pad to the header width so short rows cannot overrun the value array.
            String[] valueItems = fromCSVLine(valueLine, fields.length);
            Map<String, String> map = new HashMap<>();
            for (int j = 0; j < fields.length; j++) {
                map.put(fields[j], valueItems[j]);
            }
            resultList.add(map);
        }
        return resultList;
    }

    /**
     * Converts a list of strings into one CSV record (used when writing CSV output).
     *
     * @param strArrList the field values; every element must be a {@code String} or {@code null}
     * @return the CSV record, or an empty string when {@code strArrList} is {@code null}
     * @throws ClassCastException if an element is not a {@code String}
     */
    public static String toCSVLine(List<?> strArrList) {
        if (strArrList == null) {
            return "";
        }
        String[] strArray = new String[strArrList.size()];
        for (int idx = 0; idx < strArray.length; idx++) {
            strArray[idx] = (String) strArrList.get(idx);
        }
        return toCSVLine(strArray);
    }

    /**
     * Counts the occurrences of a character.
     *
     * @param str   the text to search
     * @param c     the character to count
     * @param start index at which to start searching
     * @return the number of occurrences of {@code c} at or after {@code start}
     */
    private static int countChar(String str, char c, int start) {
        int count = 0;
        int index = str.indexOf(c, start);
        while (index != -1) {
            count++;
            index = str.indexOf(c, index + 1);
        }
        return count;
    }

    /**
     * Finds the next field separator, ignoring commas inside double quotes.
     *
     * @param source the CSV record
     * @param st     index at which to start searching
     * @return index of the next separating comma, or {@code source.length()} if there is none
     */
    private static int nextComma(String source, int st) {
        int length = source.length();
        boolean inQuote = false;
        int pos = st;
        while (pos < length) {
            char ch = source.charAt(pos);
            if (ch == ',' && !inQuote) {
                break;
            }
            if (ch == '"') {
                inQuote = !inQuote;
            }
            pos++;
        }
        return pos;
    }

    /**
     * Extracts the field value located in {@code [st, nextComma)}, removing quoting.
     *
     * @param source    the CSV record
     * @param st        index of the first character of the field
     * @param nextComma index just past the last character of the field
     * @return the field value with enclosing quotes removed and doubled quotes collapsed
     */
    private static String nextToken(String source, int st, int nextComma) {
        StringBuilder token = new StringBuilder();
        int next = st;
        while (next < nextComma) {
            char ch = source.charAt(next++);
            if (ch != '"') {
                token.append(ch);
                continue;
            }
            // A quote that is not the field's first character and is immediately followed by
            // another quote is an escaped quote: keep one and skip its partner. Every other
            // quote is a delimiter and is dropped.
            boolean notFirstChar = st + 1 < next;
            if (notFirstChar && next < nextComma && source.charAt(next) == '"') {
                token.append('"');
                next++;
            }
        }
        return token.toString();
    }

    /**
     * Wraps a value in double quotes, doubling any double quote it contains.
     *
     * @param item the raw value
     * @return the quoted value; {@code ""} (two quote characters) for {@code null} or empty input
     */
    private static String addQuote(String item) {
        if (item == null || item.isEmpty()) {
            return "\"\"";
        }
        return '"' + item.replace("\"", "\"\"") + '"';
    }
}
2、使用方法
package test;

import com.alibaba.fastjson.JSONObject;
import lombok.extern.slf4j.Slf4j;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Usage example: reads a CSV file, converts every data row into a map keyed by the header
 * fields, and prints the result.
 */
@Slf4j
public class CsvTest {
    /**
     * Reads the sample CSV file and prints its rows.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> lines = CSVFileUtil.getLines("d:/智能问答.csv", "UTF-8");
        // Nothing to parse when the file could not be read, was empty or was malformed.
        if (lines == null || lines.isEmpty()) {
            log.warn("No CSV records were read");
            return;
        }
        List<Map<String, String>> mapList = CSVFileUtil.parseList(lines);
        System.out.println(Arrays.toString(mapList.toArray()));
    }
}