首先吐槽python的多線程是真的垃圾。。。
業務:
對文件裡的近2萬條數據進行處理,然後存回文件
0. 讀取txt存入ArrayList
1. 把ArrayList以2000為一組切割
2. 把每組2000條數據交給各自的線程處理
3. 把線程放入線程池
4. 線程池運行完畢後把結果存回txt
package edu.thu.xlore.unitId; import java.io.*; import java.util.ArrayList; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; public class Test { public void unitFile(String filePath){ System.out.println("unitFile start"); ArrayList<String> testArrayList = new ArrayList<>(); File file = new File(filePath); if (!file.exists()) { return; } //從txt中讀取數據 BufferedReader bufferedReaderRaw = null; try { bufferedReaderRaw = new BufferedReader(new FileReader(file)); int count = 0; while (true) { String line = bufferedReaderRaw.readLine(); if(line == null){ break; } testArrayList.add(line); } } catch (Exception e) { e.printStackTrace(); }finally { if(bufferedReaderRaw != null) { try { bufferedReaderRaw.close(); } catch (IOException e) { e.printStackTrace(); } } } System.out.println("unitFile: 讀取完畢,數量:" + testArrayList.size()); ArrayList<ArrayList<String>> arrayListArrayList = new ArrayList<>(); if(testArrayList.size() < 2000){ arrayListArrayList.add(testArrayList); } //如果數量大於2000,分割ArrayList進行多線程; else { for (int i = 0; i < testArrayList.size(); i++) { int num = i / 2000; if (i % 2000 == 0) { // i = 0, 2000, 4000... 
arrayListArrayList.add(new ArrayList<String>(2000)); } if (arrayListArrayList.size() == num + 1) { arrayListArrayList.get(num).add(testArrayList.get(i)); } } } System.out.println("unitFile: 分割數量:" + arrayListArrayList.size()); Long time_start = System.currentTimeMillis(); //使用線程池 ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(10, 10, 0, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>()); for(int i = 0; i < arrayListArrayList.size(); i++){ UnitThread unitThread = new UnitThread(arrayListArrayList, i); threadPoolExecutor.execute(unitThread); } threadPoolExecutor.shutdown(); // shutdown線程池會把已經提交的剩余線程執行完然后關閉, shutdownNow是直接關閉執行中的線程返回剩余沒執行的線程 while(true) { //等線程全部執行完畢 //System.out.println("線程池剩余線程數量:" + threadPoolExecutor.getActiveCount()); if (threadPoolExecutor.isTerminated()) { System.out.println("線程全部運行完畢"); break; } } Long time_end = System.currentTimeMillis(); //結果存回txt FileWriter fileWriter = null; BufferedWriter bufferedWriter = null; try { fileWriter = new FileWriter(file); bufferedWriter = new BufferedWriter(fileWriter); for(int i = 0; i < arrayListArrayList.size(); i++){ System.out.println("共有數據: " + arrayListArrayList.get(i).size()); for(int j = 0; j < arrayListArrayList.get(i).size(); j++) { bufferedWriter.write(arrayListArrayList.get(i).get(j) + "\n"); } } } catch (IOException e) { e.printStackTrace(); }finally { try { if(fileWriter != null) { fileWriter.close(); } } catch (IOException e) { e.printStackTrace(); } } System.out.println("共用時:" + (time_end - time_start) + "毫秒"); } //內部線程類 public class UnitThread extends Thread{ private int pageIndex; private ArrayList<ArrayList<String>> arrayListArrayList; public UnitThread(ArrayList<ArrayList<String>> arrayListArrayList, int pageIndex){ //線程不能取得局部變量,只能作為參數傳進來,ArrayList是引用變量,所以值可以直接修改,不需要返回結果。 this.pageIndex = pageIndex; this.arrayListArrayList = arrayListArrayList; } @Override public void run(){ System.out.println("線程" + pageIndex + "開始"); ArrayList<String> unitedCategory = 
dealwithArrayList(arrayListArrayList.get(pageIndex)); arrayListArrayList.set(this.pageIndex, unitedCategory); //把新的list傳回給list集合 System.out.println("線程" + pageIndex + "運行完畢"); } } public ArrayList<String> dealwithArrayList(ArrayList<String> arrayList){ ArrayList<String> reArrayList = new ArrayList<>(); //要對list進行的操作 for(int i = 0 ; i < arrayList.size(); i++){ reArrayList.add(arrayList.get(i) + " 已經處理完了"); } return reArrayList; } public static void main(String[] args){ Test test = new Test(); String filePath = "C:\\Users\\Administrator\\Desktop\\test\\wordFile.txt"; test.unitFile(filePath); } }
運行結果:
unitFile start
unitFile: 讀取完畢,數量:19399
unitFile: 分割數量:10
線程0開始
線程1開始
線程3開始
線程2開始
線程0運行完畢
線程1運行完畢
線程3運行完畢
線程4開始
線程2運行完畢
線程5開始
線程8開始
線程4運行完畢
線程8運行完畢
線程5運行完畢
線程9開始
線程6開始
線程9運行完畢
線程7開始
線程6運行完畢
線程7運行完畢
線程全部運行完畢
共有數據: 2000
共有數據: 2000
共有數據: 2000
共有數據: 2000
共有數據: 2000
共有數據: 2000
共有數據: 2000
共有數據: 2000
共有數據: 2000
共有數據: 1399
共用時:8毫秒
Process finished with exit code 0