HttpProxyConfiger 這個類本身沒有實際用途(代理地址和端口只是佔位符),但移植到其他項目並填入真實的代理設置後就有用。
package util;
import java.util.Properties;
/**
 * Configures the JVM-wide HTTP proxy by writing system properties.
 *
 * <p>The no-argument variant installs the placeholder values that ship with this
 * file ("192.168.xx.xx" and "port"). Those are not a usable proxy address; replace
 * them, or call {@link #configProxy(String, String)} with real values.
 */
public class HttpProxyConfiger {

    /**
     * Sets the proxy system properties to the built-in placeholder host and port.
     * Kept so that existing callers continue to work unchanged.
     */
    public static void configProxy() {
        configProxy("192.168.xx.xx", "port");
    }

    /**
     * Sets the proxy system properties to the given host and port.
     *
     * @param host value stored under {@code http.proxyHost}
     * @param port value stored under {@code http.proxyPort}, as text
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    public static void configProxy(String host, String port) {
        if (host == null || port == null) {
            throw new IllegalArgumentException("proxy host and port must not be null");
        }
        Properties prop = System.getProperties();
        // NOTE(review): "proxySet" is kept from the original; it is believed to be a
        // legacy flag that current JDKs ignore - confirm before relying on it.
        prop.setProperty("proxySet", "true");
        prop.setProperty("http.proxyHost", host);
        prop.setProperty("http.proxyPort", port);
    }
}
工具類 Tools 包含多種方法:讀寫 txt 文件、發送 POST 請求等。
package util;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringEscapeUtils;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.sun.org.apache.bcel.internal.generic.NEW;
import dao.DataBaseDao;
import dao.impl.DataBaseDaoImpl;
import entity.AiDataBase;
import entity.AiResultBai;
import entity.AiResultFs;
public class Tools {
// Captured once, when the Tools class is initialised.
static Date datetime=new Date();
// Timestamp built from datetime above. Because both fields are static, any code that
// reads ts gets the class-initialisation time, not the time of the call.
static Timestamp ts = new Timestamp(datetime.getTime());
/**
 * Reads a local text file (e.g. pos.txt) and returns its lines in file order.
 *
 * <p>The file is decoded as UTF-8. If the content comes out garbled the file was
 * probably saved in another encoding such as GBK.
 *
 * <p>Any failure is printed to stderr and the lines read so far are returned; a
 * missing file therefore yields an empty list rather than an exception.
 *
 * @param path path of the file to read
 * @return the lines of the file, never {@code null}
 */
public static List<String> readFile02(String path) {
    List<String> list = new ArrayList<String>();
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        String line;
        while ((line = br.readLine()) != null) {
            list.add(line);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when reading fails;
        // closing the outermost reader also closes the wrapped streams.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return list;
}
/**
 * Reads a UTF-8 text file for the "fs" analysis and returns its lines with every
 * {@code +} replaced by {@code %2B}.
 *
 * <p>Any failure is printed to stderr and the lines read so far are returned; a
 * missing file therefore yields an empty list rather than an exception.
 *
 * @param path path of the file to read
 * @return the transformed lines in file order, never {@code null}
 */
public static List<String> readFiletofs(String path) {
    List<String> list = new ArrayList<String>();
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        String line;
        while ((line = br.readLine()) != null) {
            list.add(line.replace("+", "%2B"));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when reading fails;
        // closing the outermost reader also closes the wrapped streams.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return list;
}
/**
 * Reads a large UTF-8 text file and splits its lines into chunks of at most 5000,
 * so that each chunk can be analysed in a separate run.
 *
 * <p>Every {@code +} in a line is replaced by {@code %2B}. The returned map is keyed
 * by the 1-based index of the chunk's first line (1, 5001, 10001, ...) and each value
 * is a {@code List<String>} holding that chunk's lines in file order.
 *
 * <p>The previous implementation never filled the map and always failed with an
 * IndexOutOfBoundsException because its loop ran one index past the end of the list.
 *
 * <p>Read failures are printed to stderr; whatever was read before the failure is
 * still chunked and returned.
 *
 * @param path path of the file to read
 * @return map from 1-based start index to chunk, empty when nothing could be read
 */
public static Map readFilechaifen(String path) {
    Map<Integer, List<String>> map = new HashMap<Integer, List<String>>();
    // All lines of the file, before chunking.
    List<String> listall = new ArrayList<String>();
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        String line;
        while ((line = br.readLine()) != null) {
            listall.add(line.replace("+", "%2B"));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when reading fails.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    final int chunkSize = 5000;
    for (int start = 0; start < listall.size(); start += chunkSize) {
        int end = Math.min(start + chunkSize, listall.size());
        // Copy the range so each chunk is an independent, modifiable list.
        map.put(start + 1, new ArrayList<String>(listall.subList(start, end)));
    }
    return map;
}
/**
 * Reads a UTF-8 text file and turns every line into an {@code AiDataBase} record.
 *
 * <p>Each line is cleaned before it is stored as the record text: {@code +} becomes
 * {@code %2B}, single quotes and backslashes are removed, all spaces are removed and
 * the result is trimmed. Blank lines are not skipped; they produce a record with an
 * empty text.
 *
 * <p>All records returned by one call share a creation time taken when the call
 * starts, matching how {@code sendPostUrl1} stamps its results. Previously the
 * class-load timestamp was used, which goes stale in a long-running process.
 *
 * <p>Any failure is printed to stderr and the records built so far are returned.
 *
 * @param path path of the file to read
 * @return one record per line in file order, never {@code null}
 */
public static List<AiDataBase> readFiletoadb(String path) {
    List<AiDataBase> adblist = new ArrayList<AiDataBase>();
    Timestamp createdAt = new Timestamp(new Date().getTime());
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        String line;
        while ((line = br.readLine()) != null) {
            String cleaned = line.replace("+", "%2B")
                    .replace("'", "")
                    .replace("\\", "")
                    .replace(" ", "")
                    .trim();
            AiDataBase aiDataBase = new AiDataBase();
            aiDataBase.setText(cleaned);
            aiDataBase.setCreate_time(createdAt);
            adblist.add(aiDataBase);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when reading fails.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return adblist;
}
/*public static void main(String[] args) {
List<AiDataBase> adblist =readFiletoadb("E:\\360downloads\\111\\data_jd.txt");
System.out.println(adblist.size());
}*/
/**
 * Reads a result file (e.g. result_fs.txt) and converts each line whose text is
 * known to the database into an {@code AiResultFs}.
 *
 * <p>A line is split on {@code ===}. Field 0 is the text: trimmed and with spaces
 * removed it is looked up through {@code DataBaseDao.getIdByText1}. Fields 1 and 2
 * are each split on {@code :} and the part after the first colon is parsed as the
 * integer sentiment and the double score respectively.
 *
 * <p>Before splitting, a line containing the ellipsis character is HTML-escaped and
 * every {@code +} is replaced by {@code %2B}. Lines whose lookup returns an id that is
 * not positive are reported on stdout and skipped.
 *
 * <p>The id is now looked up once per line instead of twice. All results of one call
 * share a creation time taken when the call starts.
 *
 * <p>Any failure, including a malformed line, is printed to stderr and stops the
 * read; the results collected so far are returned.
 *
 * @param path path of the result file
 * @return the parsed results in file order, never {@code null}
 */
public static List<AiResultFs> readFileforFs(String path) {
    List<AiResultFs> list = new ArrayList<AiResultFs>();
    Timestamp createdAt = new Timestamp(new Date().getTime());
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        DataBaseDao dbd = new DataBaseDaoImpl();
        String line;
        int j = 0;
        while ((line = br.readLine()) != null) {
            if (line.contains("…")) {
                line = StringEscapeUtils.escapeHtml(line);
            }
            line = line.replace("+", "%2B");
            j++;
            String[] fields = line.split("===");
            // NOTE(review): assumes getIdByText1 is a pure lookup, so one call per line
            // is equivalent to the two calls made before - confirm against the DAO.
            int id = dbd.getIdByText1(fields[0].trim().replaceAll(" ", ""));
            if (id > 0) {
                AiResultFs arf = new AiResultFs();
                System.out.println(j + line);
                arf.setData_base_id(id);
                arf.setSentiment(Integer.parseInt(fields[1].split(":")[1]));
                arf.setScore(Double.parseDouble(fields[2].split(":")[1]));
                arf.setCreate_time(createdAt);
                list.add(arf);
            } else {
                System.out.println(line + "不存在");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when parsing fails.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return list;
}
/**
 * Splits a list of strings into consecutive chunks of at most 5000 elements
 * (used by aibaidu.java to analyse a large data set piece by piece).
 *
 * <p>The map key is the 1-based index of the chunk's first element (1, 5001,
 * 10001, ...). The number of chunks is printed to stdout before returning.
 *
 * @param paramlist the strings to split
 * @return map from 1-based start index to a new list holding that chunk
 */
public static Map<Integer, List<String>> getmapchafen(List<String> paramlist){
    final int chunkSize = 5000;
    Map<Integer, List<String>> chunks = new HashMap<Integer, List<String>>();
    int total = paramlist.size();
    for (int start = 0; start < total; start += chunkSize) {
        int end = Math.min(start + chunkSize, total);
        // Copy the range so every chunk is its own ArrayList, as before.
        chunks.put(start + 1, new ArrayList<String>(paramlist.subList(start, end)));
    }
    System.out.println(chunks.size());
    return chunks;
}
/**
 * Reads a result file (e.g. result_baidu.txt) and converts each non-empty line whose
 * text is known to the database into an {@code AiResultBai}.
 *
 * <p>A line is split on {@code ===}. Field 0 is the text: trimmed and with spaces
 * removed it is looked up through {@code DataBaseDao.getIdByText1}. Fields 1, 2 and 3
 * are each split on {@code :} and the part after the first colon is parsed as the
 * integer sentiment, the positive probability and the negative probability.
 *
 * <p>Every {@code +} in a line is replaced by {@code %2B} first. Each processed line
 * is echoed to stdout with a running counter; lines whose lookup returns an id that
 * is not positive are reported on stdout and skipped.
 *
 * <p>The id is now looked up once per line instead of twice. All results of one call
 * share a creation time taken when the call starts.
 *
 * <p>Any failure, including a malformed line, is printed to stderr and stops the
 * read; the results collected so far are returned.
 *
 * @param path path of the result file
 * @return the parsed results in file order, never {@code null}
 */
public static List<AiResultBai> readFileforbaidu(String path) {
    List<AiResultBai> list = new ArrayList<AiResultBai>();
    Timestamp createdAt = new Timestamp(new Date().getTime());
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        DataBaseDao dbd = new DataBaseDaoImpl();
        String line;
        int j = 0;
        while ((line = br.readLine()) != null) {
            if (line.equals("")) {
                continue;
            }
            // NOTE(review): the original called line.substring(0, 256) here for lines of
            // 256+ characters but discarded the result, so nothing was ever truncated.
            // Truncation is deliberately still off: cutting the whole line could remove
            // the trailing === fields parsed below. Confirm whether only the text key
            // should be limited to 256 characters.
            line = line.replace("+", "%2B");
            j++;
            System.out.println(j + line);
            String[] fields = line.split("===");
            // NOTE(review): assumes getIdByText1 is a pure lookup, so one call per line
            // is equivalent to the two calls made before - confirm against the DAO.
            int id = dbd.getIdByText1(fields[0].trim().replaceAll(" ", ""));
            if (id > 0) {
                AiResultBai arBai = new AiResultBai();
                arBai.setData_base_id(id);
                arBai.setSentiment(Integer.parseInt(fields[1].split(":")[1]));
                arBai.setPositive_prob(Double.parseDouble(fields[2].split(":")[1]));
                arBai.setNegative_prob(Double.parseDouble(fields[3].split(":")[1]));
                arBai.setCreate_time(createdAt);
                list.add(arBai);
            } else {
                System.out.println(line + "不存在");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when parsing fails.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return list;
}
/*public static void main(String[] args) {
String string="物流很快,手機殼也很精美,自己真的超喜歡啊……但河馬嘴巴上的灰漬是臟";
StringEscapeUtils.escapeHtml(string);
System.out.println(string);
//System.out.println(list.size());
}*/
/**
 * Appends one line to a text file, creating the file if it does not exist.
 *
 * <p>The text is written as UTF-8 (so Chinese characters are not garbled) and is
 * followed by a tab and a newline ({@code "\t\n"}).
 *
 * <p>Any failure is printed to stderr; no exception reaches the caller.
 *
 * @param result the text to append
 * @param resultfilepath path of the file to append to
 */
public void writeTxt(String result,String resultfilepath) {
    BufferedWriter bw = null;
    try {
        FileOutputStream fos = new FileOutputStream(new File(resultfilepath), true);
        bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
        bw.write(result + "\t\n");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the handle is released even when the write fails.
        // Closing the outermost writer flushes it and closes the wrapped streams.
        if (bw != null) {
            try {
                bw.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Compares two UTF-8 text files line by line and reports the lines that differ.
 *
 * <p>Lines are paired by position and the comparison stops at the end of the shorter
 * file. Two lines count as equal when they match after trimming and removing all
 * spaces. For each differing pair, both lines are split on {@code ===} and the entry
 * {@code field0-of-line1 (trimmed) + "==" + field1-of-line1 + "==" + field1-of-line2}
 * is added to the result.
 *
 * <p>Any failure, including a differing line without a {@code ===} separator, is
 * printed to stderr and stops the comparison; the entries collected so far are
 * returned. Both files are always closed; previously the second one never was.
 *
 * @param path1 path of the first file
 * @param path2 path of the second file
 * @return one entry per differing line pair, never {@code null}
 */
public static List<String> compare(String path1,String path2) {
    List<String> strlist = new ArrayList<String>();
    BufferedReader br = null;
    BufferedReader br1 = null;
    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(path1), "UTF-8"));
        br1 = new BufferedReader(new InputStreamReader(new FileInputStream(path2), "UTF-8"));
        String line1;
        String line2 = null;
        while ((line1 = br.readLine()) != null && (line2 = br1.readLine()) != null) {
            String normalized1 = line1.trim().replace(" ", "");
            String normalized2 = line2.trim().replace(" ", "");
            if (!normalized1.equals(normalized2)) {
                String[] fields1 = line1.split("===");
                String[] fields2 = line2.split("===");
                strlist.add(fields1[0].trim() + "==" + fields1[1] + "==" + fields2[1]);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close both readers independently so a failure on one cannot leak the other.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (br1 != null) {
            try {
                br1.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return strlist;
}
/**
 * Posts a request body to a remote service and formats the JSON reply as one line.
 *
 * <p>The body is sent as UTF-8 and the reply is read as UTF-8, parsed as a JSON
 * object and expected to carry the members {@code score} and {@code sentence}. The
 * returned text is {@code sentence + "===" + label + "===" + score}, where the label
 * is {@code 2} when the score is greater than 0.5 and {@code 0} otherwise.
 *
 * <p>NOTE(review): both values are taken with {@code toString()} on the Gson element,
 * which is believed to return the JSON form, so a string-valued sentence presumably
 * keeps its surrounding quotes - confirm this is the intended output.
 *
 * <p>An {@code IOException} is printed to stderr and results in an empty string.
 * Unchecked failures (malformed JSON, a missing member, a non-numeric score) are not
 * caught here and propagate to the caller.
 *
 * @param url address of the remote service
 * @param param request body, sent exactly as given
 * @return the formatted result line, or an empty string after an I/O failure
 */
public String sendPostUrl(String url,String param){
    PrintWriter out = null;
    BufferedReader in = null;
    JsonParser parser = new JsonParser();
    String jieguo = "";
    try {
        URL realUrl = new URL(url);
        URLConnection conn = realUrl.openConnection();
        // Both flags are required so the connection can send a body and read a reply.
        conn.setDoOutput(true);
        conn.setDoInput(true);
        out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), "UTF-8"));
        out.print(param);
        out.flush();
        in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"));
        // Line terminators are dropped: the reply lines are concatenated directly.
        StringBuilder response = new StringBuilder();
        String line;
        while ((line = in.readLine()) != null) {
            response.append(line);
        }
        JsonObject jsonObject = parser.parse(response.toString()).getAsJsonObject();
        String scoreText = jsonObject.get("score").toString();
        String qingganqingxiang = Double.parseDouble(scoreText) > 0.5 ? "===2" : "===0";
        String sentence = jsonObject.get("sentence").toString();
        System.out.println("json獲取的值" + sentence);
        jieguo = sentence + qingganqingxiang + "===" + scoreText;
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (out != null) {
                out.close();
            }
            if (in != null) {
                in.close();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
    return jieguo;
}
/**
 * Posts every string in {@code paramlist} to a remote service and collects the
 * replies as {@code AiResultFs} records.
 *
 * <p>For each entry the body {@code "sentence=" + param} is sent as UTF-8. The reply
 * is read as UTF-8 and parsed as a JSON object whose {@code score} member becomes the
 * record score; the sentiment is {@code 2} when the score is greater than 0.5 and
 * {@code 0} otherwise. The entry, trimmed and with spaces removed, is looked up
 * through {@code DataBaseDao.getIdByText1}; a record is added only when the returned
 * id is positive. All records of one call share one creation time.
 *
 * <p>The proxy is configured once through {@code HttpProxyConfiger.configProxy()}
 * before the first request, and not at all for an empty list.
 *
 * <p>An {@code IOException} on one entry is printed to stderr and that entry is
 * skipped. Unchecked failures (malformed JSON, a missing or non-numeric score) are
 * not caught here and abort the whole call.
 *
 * @param url address of the remote service
 * @param paramlist the sentences to analyse
 * @return the records for all entries that were analysed and found in the database
 */
public static List<AiResultFs> sendPostUrl1(String url,List<String> paramlist){
    Timestamp ts = new Timestamp(new Date().getTime());
    JsonParser parser = new JsonParser();
    DataBaseDao dBaseDao = new DataBaseDaoImpl();
    List<AiResultFs> reslutlist = new ArrayList<AiResultFs>();
    if (!paramlist.isEmpty()) {
        // The proxy settings do not change between requests, so set them once.
        HttpProxyConfiger.configProxy();
    }
    for (String param : paramlist) {
        // Declared per request so a failed request never touches an earlier stream.
        PrintWriter out = null;
        BufferedReader in = null;
        try {
            URL realUrl = new URL(url);
            HttpURLConnection conn = (HttpURLConnection) realUrl.openConnection();
            // Both flags are required so the connection can send a body and read a reply.
            conn.setDoOutput(true);
            conn.setDoInput(true);
            out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), "UTF-8"));
            out.print("sentence=" + param);
            out.flush();
            in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"));
            // Line terminators are dropped: the reply lines are concatenated directly.
            StringBuilder response = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                response.append(line);
            }
            // NOTE(review): single quotes are doubled before parsing, as in the original.
            // This looks like SQL-style escaping applied to JSON - confirm it is needed.
            JsonObject jsonObject = parser.parse(response.toString().replace("'", "''")).getAsJsonObject();
            double score = Double.parseDouble(jsonObject.get("score").toString());
            int sentiment = score > 0.5 ? 2 : 0;
            System.out.println("param========" + param);
            int database_id = dBaseDao.getIdByText1(param.trim().replace(" ", ""));
            System.out.println(database_id);
            if (database_id > 0) {
                AiResultFs arf = new AiResultFs();
                arf.setData_base_id(database_id);
                arf.setSentiment(sentiment);
                arf.setScore(score);
                arf.setCreate_time(ts);
                reslutlist.add(arf);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (out != null) {
                    out.close();
                }
                if (in != null) {
                    in.close();
                }
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }
    return reslutlist;
}
}
