package com.originalityTest; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.List; import java.util.Set; import java.util.concurrent.ArrayBlockingQueue; import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.query.Query; import org.yqm.nlp.cn.seg.ISegTagger; import org.yqm.nlp.cn.seg.impl.CharNgramSegTagger; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.mongodb.BasicDBObject; import com.mongodb.DB; import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.DBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientOptions; import com.mongodb.ServerAddress; import com.originalityTest.Test.Consumer; import com.originalityTest.Test.Producer; import redis.clients.jedis.Jedis; import redis.clients.jedis.JedisPool; import redis.clients.jedis.JedisPoolConfig; import us.codecraft.background.entity.KeywordDetailed; import us.codecraft.background.solr.SolrService; import us.codecraft.background.solr.VSMTextSimilarity; import us.codecraft.webmagic.main.CollectInterface; import us.codecraft.webmagic.main.testMain; import us.codecraft.webmagic.model.samples.iask.IaskQuestionModel; import us.codecraft.webmagic.utils.HttpUtils; import us.codecraft.webmagic.utils.MongoUtils; /** * *----------------------------------------------------------------------------- * <br>Copyright (c) 2018 深圳問我時代科技有限公司 * * <p>跑360采集數據跑SEO質量得分 </p> * * @project name : webmagic-samples * @package name : com.originalityTest * @file name : RunJob.java * @author : flm * @date : 2018年10月29日 <br> * *----------------------------------------------------------------------------- */ public class RunJob { protected static String host = "192.168.1.90"; //192.168.9.40:30000 protected static int port = 30000; protected static String dbname = "5118baiduzhidao"; protected static DB mongoDB = null; protected static DB mongoDBURL = null; protected static boolean isTestFlag = false; protected static int initDate = 1000*60*10; private int queueSize = 10000000; private ArrayBlockingQueue<BasicDBObject> queue = new ArrayBlockingQueue<BasicDBObject>(queueSize); static { MongoClientOptions.Builder buide = new MongoClientOptions.Builder(); buide.connectionsPerHost(100);// 與目標數據庫可以建立的最大鏈接數 buide.connectTimeout(1000 * 60 * 20);// 與數據庫建立鏈接的超時時間 buide.maxWaitTime(100 * 60 * 5);// 一個線程成功獲取到一個可用數據庫之前的最大等待時間 buide.threadsAllowedToBlockForConnectionMultiplier(100); buide.maxConnectionIdleTime(0); buide.maxConnectionLifeTime(0); buide.socketTimeout(0); buide.socketKeepAlive(true); MongoClientOptions myOptions = buide.build(); try { MongoClient mongoClient = new MongoClient(new ServerAddress(host, port), myOptions); mongoDB = mongoClient.getDB(dbname); mongoDBURL = mongoClient.getDB("seo_keyword"); } catch (UnknownHostException e) { e.printStackTrace(); System.exit(0); } } /** * 手動執行方法 * @param args * @throws Exception */ public static void main(String[] args) throws Exception { RunJob runJob = new RunJob(); Producer producer = runJob.new Producer(); Consumer consumer1 = runJob.new Consumer(1); Consumer consumer2 = runJob.new Consumer(2); Consumer consumer3 = runJob.new Consumer(3); Consumer consumer4 = runJob.new Consumer(4); Consumer consumer5 = runJob.new Consumer(5); Consumer consumer6 = runJob.new Consumer(6); Consumer consumer7 = runJob.new Consumer(7); Consumer consumer8 = runJob.new Consumer(8); Consumer consumer9 = runJob.new Consumer(9); Consumer consumer10 = runJob.new Consumer(10); // 生產數據 producer.start(); // 跑數據原創度 consumer1.start(); consumer2.start(); consumer3.start(); consumer4.start(); consumer5.start(); consumer6.start(); consumer7.start(); consumer8.start(); consumer9.start(); consumer10.start(); } class Consumer extends Thread{ int i; public Consumer(int i){ this.i = i; } @Override public void run() { consume(); } private void consume() { while(true){ try { System.out.println("隊列獲取 隊列i:"+i); DBCollection collQuestion = mongoDB.getCollection("soQA"); DBCollection collection = mongoDBURL.getCollection("domain"); BasicDBObject d = queue.take(); List<DBObject> answers= (List<DBObject>) d.get("answers"); String questionTxt = d.getString("title") + d.getString("quest"); String questionTitle = ""; if(d.getString("title")!=null&&d.getString("title")!=""){ questionTitle = d.getString("title"); }else{ questionTitle = d.getString("quest"); } float score = 0F; float answerLenOriginality = OriginalityUtitls.getAnswerLenOriginality(answers); float answerSizeOriginality = OriginalityUtitls.getAnswerrSizeOriginality(answers); float goodOriginality = OriginalityUtitls.getGoodOriginality(answers); float questionOriginality = OriginalityUtitls.getQuestionOriginality(questionTxt, answers); float titleOriginality = OriginalityUtitls.getTitleOriginality(questionTitle); float wenwoOriginality = OriginalityUtitls.getWenwoOriginality(questionTitle, collection); score += Float.valueOf(questionOriginality*0.3+""); score += Float.valueOf(answerLenOriginality*0.2+""); score += Float.valueOf(answerSizeOriginality*0.1+""); score += Float.valueOf(goodOriginality*0.05+""); score += Float.valueOf(titleOriginality*0.2+""); score += Float.valueOf(titleOriginality*0.2+""); score += Float.valueOf(wenwoOriginality*0.1+""); System.out.println("score :"+score); d.put("originality", score); d.put("run", 0); collQuestion.save(d); } catch (InterruptedException e) { e.printStackTrace(); } } } } class Producer extends Thread{ @Override public void run() { produce(); } private void produce() { try { DBCollection collQuestion = mongoDB.getCollection("soQA"); DBObject idQuery = new BasicDBObject(); idQuery.put("state",1); DBCursor lists = collQuestion.find(idQuery); lists.addOption(com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT); int i = 0; while(lists.hasNext()){ BasicDBObject d = (BasicDBObject)lists.next(); queue.put(d); System.out.println((++i)+"條記錄 ,向隊列取中插入一個元素,隊列剩余空間:"+(queueSize-queue.size())); try { Thread.sleep(500); // 控制生產速度,防止隊列滿 } catch (Exception e) { System.err.println("Thread.sleep....."); } } } catch (InterruptedException e) { e.printStackTrace(); } } } }