一.項目包含的功能
1.高亮搜索;
2.詞語自動補全;
3.分頁查詢;
4.復合查詢;
5.對索引的增、刪、改、查;
6.對文檔的增、刪、改;
7.搜索結果排序;
8.其他功能:文件讀取(word、txt、pdf)
二.依賴環境版本
1.Elasticsearch 6.6.1;
2.jdk 1.8
3.前端分頁插件:第三方開源插件(取自網絡,非本項目自行實現)
4.elasticsearch-rest-high-level-client 6.6.1
5.spring boot 2.1.3
6.kibana 6.6.0
三.訪問路徑
1.項目路徑:http://localhost:8080/searchTest.html
2.kibana路徑:http://localhost:5601
3.Elasticsearch啟動檢測路徑:http://localhost:9200/
四.頁面效果
五.代碼
1.bean實體類
package com.demo.elasticsearch.bean; import java.util.Date; /** * @Author: ln * @Date: 2019/2/26 08:59 * @Description: */ public class FileBean { //text支持分詞搜索的字段有:name,author,content,filePath //keyword支持不分詞搜索的字段有:name,author //suggest支持自動補全搜索的字段有:name,author /** 主鍵id */ private String id; /** 文件名稱 */ private String name; /** 作者名稱 */ private String author; /** 文件內容 */ private String content; /** 文件路徑 */ private String filePath; public String getId() { return id; } public void setId(String id) { this.id = id; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getAuthor() { return author; } public void setAuthor(String author) { this.author = author; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public String getFilePath() { return filePath; } public void setFilePath(String filePath) { this.filePath = filePath; } //不分詞搜索 public String getKeywordName() { return this.name; } public String getKeywordAuthor() { return this.author; } //自動補全 public String getSuggestName() { return this.name; } public String getSuggestAuthor() { return this.author; } }
package com.demo.elasticsearch.bean; /** * @Author: ln * @Date: 2019/2/26 08:59 * @Description: */ public class FileBeanQuery { /** 文件名稱 */ private String name; /** 作者名稱 */ private String author; /** 文件內容 */ private String content; /** 文件路徑 */ private String filePath; public String getName() { return name; } public void setName(String name) { this.name = name; } public String getAuthor() { return author; } public void setAuthor(String author) { this.author = author; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public String getFilePath() { return filePath; } public void setFilePath(String filePath) { this.filePath = filePath; } }
2.controller控制層
package com.demo.elasticsearch.controller; import com.demo.elasticsearch.bean.FileBean; import com.demo.elasticsearch.bean.FileBeanQuery; import com.demo.elasticsearch.bean.FileMapping; import com.demo.elasticsearch.service.ElasticsearchService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.ResponseBody; import java.io.IOException; /** * @Author: ln * @Date: 2019/2/22 14:04 * @Description: elasticsearch demo */ @Controller @RequestMapping("/demo") public class ElasticsearchController { @Autowired private ElasticsearchService elasticsearchService; /* 創建索引 */ @RequestMapping("/createIndex") @ResponseBody public String createIndex(String index, FileMapping mapping) throws IOException { String result = elasticsearchService.createIndex(index, mapping); return result; } /* 刪除索引 Elasticsearch的版本要與client的版本一致,spring的start版本6.4.3調試失敗,6.6.1調試成功 */ @RequestMapping("/delIndex") @ResponseBody public String delIndex(String index) throws IOException { String result = elasticsearchService.delIndex(index); return result; } /* 新建文檔(若索引不存在則新建) */ @RequestMapping("/putDocument") @ResponseBody public String putDocument(String index, FileBean fileBean) throws IOException { String result = elasticsearchService.putDocument(index, fileBean); return result; } /* 刪除文檔 */ @RequestMapping("/delDocument") @ResponseBody public String delDocument(String index, String id) throws IOException { String result = elasticsearchService.delDocument(index, id); return result; } /* 獲取文檔 */ @RequestMapping("/getDocument") @ResponseBody public String getDocument(String index, String id) throws IOException { String result = elasticsearchService.getDocument(index, id); return result; } /* 全局搜索 */ @RequestMapping("/keywordSearch") @ResponseBody public String keywordSearch(String index, String value, int current, int size) throws 
IOException { String result = elasticsearchService.keywordSearch(index, value, current, size); return result; } /* 復合搜索 TODO:還未調試成功*/ @RequestMapping("/multiSearch") @ResponseBody public String multiSearch(String index, FileBeanQuery query, int current, int size) throws IOException, IllegalAccessException { String result = elasticsearchService.multiSearch(index, query, current, size); return result; } /* 高亮搜索(注意QueryBuilders的查詢方法) */ @RequestMapping("/highlightSearch") @ResponseBody public String highlightSearch(String index, String value, int current, int size) throws IOException { String result = elasticsearchService.highlightSearch(index, value, current, size); return result; } /* 詞語補全(只能根據前綴補全) */ @RequestMapping("/suggestSearch") @ResponseBody public String suggestSearch(String index, String value) throws IOException { String result = elasticsearchService.suggestSearch(index, value); return result; } /* 全局搜索 */ @RequestMapping("/searchAll") @ResponseBody public String searchAll(String index, int current, int size) throws IOException { String result = elasticsearchService.searchAll(index, current, size); return result; }/* 查詢文檔總數 */ @RequestMapping("/countQuery") @ResponseBody public String countQuery(String index) throws IOException { String result = elasticsearchService.countQuery(index); return result; } }
3.service業務邏輯層
package com.demo.elasticsearch.service;

import com.demo.elasticsearch.bean.FileBean;
import com.demo.elasticsearch.bean.FileBeanQuery;
import com.demo.elasticsearch.bean.FileMapping;

import java.io.IOException;

/**
 * Operations of the Elasticsearch demo. Every method takes the target index
 * name and returns its result as a string.
 *
 * Author: ln
 * Date: 2019/2/26 08:59
 */
public interface ElasticsearchService {

    // ---- index management ----

    /**
     * Creates a new index.
     *
     * @param index   index name
     * @param mapping mapping options supplied by the caller
     */
    String createIndex(String index, FileMapping mapping) throws IOException;

    /**
     * Deletes an index.
     *
     * @param index index name
     */
    String delIndex(String index) throws IOException;

    // ---- document management ----

    /**
     * Stores a document in the index.
     *
     * @param index    index name
     * @param fileBean document to store
     */
    String putDocument(String index, FileBean fileBean) throws IOException;

    /**
     * Deletes one document.
     *
     * @param index index name
     * @param id    document id
     */
    String delDocument(String index, String id) throws IOException;

    /**
     * Fetches one document.
     *
     * @param index index name
     * @param id    document id
     */
    String getDocument(String index, String id) throws IOException;

    // ---- searching ----

    /**
     * Searches for a keyword.
     *
     * @param index   index name
     * @param value   search text
     * @param current paging position
     * @param size    page size
     */
    String keywordSearch(String index, String value, int current, int size) throws IOException;

    /**
     * Compound search driven by the non-null properties of {@code query}.
     *
     * @param index   index name
     * @param query   search criteria
     * @param current paging position
     * @param size    page size
     */
    String multiSearch(String index, FileBeanQuery query, int current, int size)
            throws IOException, IllegalAccessException;

    /**
     * Searches for a value and returns matches with highlighting.
     *
     * @param index   index name
     * @param value   search text
     * @param current paging position
     * @param size    page size
     */
    String highlightSearch(String index, String value, int current, int size) throws IOException;

    /**
     * Returns completion suggestions for the given text.
     *
     * @param index index name
     * @param value text to complete
     */
    String suggestSearch(String index, String value) throws IOException;

    /**
     * Lists documents of the index without a filter.
     *
     * @param index   index name
     * @param current paging position
     * @param size    page size
     */
    String searchAll(String index, int current, int size) throws IOException;

    /**
     * Queries the number of documents.
     *
     * @param index index name
     */
    String countQuery(String index) throws IOException;
}
package com.demo.elasticsearch.service; import com.alibaba.fastjson.JSON; import com.demo.elasticsearch.bean.FileBean; import com.demo.elasticsearch.bean.FileBeanQuery; import com.demo.elasticsearch.bean.FileMapping; import com.demo.elasticsearch.util.AttachmentReader; import org.apache.http.HttpHost; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsRequest; import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; import org.elasticsearch.action.delete.DeleteRequest; import org.elasticsearch.action.delete.DeleteResponse; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.search.MultiSearchRequest; import org.elasticsearch.action.search.MultiSearchResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.client.core.CountRequest; import org.elasticsearch.client.core.CountResponse; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import 
org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.elasticsearch.search.suggest.Suggest; import org.elasticsearch.search.suggest.SuggestBuilder; import org.elasticsearch.search.suggest.SuggestBuilders; import org.elasticsearch.search.suggest.SuggestionBuilder; import org.elasticsearch.search.suggest.completion.CompletionSuggestion; import org.springframework.stereotype.Service; import java.io.File; import java.io.IOException; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * @Author: ln * @Date: 2019/2/22 15:37 * @Description: */ @Service public class ElasticsearchServiceImpl implements ElasticsearchService { RestHighLevelClient client; @Override public String createIndex(String index, FileMapping mapping) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); CreateIndexRequest request = new CreateIndexRequest(index); //索引配置 request.mapping("doc", "keywordName", "type=keyword", "keywordAuthor", "type=keyword", "suggestName", "type=completion", "suggestAuthor", "type=completion"); CreateIndexResponse createIndexResponse = client.indices().create(request, RequestOptions.DEFAULT); client.close(); return JSON.toJSONString(createIndexResponse); } @Override public String delIndex(String index) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); DeleteIndexRequest request = new DeleteIndexRequest(index); AcknowledgedResponse deleteIndexResponse = client.indices().delete(request, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(deleteIndexResponse)); client.close(); return JSON.toJSONString(deleteIndexResponse.isAcknowledged()); } @Override public String putDocument(String index, FileBean fileBean) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); File file = new 
File(fileBean.getFilePath()); String content = AttachmentReader.reader(fileBean.getFilePath()); fileBean.setContent(content); fileBean.setName(file.getName()); IndexRequest indexRequest = new IndexRequest(index, "doc", fileBean.getId()); indexRequest.source(JSON.toJSONString(fileBean), XContentType.JSON); IndexResponse response = client.index(indexRequest, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(response)); client.close(); return JSON.toJSONString(response.status()); } @Override public String delDocument(String index, String id) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); DeleteRequest request = new DeleteRequest(index,"doc", id ); DeleteResponse deleteResponse = client.delete(request, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(deleteResponse)); client.close(); return JSON.toJSONString(deleteResponse.status()); } @Override public String getDocument(String index, String id) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); GetRequest getRequest = new GetRequest(index,"doc", id ); GetResponse getResponse = client.get(getRequest, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(getResponse)); client.close(); return JSON.toJSONString(getResponse); } @Override public String keywordSearch(String index, String value, int current, int size) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); SearchRequest searchRequest = new SearchRequest(); searchRequest.indices(index); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //支持全詞搜索的字段有:keywordName,keywordAuthor" searchSourceBuilder.query(QueryBuilders.multiMatchQuery(value, "keywordName", "keywordAuthor")); searchSourceBuilder.from(current); searchSourceBuilder.size(size); searchRequest.source(searchSourceBuilder); SearchResponse searchResponse = 
client.search(searchRequest, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(searchResponse)); //處理返回結果 List<Map<String, Object>> result = dealResult(searchResponse.getHits()); client.close(); return JSON.toJSONString(result); } @Override public String multiSearch(String index, FileBeanQuery query, int current, int size) throws IOException, IllegalAccessException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); MultiSearchRequest request = new MultiSearchRequest(); for (Field field : query.getClass().getDeclaredFields()) { field.setAccessible(true); if(field.get(query) != null){ SearchRequest searchRequest = new SearchRequest(index); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(QueryBuilders.boolQuery().must( QueryBuilders.matchQuery(field.getName(), field.get(query)))); searchRequest.source(searchSourceBuilder); request.add(searchRequest); } } MultiSearchResponse response = client.msearch(request, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(response)); //返回結果處理 List<Map<String, Object>> result = new ArrayList<>(); MultiSearchResponse.Item[] multiSearchResponses = response.getResponses(); for (MultiSearchResponse.Item multiSearchRespons : multiSearchResponses) { SearchHits hits = multiSearchRespons.getResponse().getHits(); for (SearchHit hit : hits.getHits()) { Map<String, Object> map = hit.getSourceAsMap(); if(!result.contains(map)){ result.add(map); } } } client.close(); return JSON.toJSONString(result); } @Override public String highlightSearch(String index, String value, int current, int size) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); SearchRequest searchRequest = new SearchRequest(); searchRequest.indices(index); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //高亮,支持所有FileBean實體的字段 HighlightBuilder highlightBuilder = new 
HighlightBuilder(); FileBean fileBean = new FileBean(); String[] fieldNames = new String[fileBean.getClass().getDeclaredFields().length]; int i = 0; for (Field f : fileBean.getClass().getDeclaredFields()) { HighlightBuilder.Field highlight = new HighlightBuilder.Field(f.getName()); highlight.highlighterType("unified"); highlightBuilder.field(highlight); fieldNames[i] = f.getName(); i++; } //設置高亮樣式 highlightBuilder.preTags("<label style=\"color: red\">"); highlightBuilder.postTags("</label>"); //添加查詢條件 searchSourceBuilder.highlighter(highlightBuilder); searchSourceBuilder.query(QueryBuilders.multiMatchQuery(value, fieldNames));//搜索也支持所有FileBean實體的字段 searchRequest.source(searchSourceBuilder); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(searchResponse)); //獲取高亮字段 List<Map<String, Object>> result = new ArrayList<>(); SearchHits hits = searchResponse.getHits(); for (SearchHit hit : hits.getHits()) { Map<String, HighlightField> highlightFields = hit.getHighlightFields(); for (String fieldName : fieldNames) { HighlightField highlight = highlightFields.get(fieldName); System.out.println(fieldName); if(highlight != null){ Text[] fragments = highlight.fragments(); String fragmentString = fragments[0].string(); System.out.println("高亮值:" + fragmentString); Map<String, Object> map = hit.getSourceAsMap(); map.put(fieldName, fragmentString); if(!result.contains(map)){ result.add(map); } } } } client.close(); return JSON.toJSONString(result); } @Override public String suggestSearch(String index, String value) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); SearchRequest searchRequest = new SearchRequest(index); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //查詢補全詞語 SuggestionBuilder completionName = SuggestBuilders.completionSuggestion("suggestName").text(value); SuggestBuilder suggestBuilder = new 
SuggestBuilder(); suggestBuilder.addSuggestion("suggestName", completionName); SuggestionBuilder completionAuthor = SuggestBuilders.completionSuggestion("suggestAuthor").text(value); suggestBuilder.addSuggestion("suggestAuthor", completionAuthor); searchSourceBuilder.suggest(suggestBuilder); searchRequest.source(searchSourceBuilder); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(searchResponse)); //處理返回結果 Suggest suggest = searchResponse.getSuggest(); //支持自動補全搜索的字段有suggestName,suggestAuthor CompletionSuggestion termSuggestion = suggest.getSuggestion("suggestName"); CompletionSuggestion termSuggestionAuthor = suggest.getSuggestion("suggestAuthor"); List<CompletionSuggestion.Entry> list = termSuggestion.getEntries(); list.addAll(termSuggestionAuthor.getEntries()); List<String> suggestList = new ArrayList<>(); for (CompletionSuggestion.Entry entry : list) { for (CompletionSuggestion.Entry.Option option : entry) { String suggestText = option.getText().string(); System.out.println("補全的詞語:" + suggestText); if(!suggestList.contains(suggestText)){ suggestList.add(suggestText); } } } client.close(); return JSON.toJSONString(suggestList); } @Override public String searchAll(String index, int current, int size) throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); SearchRequest searchRequest = new SearchRequest(index); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(QueryBuilders.matchAllQuery()); searchSourceBuilder.from(current); searchSourceBuilder.size(size); searchRequest.source(searchSourceBuilder); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(searchResponse)); //處理返回結果 SearchHits hits = searchResponse.getHits(); client.close(); return JSON.toJSONString(hits); } @Override public String countQuery(String index) 
throws IOException { client = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http"))); CountRequest countRequest = new CountRequest(index); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(QueryBuilders.matchAllQuery()); countRequest.source(searchSourceBuilder); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); sourceBuilder.query(QueryBuilders.termQuery("user", "kimchy")); countRequest.source(sourceBuilder); CountResponse countResponse = client.count(countRequest, RequestOptions.DEFAULT); long count = countResponse.getCount(); return count + ""; }private List<Map<String, Object>> dealResult(SearchHits hits){ List<Map<String, Object>> result = new ArrayList<>(); for (SearchHit hit : hits.getHits()) { Map<String, Object> map = hit.getSourceAsMap(); result.add(map); } return result; } }
4.pom.xml文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.3.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.demo</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>elasticsearch</name>
    <description>Demo project for Spring Boot</description>
    <properties>
        <java.version>1.8</java.version>
        <poi.version>3.16</poi.version>
        <commonsio.version>2.4</commonsio.version>
        <icepdf>6.2.3</icepdf>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- fastjson: versions before 1.2.28 have a remote code execution vulnerability; use that version or a later one -->
        <!-- NOTE(review): further fastjson deserialization vulnerabilities were reported in later 1.2.x releases; check current advisories and consider upgrading to the latest 1.2.x before using this outside a demo -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.31</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.9</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>6.6.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>6.6.1</version>
        </dependency>
        <!-- commons -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>${commonsio.version}</version>
        </dependency>
        <dependency>
            <groupId>org.icepdf.os</groupId>
            <artifactId>icepdf-pro-intl</artifactId>
            <version>${icepdf}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/net.sourceforge.jchardet/jchardet -->
        <dependency>
            <groupId>net.sourceforge.jchardet</groupId>
            <artifactId>jchardet</artifactId>
            <version>1.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/commons-beanutils/commons-beanutils -->
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.9.2</version>
        </dependency>
        <!-- POI -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>${poi.version}</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
5.其他工具類
package com.demo.elasticsearch.util; import org.apache.commons.io.FileUtils; import org.apache.pdfbox.io.RandomAccessBuffer; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.poi.extractor.ExtractorFactory; import java.io.File; import java.io.FileInputStream; import java.io.IOException; /** * 文檔讀取工具,支持word,pdf,txt * 2017年9月15日 */ public class AttachmentReader { private static final String[] WORD= {"doc","docx","xls","xlsx","ppt","pptx"}; private static final String PDF ="pdf"; private static final String TXT="txt"; public static String reader(String path) { String text = ""; String type = path.substring(path.lastIndexOf(".")+1).toLowerCase(); try { if(TXT.equals(type)) { text= txtReader(path); } else if(PDF.equals(type)) { text = pdfReader(path); } else { for (int i = 0; i < WORD.length; i++) { if(WORD[i].equals(type)){ text = wordReader(path); } } } } catch (Exception e) { e.getMessage(); } return text; }
public static String wordReader(String path) { try { return ExtractorFactory.createExtractor(new File(path)).getText(); } catch (Exception e) { System.out.println(path); throw new RuntimeException(e); } } public static String txtReader(String path) { try { File file = new File(path); //文本編碼探測 FileCharsetDetector detector = new FileCharsetDetector(); String charset = detector.guessFileEncoding(file, 2); String str = FileUtils.readFileToString(file,charset); return str; } catch (Exception e) { throw new RuntimeException(e); } }
public static String pdfReader(String path) { String text = ""; FileInputStream is = null; PDDocument document = null; try { is = new FileInputStream(path); PDFParser parser = new PDFParser(new RandomAccessBuffer(is)); parser.parse(); document = parser.getPDDocument(); PDFTextStripper stripper = new PDFTextStripper(); text = stripper.getText(document); } catch (Exception e) { throw new RuntimeException(e); }finally { if(null!=is){ } try { is.close(); } catch (IOException e) { } } return text; } }
package com.demo.elasticsearch.util; import org.mozilla.intl.chardet.nsDetector; import org.mozilla.intl.chardet.nsICharsetDetectionObserver; import java.io.*; /** * 字符集探測 * 2017年9月22日 */ public class FileCharsetDetector { private boolean found = false; private String encoding = null; public String guessFileEncoding(File file) throws FileNotFoundException, IOException { return guessFileEncoding(file, new nsDetector()); }
public String guessFileEncoding(File file, int languageHint) throws FileNotFoundException, IOException { return guessFileEncoding(file, new nsDetector(languageHint)); }
private String guessFileEncoding(File file, nsDetector det) throws FileNotFoundException, IOException { // Set an observer... // The Notify() will be called when a matching charset is found. det.Init(new nsICharsetDetectionObserver() { public void Notify(String charset) { encoding = charset; found = true; } }); BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file)); byte[] buf = new byte[1024]; int len; boolean done = false; boolean isAscii = false; while ((len = imp.read(buf, 0, buf.length)) != -1) { // Check if the stream is only ascii. isAscii = det.isAscii(buf, len); if (isAscii) { break; } // DoIt if non-ascii and not done yet. done = det.DoIt(buf, len, false); if (done) { break; } } imp.close(); det.DataEnd(); if (isAscii) { encoding = "ASCII"; found = true; } if (!found) { String[] prob = det.getProbableCharsets(); //這里將可能的字符集組合起來返回 for (int i = 0; i < prob.length; i++) { if (i == 0) { encoding = prob[i]; } else { encoding += "," + prob[i]; } } if (prob.length > 0) { // 在沒有發現情況下,也可以只取第一個可能的編碼,這里返回的是一個可能的序列 return encoding; } else { return null; } } return encoding; } }
6.springboot啟動類
package com.demo.elasticsearch;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Spring Boot entry point of the Elasticsearch demo.
 */
@SpringBootApplication
public class ElasticsearchApplication {

    /**
     * Starts the application with this class as the primary configuration source.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(ElasticsearchApplication.class);
        application.run(args);
    }
}
待完善……