1. 數據庫結構
2. insert 測試
insert 的測試包括
1) 批量拼接values()插入
2) 有事務for循環插入
3) 無事務for循環插入
測試 SQL:
<!--
  Plain single-row insert of one Post.
  NOTE(review): keyProperty is declared without useGeneratedKeys="true" or a
  <selectKey> child; per the MyBatis docs keyProperty is only populated in
  those cases. Confirm whether callers expect the generated id to be set.
-->
<insert id="insert"
        parameterType="com.qunar.mybatistask.bean.Post"
        keyProperty="id">
    <![CDATA[
    insert into post (
        title,
        content,
        author,
        status,
        created
    ) values (
        #{title},
        #{content},
        #{author},
        #{status},
        #{created}
    )
    ]]>
</insert>

<!--
  Multi-row insert: emits one "( ... )" tuple per list element, joined by
  commas, so the whole list goes to the database as a single statement.
-->
<insert id="batchInsert" parameterType="java.util.List">
    <![CDATA[
    insert into post (
        title,
        content,
        author,
        status,
        created
    ) values
    ]]>
    <foreach collection="list" item="post" separator=",">
        (
            #{post.title},
            #{post.content},
            #{post.author},
            #{post.status},
            #{post.created}
        )
    </foreach>
</insert>
測試代碼:
service
/**
 * Inserts posts using the multi-row {@code post.batchInsert} statement,
 * sending at most 1000 rows per statement.
 *
 * <p>The list is walked in fixed-size windows and the last window is clamped
 * to the list size, so a trailing partial chunk (or a list shorter than one
 * chunk) is inserted as well. An empty list results in no statement being
 * executed and a return value of 0.
 *
 * @param postList posts to insert
 * @return the sum of the affected-row counts reported for each chunk
 */
@Override
@Transactional(propagation = Propagation.REQUIRED)
public int batchInsert(List<Post> postList) {
    final int singleNum = 1000;
    final int total = postList.size();
    int affectedRows = 0;
    // Step by chunk size instead of computing a chunk count: the previous
    // ceil((double)(size / singleNum)) truncated before the cast and dropped
    // the remainder rows.
    for (int start = 0; start < total; start += singleNum) {
        // Clamp so subList never reaches past the end of the list.
        int end = Math.min(start + singleNum, total);
        affectedRows += sqlSession.insert("post.batchInsert", postList.subList(start, end));
    }
    return affectedRows;
}

/**
 * Inserts posts one at a time with the single-row {@code post.insert}
 * statement, all within the transaction declared on this method.
 *
 * @param postList posts to insert
 * @return the sum of the affected-row counts reported for each insert
 */
@Override
@Transactional(propagation = Propagation.REQUIRED)
public int createList(List<Post> postList) {
    int affectedRows = 0;
    for (Post post : postList) {
        affectedRows += sqlSession.insert("post.insert", post);
    }
    return affectedRows;
}
test case:
/** * 批量插入效率測試 * * Method: batchInsert(List<Post> postList) * */ @Test public void testBatchInsert() throws Exception { List<Post> postList = Lists.newArrayList(); for (int i = 0; i < 10000; i++) { Post post = new Post(); post.setAuthor("test"); post.setContent("test"); post.setCreated(new Date()); post.setTitle("test"); post.setStatus(PostStatus.NORMAL); postList.add(post); } // 批量拼接SQL插入 long start = System.nanoTime(); int affectedRows = postService.batchInsert(postList); double duration = System.nanoTime() - start; System.out.format("batch: %.2f\n", duration / 1.0e9); System.out.println("affected rows: " + affectedRows); // 事務內循環插入 start = System.nanoTime(); affectedRows = postService.createList(postList); duration = System.nanoTime() - start; System.out.format("transaction: %.2f\n", duration / 1.0e9); System.out.println("affected rows: " + affectedRows); // 無事務直接循環插入 start = System.nanoTime(); affectedRows = 0; for (Post post : postList) affectedRows += postService.create(post); duration = System.nanoTime() - start; System.out.format("simple: %.2f\n", duration / 1.0e9); System.out.println("affected rows: " + affectedRows); }
結果
batch: 1.44
affected rows: 10000
transaction: 2.87
affected rows: 10000
simple: 77.57
affected rows: 10000
總結:
排行
1) 使用拼接的手段,這種插入其實就是batch,只不過這是手動batch
2) 使用事務循環插入,相對於無事務快很多的原因大概是減少了數據庫連接和事務開啟的次數
3) 無事務循環插入, 我想應該沒人這麼寫
2. 單表循環查詢與拼接in查詢測試
SQL
<!-- Fetch a single post by primary key -->
<select id="selectById"
        parameterType="int"
        resultType="com.qunar.mybatistask.bean.Post">
    <![CDATA[
    select
        id,
        title,
        content,
        author,
        status,
        created
    from post
    where id = #{id}
    ]]>
</select>

<!-- Fetch posts whose id is in the given list, via a concatenated "where id in (...)" -->
<select id="selectIn"
        parameterType="java.util.List"
        resultType="com.qunar.mybatistask.bean.Post">
    <![CDATA[
    select
        id,
        title,
        content,
        author,
        status,
        created
    from post
    where id in
    ]]>
    <foreach collection="list" item="id" open="(" close=")" separator=",">
        #{id}
    </foreach>
</select>
Service
/**
 * Loads one post through the {@code post.selectById} statement.
 *
 * @param id id passed to the statement
 * @return whatever {@code selectOne} yields for that id
 */
@Override
public Post selectById(int id) {
    final Post found = sqlSession.selectOne("post.selectById", id);
    return found;
}

/**
 * Loads posts for the given ids through the {@code post.selectIn} statement,
 * passing at most 1000 ids per query and concatenating the results in chunk
 * order.
 *
 * @param ids ids to look up
 * @return all rows returned across the chunked queries
 */
@Override
public List<Post> selectByIds(List<Integer> ids) {
    final int singleNum = 1000;
    final int total = ids.size();
    final List<Post> postList = Lists.newArrayList();
    for (int from = 0; from < total; from += singleNum) {
        // The last window may be shorter than a full chunk.
        final int to = Math.min(from + singleNum, total);
        final List<Post> chunk = sqlSession.selectList("post.selectIn", ids.subList(from, to));
        postList.addAll(chunk);
    }
    return postList;
}
test case
/** * 使用IN查詢效率測試 * * @throws Exception */ @Test public void testInSelect() throws Exception { List<Integer> ids = Lists.newArrayList(); for (int i = 1; i < 10000; i++) { ids.add(i); } // in 查詢 long start = System.nanoTime(); List<Post> list = postService.selectByIds(ids); double duration = System.nanoTime() - start; System.out.format("in select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // 循環查詢 list = Lists.newArrayList(); start = System.nanoTime(); for (int id : ids) list.add(postService.selectById(id)); duration = System.nanoTime() - start; System.out.format("simple select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); }
結果
in select: 0.55
list size: 9999
simple select: 6.24
list size: 9999
總結:
我想應該沒人會用for循環去做查詢吧
3. 多表聯結查詢, join, from 2個table, in, exists 比較
SQL
<!-- All comments, with no reference to post; used for the per-row lookup variant -->
<select id="selectAll" resultType="com.qunar.mybatistask.bean.Comment">
    <![CDATA[
    select
        cmt.id as id,
        cmt.post_id as postId,
        cmt.content as content
    from cmt
    ]]>
</select>

<!-- Explicit JOIN on cmt.post_id = post.id -->
<select id="selectJoin" resultType="com.qunar.mybatistask.bean.Comment">
    <![CDATA[
    select
        cmt.id as id,
        cmt.post_id as postId,
        cmt.content as content
    from cmt
    join post on post.id = cmt.post_id
    ]]>
</select>

<!--
  Two tables in FROM, joined in WHERE.
  The statement id keeps its original spelling ("Tow") because the service
  looks it up as "comment.selectTowTable".
-->
<select id="selectTowTable" resultType="com.qunar.mybatistask.bean.Comment">
    <![CDATA[
    select
        cmt.id as id,
        cmt.post_id as postId,
        cmt.content as content
    from cmt, post
    where cmt.post_id = post.id
    ]]>
</select>

<!-- IN subquery over post ids -->
<select id="selectIn" resultType="com.qunar.mybatistask.bean.Comment">
    <![CDATA[
    select
        cmt.id as id,
        cmt.post_id as postId,
        cmt.content as content
    from cmt
    where cmt.post_id in (
        select post.id from post
    )
    ]]>
</select>

<!--
  Correlated EXISTS subquery.
  Correlates on cmt.post_id, the same column the JOIN, two-table and IN
  variants use; it previously compared post.id with cmt.id (the comment's own
  id), which is a different query.
-->
<select id="selectExists" resultType="com.qunar.mybatistask.bean.Comment">
    <![CDATA[
    select
        cmt.id as id,
        cmt.post_id as postId,
        cmt.content as content
    from cmt
    where exists (
        select post.id from post where post.id = cmt.post_id
    )
    ]]>
</select>
service
/**
 * Runs the {@code comment.selectTowTable} statement.
 *
 * @return the rows returned by the statement
 */
@Override
public List<Comment> selectTwoTable() {
    final List<Comment> rows = sqlSession.selectList("comment.selectTowTable");
    return rows;
}

/**
 * Runs the {@code comment.selectJoin} statement.
 *
 * @return the rows returned by the statement
 */
@Override
public List<Comment> selectJoin() {
    final List<Comment> rows = sqlSession.selectList("comment.selectJoin");
    return rows;
}

/**
 * Runs the {@code comment.selectIn} statement.
 *
 * @return the rows returned by the statement
 */
@Override
public List<Comment> selectIn() {
    final List<Comment> rows = sqlSession.selectList("comment.selectIn");
    return rows;
}

/**
 * Runs the {@code comment.selectExists} statement.
 *
 * @return the rows returned by the statement
 */
@Override
public List<Comment> selectExists() {
    final List<Comment> rows = sqlSession.selectList("comment.selectExists");
    return rows;
}

/**
 * Runs the {@code comment.selectAll} statement.
 *
 * @return the rows returned by the statement
 */
@Override
public List<Comment> selectAll() {
    final List<Comment> rows = sqlSession.selectList("comment.selectAll");
    return rows;
}
test case
/** * 測試JOIN查詢效率 * */ @Test public void testJoinSelect() { // join 查詢 long start = System.nanoTime(); List<Comment> list = commentService.selectJoin(); double duration = System.nanoTime() - start; System.out.format("join select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // From 兩個表查詢 start = System.nanoTime(); list = commentService.selectTwoTable(); duration = System.nanoTime() - start; System.out.format("2 table select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // in多表查詢 start = System.nanoTime(); list = commentService.selectIn(); duration = System.nanoTime() - start; System.out.format("in multi table select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // exists多表查詢 start = System.nanoTime(); list = commentService.selectExists(); duration = System.nanoTime() - start; System.out.format("exists multi table select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // 分次查詢, 太慢了, 忽略這種方法的測試吧 // start = System.nanoTime(); // list = commentService.selectAll(); // for (Comment comment : list) { // postService.selectById(comment.getPostId()); // } // duration = System.nanoTime() - start; // System.out.format("separate select: %.2f\n", duration / 1.0e9); // System.out.println("list size: " + list.size()); }
結果
join select: 2.44
list size: 210000
2 table select: 2.26
list size: 210000
in multi table select: 2.03
list size: 210000
exists multi table select: 2.35
list size: 210000
總結:
21W條數據下效率都差不多,而且我們一般會使用limit去限制查詢的條數,所以它們的效率差距應該很小。我通過觀察explain發現,實際上join和from 2個table這兩種方式的執行計劃是一模一樣的,而in和exists的執行計劃也是一模一樣的
這裡的表結構相對簡單,也基本能用上索引 post_id 和 post.id 這些primary, 具體更加複雜的情況也許會影響這幾種查詢方式的執行計劃, 才會體現出它們之間的差距, 當然我也相信它們執行的效率很大程度上是取決於mysql的優化器的優化策略,而這個優化策略很難人為地去判斷,所以也不好說