Hbase之緩存掃描加快讀取速度


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;

import java.io.IOException;

/**
 * Created by similarface on 16/8/23.
 */
public class ScanDataUseCache {
    // Lazily-initialized shared Table handle; the Connection behind it is
    // heavyweight, so it should be created once and reused.
    private static Table table = null;

    /**
     * Returns a cached {@link Table} for "testtable", creating the HBase
     * connection on first use.
     *
     * @return the shared Table, or {@code null} if the connection could not
     *         be established (the failure is logged to stdout)
     */
    public static Table getTable() {
        if (table == null) {
            try {
                Configuration configuration = HBaseConfiguration.create();
                Connection connection = ConnectionFactory.createConnection(configuration);
                // Cache the handle so subsequent calls reuse this connection.
                // (The original code returned without assigning, leaking a
                // new Connection on every call.)
                table = connection.getTable(TableName.valueOf("testtable"));
            } catch (IOException e) {
                // Log instead of silently swallowing; table stays null.
                System.out.println(e);
            }
        }
        return table;
    }

    /**
     * Runs a full scan with the given tuning knobs and prints the observed
     * result count and RPC count.
     *
     * @param caching number of rows fetched per RPC (setCaching); default is 1.
     *                Larger values cut RPC round-trips but make each transfer bigger.
     * @param batch   max number of columns returned per Result (setBatch);
     *                wide rows are split across multiple Results.
     * @param small   whether to use the "small scan" optimization (setSmall)
     */
    private static void scan(int caching, int batch, boolean small) {
        int count = 0;
        // setScanMetricsEnabled(true) makes scan.getScanMetrics() available
        // after the scanner is exhausted.
        Scan scan = new Scan().setCaching(caching).setBatch(batch).setSmall(small).setScanMetricsEnabled(true);
        Table t = getTable();
        ResultScanner scanner = null;
        if (t != null) {
            try {
                scanner = t.getScanner(scan);
            } catch (IOException e) {
                System.out.println(e);
            }
        }
        if (scanner != null) {
            // Drain the scanner; we only care about the count and the metrics.
            for (Result result : scanner) {
                count++;
            }
            scanner.close();
            ScanMetrics metrics = scan.getScanMetrics();
            System.out.println("Caching: " + caching + ", Batch: " + batch + ", Small: " + small + ", Results: " + count + ", RPCs: " + metrics.countOfRPCcalls);
        } else {
            System.out.println("Error");
        }
    }

    public static void main(String[] args) throws IOException {
        // Expected output examples (cluster-dependent):
        // Caching: 1, Batch: 1, Small: false, Results: 9, RPCs: 12
        scan(1, 1, false);

        // Caching: 1, Batch: 0, Small: false, Results: 4, RPCs: 7
        scan(1, 0, false);

        // Caching: 1, Batch: 0, Small: true, Results: 4, RPCs: 0
        scan(1, 0, true);

        // Caching: 200, Batch: 1, Small: false, Results: 9, RPCs: 3
        scan(200, 1, false);

        // Caching: 200, Batch: 0, Small: false, Results: 4, RPCs: 3
        scan(200, 0, false);

        // Caching: 200, Batch: 0, Small: true, Results: 4, RPCs: 0
        scan(200, 0, true);

        // Caching: 2000, Batch: 100, Small: false, Results: 4, RPCs: 3
        scan(2000, 100, false);

        // Caching: 2, Batch: 100, Small: false, Results: 4, RPCs: 5
        scan(2, 100, false);

        // Caching: 2, Batch: 10, Small: false, Results: 4, RPCs: 5
        scan(2, 10, false);

        // Caching: 5, Batch: 100, Small: false, Results: 4, RPCs: 5
        scan(5, 100, false);

        // Caching: 5, Batch: 20, Small: false, Results: 4, RPCs: 3
        scan(5, 20, false);

        // Caching: 10, Batch: 10, Small: false, Results: 4, RPCs: 3
        scan(10, 10, false);
    }
}

/**
 Caching: 1, Batch: 0, Small: false, Results: 5, RPCs: 8
 Caching: 1, Batch: 0, Small: true, Results: 5, RPCs: 0
 Caching: 200, Batch: 1, Small: false, Results: 1009, RPCs: 8
 Caching: 200, Batch: 0, Small: false, Results: 5, RPCs: 3
 Caching: 200, Batch: 0, Small: true, Results: 5, RPCs: 0
 Caching: 2000, Batch: 100, Small: false, Results: 14, RPCs: 3
 Caching: 2, Batch: 100, Small: false, Results: 14, RPCs: 10
 Caching: 2, Batch: 10, Small: false, Results: 104, RPCs: 55
 Caching: 5, Batch: 100, Small: false, Results: 14, RPCs: 5
 Caching: 5, Batch: 20, Small: false, Results: 54, RPCs: 13
 Caching: 10, Batch: 10, Small: false, Results: 104, RPCs: 13
 **/

 

假設這是一張有 9 行數據的表，每行包含若干列。

使用緩存（caching）為 6、批量（batch）為 3 的掃描器時，共需要 3 個 RPC：

每 3 個列裝入一個 Result 實例；

每 6 個 Result 放入緩存中，組成一次 RPC 傳輸。

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;

import java.io.IOException;

/**
 * Created by similarface on 16/8/24.
 */
public class ScanWithOffsetAndLimit {
    // Lazily-initialized shared Table handle; reused across scans so we do
    // not open a new Connection per call.
    private static Table table = null;

    /**
     * Returns a cached {@link Table} for "testtable", creating the HBase
     * connection on first use.
     *
     * @return the shared Table, or {@code null} if the connection could not
     *         be established (the failure is logged to stdout)
     */
    public static Table getTable() {
        if (table == null) {
            try {
                Configuration configuration = HBaseConfiguration.create();
                Connection connection = ConnectionFactory.createConnection(configuration);
                // Cache the handle so subsequent calls reuse this connection.
                // (The original code returned without assigning, leaking a
                // new Connection on every call.)
                table = connection.getTable(TableName.valueOf("testtable"));
            } catch (IOException e) {
                // Log instead of silently swallowing; table stays null.
                System.out.println(e);
            }
        }
        return table;
    }

    /**
     * Scans the table with per-column-family offset/limit settings and prints
     * the observed result count and RPC count.
     *
     * @param num           sequence number of this run (for the log line)
     * @param caching       rows fetched per RPC (setCaching)
     * @param batch         max columns per Result (setBatch)
     * @param offset        columns to skip per row per column family
     *                      (setRowOffsetPerColumnFamily)
     * @param maxResults    max columns returned per row per column family
     *                      (setMaxResultsPerColumnFamily)
     * @param maxResultSize max total bytes returned per RPC (setMaxResultSize)
     * @param dump          whether to print every Result
     * @throws IOException if the scan fails or no table connection is available
     */
    private static void scan(int num, int caching, int batch, int offset, int maxResults, int maxResultSize, boolean dump
    ) throws IOException {
        int count = 0;
        Scan scan = new Scan().setCaching(caching).setBatch(batch)
                .setRowOffsetPerColumnFamily(offset)
                .setMaxResultsPerColumnFamily(maxResults)
                .setMaxResultSize(maxResultSize)
                .setScanMetricsEnabled(true);
        Table t = getTable();
        if (t == null) {
            // getTable() already logged the cause; fail loudly instead of NPEing.
            throw new IOException("No HBase table connection available");
        }
        ResultScanner scanner = t.getScanner(scan);
        System.out.println("Scan #" + num + " running...");
        for (Result result : scanner) {
            count++;
            if (dump)
                System.out.println("Result [" + count + "]:" + result);
        }
        scanner.close();
        ScanMetrics metrics = scan.getScanMetrics();
        System.out.println("Caching: " + caching + ", Batch: " + batch +
                ", Offset: " + offset + ", maxResults: " + maxResults +
                ", maxSize: " + maxResultSize + ", Results: " + count +
                ", RPCs: " + metrics.countOfRPCcalls);
    }

    public static void main(String[] args) throws IOException {
        // Offset 0, at most 2 cells per family: scans column 1 and column 2.
        scan(1, 11, 0, 0, 2, -1, true);
        // Offset 4, at most 2 cells per family: scans column 5 and column 6.
        scan(2, 11, 0, 4, 2, -1, true);
        // Same limits with smaller caching (more RPCs), without dumping rows.
        scan(3, 5, 0, 0, 2, -1, false);
        scan(4, 11, 2, 0, 5, -1, true);
        scan(5, 11, -1, -1, -1, 1, false);
        scan(6, 11, -1, -1, -1, 10000, false);
    }
}

/**
 Caching: 11, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 458
 Caching: 11, Batch: 0, Offset: 4, maxResults: 2, maxSize: -1, Results: 1, RPCs: 3
 Caching: 5, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 1004
 Caching: 11, Batch: 2, Offset: 0, maxResults: 5, maxSize: -1, Results: 5009, RPCs: 458
 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 1, Results: 5005, RPCs: 11012
 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 10000, Results: 5005, RPCs: 469
**/

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM