記錄一次elasticsearch-php工作過程


 

 初始化

// Hosts the client should connect to; only an address is given here (no port).
$hosts = ['192.168.30.41'];

// Build the Elasticsearch client through the official fluent builder and keep
// it on the object so the later snippets can reuse it.
$this->client = \Elasticsearch\ClientBuilder::create()
    ->setHosts($hosts)
    ->build();

新建和設置index

    // Un-analyzed "raw" sub-field shared by the fields we group on. Grouping must
    // operate on the whole value rather than on tokens, so aggregations address
    // the field as "<field>.raw" (e.g. product_code.raw).
    $rawSubField = [
        'raw' => [
            'type' => 'string',
            'index' => 'not_analyzed',
        ],
    ];

    // Field definitions for the "goods" mapping type.
    $goodsProperties = [
        'product_code' => [
            'type' => 'string',
            'store' => 'yes',
            'fielddata' => true,
            'fields' => $rawSubField,
        ],
        'order_id' => [
            'fielddata' => true,
            'type' => 'string',
        ],
        'price' => ['type' => 'double'],
        'num' => ['type' => 'integer'],
        'pay_time' => [
            'type' => 'date',
            'format' => 'yyyy-MM-dd HH:mm:ss',
        ],
        'take_province' => [
            'type' => 'string',
            'fielddata' => true,
            'store' => 'yes',
            'fields' => $rawSubField,
        ],
        'buyer_nike' => [
            'type' => 'string',
            'fielddata' => true,
        ],
    ];

    // Request body for creating the "order" index.
    $params = [
        'index' => 'order',
        'body' => [
            'settings' => [
                // By default only the first 10,000 results can be read, so the
                // window is raised to 10,000,000 here. The price is that paging
                // gets slower the deeper it goes; the scan (scroll) API is an
                // alternative for deep traversal.
                'max_result_window' => 10000000,
            ],
            'mappings' => [
                'goods' => [
                    '_source' => ['enabled' => true],
                    'properties' => $goodsProperties,
                ],
            ],
        ],
    ];
    $response = $this->client->indices()->create($params);

插入數據(這裡引用了官方文檔的例子;大量數據導入時不逐條insert,而是使用效率更高的bulk)

// Bulk-index documents in fixed-size batches (adapted from the official
// elasticsearch-php example). Each document contributes two entries to the
// request body: an action/metadata entry followed by the document source.
//
// The client used is $this->client, the one built in the initialisation
// snippet, so this example runs in the same context as the other snippets.
$totalDocs = 1234567;
$batchSize = 1000;

$params = ['body' => []];

for ($i = 1; $i <= $totalDocs; $i++) {
    // Action entry: index the document under an explicit id.
    $params['body'][] = [
        'index' => [
            '_index' => 'my_index',
            '_type' => 'my_type',
            '_id' => $i,
        ],
    ];

    // Source entry: the document itself.
    $params['body'][] = [
        'my_field' => 'my_value',
        'second_field' => 'some more values',
    ];

    // Flush every $batchSize documents so the request body stays bounded.
    if ($i % $batchSize === 0) {
        // NOTE(review): the response is discarded without being inspected, so
        // per-item failures in a batch would go unnoticed - consider checking it.
        $responses = $this->client->bulk($params);

        // Start a fresh, empty batch.
        $params = ['body' => []];

        // Release the response once done with it to save memory.
        unset($responses);
    }
}

// Send whatever is left over when the total is not a multiple of the batch size.
if (!empty($params['body'])) {
    $responses = $this->client->bulk($params);
}

相關查詢

1、查詢某商品某時間段內訂單數、售賣總數和總價格

// SQL analogue:
//   WHERE product_code = "xxx"
//     AND pay_time BETWEEN "2017-01-01 00:00:00" AND "2017-01-31 23:59:59"

// Inclusive pay_time window, applied as a filter clause.
$payTimeRange = [
    'range' => [
        'pay_time' => [
            'gte' => $start_time,
            'lte' => $end_time,
        ],
    ],
];

// Metrics computed over the matching documents.
$aggregations = [
    // Total units sold: sum of "num".
    'sum_this_product' => ['sum' => ['field' => 'num']],
    // Total price: sum of "price".
    'total_price' => ['sum' => ['field' => 'price']],
    // Distinct order count (cardinality aggregation on order_id).
    'distinct_orderid' => ['cardinality' => ['field' => 'order_id']],
];

$params = [
    'index' => 'order',
    'type' => 'goods',
    'body' => [
        'size' => 1,
        'query' => [
            'bool' => [
                'must' => [
                    // Match on the un-analyzed sub-field, hence "product_code.raw".
                    'term' => ['product_code.raw' => $code],
                ],
                'filter' => $payTimeRange,
            ],
        ],
        'aggs' => $aggregations,
    ],
];
$response = $this->client->search($params);

2、統計某時間段所有商品的訂單數、售賣總數和總價格

// SQL analogue:
//   WHERE pay_time BETWEEN "2017-01-01 00:00:00" AND "2017-01-31 23:59:59"

// Inclusive pay_time window, applied as a filter clause.
$payTimeRange = [
    'range' => [
        'pay_time' => [
            'gte' => $start_time,
            'lte' => $end_time,
        ],
    ],
];

// Metrics computed inside each product bucket.
$perProductMetrics = [
    'sum_this_product' => ['sum' => ['field' => 'num']],
    'total_this_product' => ['sum' => ['field' => 'price']],
    'distinct_orderid' => ['cardinality' => ['field' => 'order_id']],
];

// One bucket per product code (up to 100 buckets), ordered by the
// per-bucket units-sold sum, largest first.
$productBuckets = [
    'terms' => [
        'field' => 'product_code.raw',
        'size' => 100,
        'order' => ['sum_this_product' => 'desc'],
    ],
    'aggs' => $perProductMetrics,
];

$params = [
    'index' => 'order',
    'type' => 'goods',
    'body' => [
        // size 0: request no hits, only the aggregation results.
        'size' => 0,
        'query' => [
            'bool' => [
                'filter' => $payTimeRange,
            ],
        ],
        'aggs' => [
            'num' => $productBuckets,
        ],
    ],
];
$response = $this->client->search($params);

 

 

嘮叨:

1、這次使用的是docker環境,使用阿里鏡像:https://dev.aliyun.com/detail.html?spm=5176.1972343.2.21.F0KOV2&repoId=1209

2、官方文檔:https://www.elastic.co/guide/en/elasticsearch/client/php-api/current/index.html

3、本次工作數據量大約1500w,需要複雜的統計和展現,mysql已經不能滿足,故使用es。但是es不支持類似mysql中select in select這樣的子查詢,著實折騰了不少時間

4、感謝一位大神的博客:https://segmentfault.com/a/1190000004433446,這是個文章系列,很值得參考。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM