初始化
// Build the Elasticsearch client for the node(s) listed in $hosts and keep it on the instance.
$hosts = ['192.168.30.41'];
$builder = \Elasticsearch\ClientBuilder::create()->setHosts($hosts);
$this->client = $builder->build();
新建和設置index
// Create the "order" index with a "goods" type mapping.

// Un-analysed "raw" sub-field. Fields that are grouped on must not be tokenised,
// so aggregations address them as "<field>.raw" (e.g. product_code.raw).
$rawSubField = [
    'raw' => [
        'type' => 'string',
        'index' => 'not_analyzed',
    ],
];

$goodsProperties = [
    'product_code' => [
        'type' => 'string',
        'store' => 'yes',
        'fielddata' => true,
        'fields' => $rawSubField,
    ],
    'order_id' => [
        'fielddata' => true,
        'type' => 'string',
    ],
    'price' => [
        'type' => 'double',
    ],
    'num' => [
        'type' => 'integer',
    ],
    'pay_time' => [
        'type' => 'date',
        'format' => 'yyyy-MM-dd HH:mm:ss',
    ],
    'take_province' => [
        'type' => 'string',
        'fielddata' => true,
        'store' => 'yes',
        'fields' => $rawSubField,
    ],
    'buyer_nike' => [
        'type' => 'string',
        'fielddata' => true,
    ],
];

$indexSettings = [
    // By default only the first 10,000 hits can be read; the window is raised to
    // 10,000,000 here. The cost is that the deeper the page, the slower the query.
    // The original note mentions scan as an alternative.
    'max_result_window' => 10000000,
];

$params = [
    'index' => 'order',
    'body' => [
        'settings' => $indexSettings,
        'mappings' => [
            'goods' => [
                '_source' => [
                    'enabled' => true,
                ],
                'properties' => $goodsProperties,
            ],
        ],
    ],
];

$response = $this->client->indices()->create($params);
插入數據(這裡引用了官方文檔的例子:大數據量導入時不逐條 insert,而使用效率更高的 bulk)
// Bulk-index 1,234,567 sample documents, flushing a request every 1000 documents.
// Each document contributes two entries to the bulk body: an action line
// (index/type/id) followed by the document source.
// NOTE(review): this sample writes to my_index/my_type, not the order/goods
// mapping created earlier - adjust the target before using it for real data.
$params = ['body' => []];

for ($i = 1; $i <= 1234567; $i++) {
    $params['body'][] = [
        'index' => [
            '_index' => 'my_index',
            '_type' => 'my_type',
            '_id' => $i,
        ],
    ];

    $params['body'][] = [
        'my_field' => 'my_value',
        'second_field' => 'some more values',
    ];

    // Every 1000 documents stop and send the bulk request.
    if ($i % 1000 === 0) {
        // Use the client stored on the instance, as the other snippets do;
        // a bare $client is never defined here.
        $responses = $this->client->bulk($params);

        // Erase the old bulk request.
        $params = ['body' => []];

        // Unset the bulk response when you are done to save memory.
        unset($responses);
    }
}

// Send the last batch if it exists.
if (!empty($params['body'])) {
    $responses = $this->client->bulk($params);
}
相關查詢
1、查詢某商品某時間段內訂單數、售賣總數和總價格
// Order count, units sold and total price for one product within a time window.
// SQL equivalent:
//   WHERE product_code = "xxx"
//     AND pay_time BETWEEN "2017-01-01 00:00:00" AND "2017-01-31 23:59:59"

// Restrict hits to the requested pay_time window (inclusive on both ends).
$payTimeFilter = [
    'range' => [
        'pay_time' => [
            'gte' => $start_time,
            'lte' => $end_time,
        ],
    ],
];

$productQuery = [
    'bool' => [
        'must' => [
            // Match on the un-analysed sub-field, hence "product_code.raw".
            'term' => ['product_code.raw' => $code],
        ],
        'filter' => $payTimeFilter,
    ],
];

$productAggs = [
    // Total units sold: sum of "num".
    'sum_this_product' => ['sum' => ['field' => 'num']],
    // Total price: sum of "price".
    'total_price' => ['sum' => ['field' => 'price']],
    // De-duplicated order count: cardinality of "order_id".
    'distinct_orderid' => ['cardinality' => ['field' => 'order_id']],
];

$params = [
    'index' => 'order',
    'type' => 'goods',
    'body' => [
        'size' => 1,
        'query' => $productQuery,
        'aggs' => $productAggs,
    ],
];

$response = $this->client->search($params);
2、統計某時間段所有商品的訂單數、售賣總數和總價格
// Per-product order count, units sold and total price within a time window.
// SQL equivalent:
//   WHERE pay_time BETWEEN "2017-01-01 00:00:00" AND "2017-01-31 23:59:59"

// Restrict hits to the requested pay_time window (inclusive on both ends).
$payTimeFilter = [
    'range' => [
        'pay_time' => [
            'gte' => $start_time,
            'lte' => $end_time,
        ],
    ],
];

// Metrics computed inside every product bucket.
$perProductMetrics = [
    'sum_this_product' => ['sum' => ['field' => 'num']],
    'total_this_product' => ['sum' => ['field' => 'price']],
    'distinct_orderid' => ['cardinality' => ['field' => 'order_id']],
];

// Bucket by the un-analysed product code, keeping 100 buckets.
$productBuckets = [
    'terms' => [
        'field' => 'product_code.raw',
        'size' => 100,
        // Sort buckets by the computed units-sold total, largest first.
        'order' => ['sum_this_product' => 'desc'],
    ],
    'aggs' => $perProductMetrics,
];

$params = [
    'index' => 'order',
    'type' => 'goods',
    'body' => [
        // Only the aggregations are needed, so no hits are requested.
        'size' => 0,
        'query' => [
            'bool' => [
                'filter' => $payTimeFilter,
            ],
        ],
        'aggs' => [
            'num' => $productBuckets,
        ],
    ],
];

$response = $this->client->search($params);
嘮叨:
1、這次使用的是docker環境,使用阿里鏡像:https://dev.aliyun.com/detail.html?spm=5176.1972343.2.21.F0KOV2&repoId=1209
2、官方文檔:https://www.elastic.co/guide/en/elasticsearch/client/php-api/current/index.html
3、本次工作數據量大約1500w,需要複雜的統計和展現,mysql已經不能滿足,故使用es。但是es不支持類似mysql中 select in select 這樣的子查詢,著實折騰了不少時間。
4、感謝一位大神的博客:https://segmentfault.com/a/1190000004433446 ,這是個文章系列,很值得參考。