Python 使用 Vaex 處理海量數據


Vaex 官方範例文檔:https://vaex.io/docs/examples.html

Examples — vaex 4.3.0 documentation

 

使用中的問題

#coding:utf-8


import python_utils
import vaex

from vaex import groupby,grids,utils,legacy,selections
import numpy as np
import pandas as pa
from pandas import Series,DataFrame

# df = vaex.open("C:\\Users\\Anchnet\\Desktop\\ttt\\aa.csv_chunk_0..hdf5")
# df_p = pa.read_csv("C:\\Users\\Anchnet\\Desktop\\ttt\\aa.csv")
# print(df_p.count())
#
# print(df_p)
# print(pa.get_versions())

# Load the CSV into a vaex DataFrame.  The trailing "# type:" comment is an
# IDE hint (it has no runtime effect) so that code completion works on `df`.
df = vaex.read_csv("C:\\Users\\Anchnet\\Desktop\\ttt\\aa.csv")  # type: vaex.dataframe.DataFrameLocal

# Keep only the rows whose column "e" equals the given product name.
df_a = df[df["e"] == "化纖針織內褲"]

# DataFrame.select() registers a selection on `df` in place; its return value
# is not the matching rows, so printing it directly shows nothing useful.
# Keep the selection for later `selection=True` calls, and print the filtered
# frame so the matching rows are actually displayed.
company_mask = df["a"] == "義烏市智洋商品采購有限公司"
df.select(company_mask)
print(df[company_mask])

# Sort by column "e" in descending order; the type comment again only serves
# IDE code completion.
df_s = df.sort('e', ascending=False)  # type: vaex.dataframe.DataFrameLocal
print(df_s.count())

print(df_a)

# Group by column "i" and count the values of "i" within each group.
dv_group = df.groupby(df['i'], agg=vaex.agg.count(df['i']))
print(dv_group)

print(type(df))

在變量賦值後加上 `# type: vaex.dataframe.DataFrameLocal` 類型註釋,IDE 就可以代碼補全啦!


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱 yoyou2525@163.com 刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM