1. 程式人生 > 其它 >Python 使用 Vaex 處理海量資料

Python 使用 Vaex 處理海量資料

Vaex 官方範例文件:https://vaex.io/docs/examples.html

Examples — vaex 4.3.0 documentation

使用中的問題

#coding:utf-8


import python_utils
import vaex

from vaex import groupby,grids,utils,legacy,selections
import numpy as np
import pandas as pa
from pandas import Series,DataFrame

# Scratch script: load a local CSV with Vaex, then filter, select, sort and
# group it. Each statement sits on its own line so that the live code is
# actually executed instead of being swallowed by a leading `#`.

# Earlier experiments, kept for reference (HDF5 chunk / pandas variants).
# df = vaex.open("C:\\Users\\Anchnet\\Desktop\\ttt\\aa.csv_chunk_0..hdf5")
# df_p = pa.read_csv("C:\\Users\\Anchnet\\Desktop\\ttt\\aa.csv")
# print(df_p.count())
# # print(df_p)
# print(pa.get_versions())

# Load the CSV into a Vaex DataFrame; everything below operates on `df`.
df = vaex.read_csv("C:\\Users\\Anchnet\\Desktop\\ttt\\aa.csv")

# Abandoned attempts at narrowing the type for the IDE / at grouping.
# assert isinstance(df, vaex.groupby)
# # print(df)
# df_goup= df.sort
# assert isinstance(df, vaex.groupby)
# print( df_goup)
# print(df)
# df
# a= vgroup["企業名稱"]
# df.groupby(df["k"])

# Keep only the rows whose column "e" equals the given product name.
df_a = df[df["e"] == "化纖針織內褲"]

# Selection on column "a"; whatever df.select() returns is printed as-is.
print(df.select(df["a"] == "義烏市智洋商品採購有限公司"))

# Sort by column "e" in descending order. The trailing type comment tells the
# IDE what type the result is, so code completion works on `df_s`.
df_s = df.sort('e', ascending=False)  # type: vaex.dataframe.DataFrameLocal
# print(type(df))
print(df_s.count())
# assert isinstance(df, vaex.dataframe.DataFrameLocal)
# print(type(df))
print(df_a)

# Group by column "i" with a count aggregation over the same column.
# Alternative kept for reference: sum of "i" grouped by "e" on the sorted frame.
# dv_group = df_s.groupby(df_s['e'], agg=vaex.agg.sum(df_s['i']))
dv_group = df.groupby(df['i'], agg=vaex.agg.count(df['i']))
print(dv_group)

# Show the concrete DataFrame class (the name used in the type comment above).
print(type(df))

# type: vaex.dataframe.DataFrameLocal

加上 `# type: vaex.dataframe.DataFrameLocal` 這行型別註解之後,IDE 就可以程式碼補全啦!