程序如下:
from pyspark import SparkConf, SparkContext

conf = SparkConf().setAppName("My test App")
sc = SparkContext(conf=conf)


def load_model(sc, model_filename):
    """Load pickled object(s) from a (HDFS) path via Spark and print them.

    Parameters
    ----------
    sc : SparkContext
        Active Spark context used to read the binary file(s).
    model_filename : str or None
        Path (e.g. an HDFS path) to the pickle file. When ``None``,
        falls back to ``"/tmp/test.pkl"`` for backward compatibility
        with existing ``load_model(sc, None)`` callers.

    Side effects
    ------------
    Prints the list of unpickled objects (one per matched file).
    """
    import pickle

    # Previously the parameter was ignored and the path hardcoded;
    # honor the argument, keeping the old default when it is None.
    path = model_filename if model_filename is not None else "/tmp/test.pkl"

    # binaryFiles yields (filename, bytes) pairs; values() keeps the payloads.
    rdd = sc.binaryFiles(path)

    # NOTE(security): pickle deserialization executes arbitrary code —
    # only load files from trusted sources.
    data_arr = rdd.values().map(lambda payload: pickle.loads(payload)).collect()
    print(data_arr)


load_model(sc, None)
其中,test.pkl:
a = {'xx': 999, 'hi': 1223}
>>> import pickle as pk
>>> f2 = open("test.pkl", "wb")
>>> pk.dump(a, f2)
>>> f2.close()
>>> f2=open("test.pkl", "rb")
>>> pk.load(f2)
{'xx': 999, 'hi': 1223}
>>>
上傳到hdfs:
hdfs dfs -put test.pkl /tmp/
然后pyspark里運行上述代碼:
結果輸出:
>>> load_model(sc, None)
[{'xx': 999, 'hi': 1223}]
>>> load_model(sc, None)
[{'xx': 999, 'hi': 1223}]