1. 程式人生 > Spark之join、leftOuterJoin、rightOuterJoin及fullOuterJoin

Spark之join、leftOuterJoin、rightOuterJoin及fullOuterJoin

# Demo of the four pair-RDD join flavours: join, leftOuterJoin,
# rightOuterJoin and fullOuterJoin.
from pyspark import SparkConf, SparkContext

conf = SparkConf()
sc = SparkContext(conf=conf)


def func_join():
    """Print the result of each join flavour applied to two small pair RDDs.

    Reads the module-level ``sc``. The two RDDs share the keys ``name`` and
    ``age``; ``job`` and ``fav`` exist only in ``a`` and ``address`` only in
    ``b``, so the outer joins show ``None`` for the missing side (see the
    recorded output at the bottom of this script).
    """
    a = sc.parallelize(
        [("name", "Alice"), ("age", 20), ("job", "student"), ("fav", "basket")]
    )
    b = sc.parallelize([("name", "Bob"), ("age", 22), ("address", "WuHan")])

    print("join:{}".format(a.join(b).collect()))
    print("leftOuterJoin:{}".format(a.leftOuterJoin(b).collect()))
    print("rightOuterJoin:{}".format(a.rightOuterJoin(b).collect()))
    print("fullOuterJoin:{}".format(a.fullOuterJoin(b).collect()))


try:
    func_join()
finally:
    # Always release the SparkContext, even when one of the joins raises.
    sc.stop()

"""
result:
join:[('name', ('Alice', 'Bob')), ('age', (20, 22))]
leftOuterJoin:[('fav', ('basket', None)), ('name', ('Alice', 'Bob')), ('job', ('student', None)), ('age', (20, 22))]
rightOuterJoin:[('name', ('Alice', 'Bob')), ('age', (20, 22)), ('address', (None, 'WuHan'))]
fullOuterJoin:[('fav', ('basket', None)), ('name', ('Alice', 'Bob')), ('job', ('student', None)), ('age', (20, 22)), ('address', (None, 'WuHan'))]
"""