Spark之join、leftOuterJoin、rightOuterJoin及fullOuterJoin
阿新 • • 發佈:2018-12-13
from pyspark import SparkConf, SparkContext
conf = SparkConf()
sc = SparkContext(conf=conf)
def func_join():
a = sc.parallelize([("name", "Alice"), ("age", 20), ("job", "student"), ("fav", "basket")])
b = sc.parallelize([("name", "Bob"), ("age", 22), ("address", "WuHan")])
print("join:{} ".format(a.join(b).collect()))
print("leftOuterJoin:{}".format(a.leftOuterJoin(b).collect()))
print("rightOuterJoin:{}".format(a.rightOuterJoin(b).collect()))
print("fullOuterJoin:{}".format(a.fullOuterJoin(b).collect()))
func_join()
sc.stop()
"""
result:
join:[('name', ('Alice', 'Bob ')), ('age', (20, 22))]
leftOuterJoin:[('fav', ('basket', None)), ('name', ('Alice', 'Bob')), ('job', ('student', None)), ('age', (20, 22))]
rightOuterJoin:[('name', ('Alice', 'Bob')), ('age', (20, 22)), ('address', (None, 'WuHan'))]
fullOuterJoin:[('fav', ('basket', None)), (' name', ('Alice', 'Bob')), ('job', ('student', None)), ('age', (20, 22)), ('address', (None, 'WuHan'))]