第五章_Spark核心编程_Rdd_转换算子_keyValue型_join&leftOuterJoin&rightOuterJoin&fullOuterJoin
2022/3/27 9:22:35
本文主要是介绍第五章_Spark核心编程_Rdd_转换算子_keyValue型_join&leftOuterJoin&rightOuterJoin&fullOuterJoin,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
1. join
/**
 * Demonstrates `join` on key-value RDDs.
 *
 * Signatures:
 * {{{
 * def join[W](other: RDD[(K, W)]): RDD[(K, (V, W))]
 * def join[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (V, W))]
 * }}}
 *
 * Joins an RDD of (K, V) with an RDD of (K, W) and returns an RDD of (K, (V, W)) that pairs up
 * the values sharing the same key. Keys that exist on only one side (7 and 6 in the data below)
 * do not appear in the result.
 */
object joinTest {

  // An explicit main() is used instead of `extends App`: Spark's quick-start guide notes that
  // subclasses of scala.App may not work correctly.
  def main(args: Array[String]): Unit = {
    val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("joinTest")
    val sc: SparkContext = new SparkContext(sparkconf)

    // Stop the context even when the job fails, so it is never left running.
    try {
      val rdd1: RDD[(Int, String)] = sc.makeRDD(
        List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
      val rdd2: RDD[(Int, String)] = sc.makeRDD(
        List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)

      val joinRdd: RDD[(Int, (String, String))] = rdd1.join(rdd2)
      joinRdd.collect().foreach(println(_))
      /* Sample output recorded by the original author:
       * (3,(关羽,蜀国))
       * (4,(曹操,魏国))
       * (1,(刘备,蜀国))
       * (5,(赵云,蜀国))
       * (2,(张飞,蜀国))
       */
    } finally {
      sc.stop()
    }
  }
}
2. leftOuterJoin
/**
 * Demonstrates `leftOuterJoin` on key-value RDDs.
 *
 * Signatures:
 * {{{
 * def leftOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (V, Option[W]))]
 * def leftOuterJoin[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (V, Option[W]))]
 * }}}
 *
 * Left-outer-joins an RDD of (K, V) with an RDD of (K, W) and returns an RDD of
 * (K, (V, Option[W])). Every key of the left RDD is kept; a left key with no match on the right
 * (7 in the data below) is paired with `None`. Keys that exist only on the right (6) do not
 * appear in the result.
 */
object leftOuterJoinTest {

  // An explicit main() is used instead of `extends App`: Spark's quick-start guide notes that
  // subclasses of scala.App may not work correctly.
  def main(args: Array[String]): Unit = {
    val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("leftOuterJoinTest")
    val sc: SparkContext = new SparkContext(sparkconf)

    // Stop the context even when the job fails, so it is never left running.
    try {
      val rdd1: RDD[(Int, String)] = sc.makeRDD(
        List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
      val rdd2: RDD[(Int, String)] = sc.makeRDD(
        List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)

      val joinRdd: RDD[(Int, (String, Option[String]))] = rdd1.leftOuterJoin(rdd2)
      joinRdd.collect().foreach(println(_))
      /* Sample output recorded by the original author:
       * (3,(关羽,Some(蜀国)))
       * (4,(曹操,Some(魏国)))
       * (1,(刘备,Some(蜀国)))
       * (7,(孙权,None))
       * (5,(赵云,Some(蜀国)))
       * (2,(张飞,Some(蜀国)))
       */
    } finally {
      sc.stop()
    }
  }
}
3. rightOuterJoin
/**
 * Demonstrates `rightOuterJoin` on key-value RDDs.
 *
 * Signatures:
 * {{{
 * def rightOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], W))]
 * def rightOuterJoin[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (Option[V], W))]
 * }}}
 *
 * Right-outer-joins an RDD of (K, V) with an RDD of (K, W) and returns an RDD of
 * (K, (Option[V], W)). Every key of the right RDD is kept; a right key with no match on the left
 * (6 in the data below) is paired with `None`. Keys that exist only on the left (7) do not
 * appear in the result.
 */
object rightOuterJoinTest {

  // An explicit main() is used instead of `extends App`: Spark's quick-start guide notes that
  // subclasses of scala.App may not work correctly.
  def main(args: Array[String]): Unit = {
    val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("rightOuterJoinTest")
    val sc: SparkContext = new SparkContext(sparkconf)

    // Stop the context even when the job fails, so it is never left running.
    try {
      val rdd1: RDD[(Int, String)] = sc.makeRDD(
        List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
      val rdd2: RDD[(Int, String)] = sc.makeRDD(
        List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)

      val joinRdd: RDD[(Int, (Option[String], String))] = rdd1.rightOuterJoin(rdd2)
      joinRdd.collect().foreach(println(_))
      /* Sample output recorded by the original author:
       * (6,(None,吴国))
       * (3,(Some(关羽),蜀国))
       * (4,(Some(曹操),魏国))
       * (1,(Some(刘备),蜀国))
       * (5,(Some(赵云),蜀国))
       * (2,(Some(张飞),蜀国))
       */
    } finally {
      sc.stop()
    }
  }
}
4. fullOuterJoin
/**
 * Demonstrates `fullOuterJoin` on key-value RDDs.
 *
 * Signatures:
 * {{{
 * def fullOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], Option[W]))]
 * def fullOuterJoin[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (Option[V], Option[W]))]
 * }}}
 *
 * Full-outer-joins an RDD of (K, V) with an RDD of (K, W) and returns an RDD of
 * (K, (Option[V], Option[W])). Keys from both sides are kept; whichever side has no value for a
 * key contributes `None` (the left side for key 6 and the right side for key 7 in the data
 * below).
 */
object fullOuterJoinTest {

  // An explicit main() is used instead of `extends App`: Spark's quick-start guide notes that
  // subclasses of scala.App may not work correctly.
  def main(args: Array[String]): Unit = {
    val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("fullOuterJoinTest")
    val sc: SparkContext = new SparkContext(sparkconf)

    // Stop the context even when the job fails, so it is never left running.
    try {
      val rdd1: RDD[(Int, String)] = sc.makeRDD(
        List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
      val rdd2: RDD[(Int, String)] = sc.makeRDD(
        List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)

      // Result type spelled out to match the other join examples.
      val joinRdd: RDD[(Int, (Option[String], Option[String]))] = rdd1.fullOuterJoin(rdd2)
      joinRdd.collect().foreach(println(_))
      /* Sample output recorded by the original author:
       * (6,(None,Some(吴国)))
       * (3,(Some(关羽),Some(蜀国)))
       * (4,(Some(曹操),Some(魏国)))
       * (1,(Some(刘备),Some(蜀国)))
       * (7,(Some(孙权),None))
       * (5,(Some(赵云),Some(蜀国)))
       * (2,(Some(张飞),Some(蜀国)))
       */
    } finally {
      sc.stop()
    }
  }
}
这篇关于第五章_Spark核心编程_Rdd_转换算子_keyValue型_join&leftOuterJoin&rightOuterJoin&fullOuterJoin的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-07-02springboot项目无法注册到nacos-icode9专业技术文章分享
- 2024-06-26结对编程到底难不难?答案在这里
- 2024-06-19《2023版Java工程师》课程升级公告
- 2024-06-15matplotlib作图不显示3D图,怎么办?
- 2024-06-1503-Loki 日志监控
- 2024-06-1504-让LLM理解知识 -Prompt
- 2024-06-05做软件测试需要懂代码吗?
- 2024-06-0514-ShardingSphere的分布式主键实现
- 2024-06-03为什么以及如何要进行架构设计权衡?
- 2024-05-31全网首发第二弹!软考2024年5月《软件设计师》真题+解析+答案!(11-20题)