import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
import java.util.Arrays;
import java.util.List;
/**
* reduceByKey(fun,[numTasks]) 算子:
* 根據key將value聚合,然后根據fun進行計算
* 可以設置並行度
* reduceByKey = groupByKey+reduce
*/
public class ReduceByKeyOperator {
public static void main(String[] args){
SparkConf conf = new SparkConf().setMaster("local").setAppName("reduceByKey");
JavaSparkContext sc = new JavaSparkContext(conf);
List<Tuple2<String,Integer>> list = Arrays.asList(
new Tuple2<String,Integer>("w1",1),
new Tuple2<String,Integer>("w2",2),
new Tuple2<String,Integer>("w3",3),
new Tuple2<String,Integer>("w2",22),
new Tuple2<String,Integer>("w1",11)
);
JavaPairRDD<String,Integer> pairRdd = sc.parallelizePairs(list);
JavaPairRDD<String,Integer> result = pairRdd.reduceByKey(new Function2<Integer, Integer, Integer>() {
@Override
public Integer call(Integer integer, Integer integer2) throws Exception {
return integer+integer2;
}
},2);
result.foreach(new VoidFunction<Tuple2<String, Integer>>() {
@Override
public void call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception {
System.err.println(stringIntegerTuple2._1+":"+stringIntegerTuple2._2);
}
});
}
}
微信掃描下圖二維碼加入博主知識星球,獲取更多大數據、人工智能、算法等免費學習資料哦!
