import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import java.util.Arrays;
import java.util.List;
/**
* filter 算子使用
* 通過函數篩選出需要的數據元素,返回true表示保留,返回false表示拋棄
*/
public class FilterOperator {
public static void main(String[] args){
SparkConf conf = new SparkConf().setMaster("local").setAppName("filter");
JavaSparkContext sc = new JavaSparkContext(conf);
List<Integer> names = Arrays.asList(12,56,5,14,44);
JavaRDD<Integer> nameRdd = sc.parallelize(names);
JavaRDD<Integer> result = nameRdd.filter(new Function<Integer, Boolean>() {
@Override
public Boolean call(Integer integer) throws Exception {
if(integer<10) {
return false;
}
return true;
}
});
result.foreach(new VoidFunction<Integer>() {
@Override
public void call(Integer integer) throws Exception {
System.err.println("filter算子:"+integer);
}
});
}
}
微信掃描下圖二維碼加入博主知識星球,獲取更多大數據、人工智能、算法等免費學習資料哦!