Several Ways to Implement Custom Sorting in Scala (with Spark)


# Method 1: sortBy with an Ordered wrapper class

package day05

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object SortTest1 {

  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf().setAppName("SortTest1").setMaster("local[4]")
    val sc: SparkContext = new SparkContext(conf)

    val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")

    // Build an RDD from the local collection with parallelize
    val contentRDD: RDD[String] = sc.parallelize(content)
    // Parse each line into a (name, age, faceValue) tuple
    val mapRDD: RDD[(String, Int, Int)] = contentRDD.map(line => {
      val fields: Array[String] = line.split(" ")
      val name: String = fields(0)
      val age: Int = fields(1).toInt
      val faceValue: Int = fields(2).toInt
      (name, age, faceValue)
    })
    // Sort: wrap the relevant fields in a MethodForSort, which defines the ordering
    val sorted: RDD[(String, Int, Int)] = mapRDD.sortBy(tp => new MethodForSort(tp._3, tp._2))
    println(sorted.collect().toBuffer)
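    // Expected output (fv descending, age ascending as tie-breaker):
    // ArrayBuffer((laoyang,30,100), (laoduan,29,88), (laoli,34,88), (laozhang,24,70))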
    sc.stop()
  }

}

// Custom sort key: extend Ordered so sortBy can compare instances; it must be
// Serializable because Spark ships the keys across the cluster
class MethodForSort(val fv: Int, val age: Int) extends Ordered[MethodForSort] with Serializable {
  override def compare(that: MethodForSort): Int = {
    // If fv is equal, fall back to comparing age (ascending)
    if (this.fv == that.fv) {
      this.age - that.age
    } else {
      // Otherwise sort by fv in descending order
      that.fv - this.fv
    }
  }
}
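Because MethodForSort extends Ordered, the same comparison logic also works outside Spark with plain Scala collections. A minimal sketch (the values below are illustrative, not taken from the example data):

// .sorted picks up the Ordered implementation via the implicit Ordering.ordered conversion
val keys = List(new MethodForSort(88, 29), new MethodForSort(100, 30), new MethodForSort(88, 34))
keys.sorted.foreach(k => println(s"fv=${k.fv}, age=${k.age}"))
// Prints fv=100 first, then the two fv=88 entries in ascending age order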

# Method 2: an RDD of Ordered objects

package day05

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object SortTest2 {

  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf().setAppName("SortTest2").setMaster("local[4]")
    val sc: SparkContext = new SparkContext(conf)

    val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")

    // Build an RDD from the local collection with parallelize
    val contentRDD: RDD[String] = sc.parallelize(content)
    // Parse each line into a MethodForSort2 object
    val mapRDD: RDD[MethodForSort2] = contentRDD.map(line => {
      val fields: Array[String] = line.split(" ")
      val name: String = fields(0)
      val age: Int = fields(1).toInt
      val faceValue: Int = fields(2).toInt
      new MethodForSort2(name, age, faceValue)
    })

    // Sort the MethodForSort2 records held in the RDD; the objects themselves are the sort keys
    val sorted: RDD[MethodForSort2] = mapRDD.sortBy(tp => tp)
    println(sorted.collect().toBuffer)
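    // Expected output (using the toString defined below):
    // ArrayBuffer(name:laoyang,age:30,faceValue:100, name:laoduan,age:29,faceValue:88,
    //             name:laoli,age:34,faceValue:88, name:laozhang,age:24,faceValue:70)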
    sc.stop()
  }

}

// Custom sort record: extend Ordered so the objects can be compared directly; it must be
// Serializable because Spark ships the records across the cluster
class MethodForSort2(val name: String, val age: Int, val fv: Int) extends Ordered[MethodForSort2] with Serializable {
  override def compare(that: MethodForSort2): Int = {
    // If fv is equal, fall back to comparing age (ascending)
    if (this.fv == that.fv) {
      this.age - that.age
    } else {
      // Otherwise sort by fv in descending order
      that.fv - this.fv
    }
  }
  override def toString: String = s"name:$name,age:$age,faceValue:$fv"
}

# Method 3: a case class as the sort key

package day05

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object SortTest3 {

  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf().setAppName("SortTest3").setMaster("local[4]")
    val sc: SparkContext = new SparkContext(conf)

    val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")

    // Build an RDD from the local collection with parallelize
    val contentRDD: RDD[String] = sc.parallelize(content)
    // Parse each line into a (name, age, faceValue) tuple
    val mapRDD: RDD[(String, Int, Int)] = contentRDD.map(line => {
      val fields: Array[String] = line.split(" ")
      val name: String = fields(0)
      val age: Int = fields(1).toInt
      val faceValue: Int = fields(2).toInt
      (name, age, faceValue)
    })
    // Sort: build a case-class key per record, no `new` needed
    val sorted: RDD[(String, Int, Int)] = mapRDD.sortBy(tp => MethodForSort3(tp._3, tp._2))
    println(sorted.collect().toBuffer)
    sc.stop()
  }

}

// Custom sort key: extend Ordered. Unlike an object (a singleton), a case class can be
// instantiated many times; case classes are also serializable by default, so the explicit
// `with Serializable` is redundant but harmless
case class MethodForSort3(fv: Int, age: Int) extends Ordered[MethodForSort3] with Serializable {
  override def compare(that: MethodForSort3): Int = {
    // If fv is equal, fall back to comparing age (ascending)
    if (this.fv == that.fv) {
      this.age - that.age
    } else {
      // Otherwise sort by fv in descending order
      that.fv - this.fv
    }
  }
}
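Since Ordered also supplies the comparison operators (<, <=, >, >=), the rule is easy to spot-check on its own. A small illustrative check:

val a = MethodForSort3(88, 29)
val b = MethodForSort3(88, 34)
println(a < b)  // true: equal fv, so the smaller age sorts first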

# Method 4: a tuple as the sort key

package day05

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object SortTest4 {

  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf().setAppName("SortTest4").setMaster("local[4]")
    val sc: SparkContext = new SparkContext(conf)

    val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")

    // Build an RDD from the local collection with parallelize
    val contentRDD: RDD[String] = sc.parallelize(content)
    // Parse each line into a (name, age, faceValue) tuple
    val mapRDD: RDD[(String, Int, Int)] = contentRDD.map(line => {
      val fields: Array[String] = line.split(" ")
      val name: String = fields(0)
      val age: Int = fields(1).toInt
      val faceValue: Int = fields(2).toInt
      (name, age, faceValue)
    })
    // Exploit the built-in tuple comparison rule: compare the first element first; if equal, compare the second
    val sorted: RDD[(String, Int, Int)] = mapRDD.sortBy(tp => (-tp._3, tp._2))
    println(sorted.collect().toBuffer)
    sc.stop()
  }

}
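The negation trick works because Scala already defines a lexicographic Ordering for tuples, so no custom class is needed at all. A quick illustration outside Spark:

// Tuples compare element by element, left to right
println(List((2, 1), (1, 9), (2, 0)).sorted)  // List((1,9), (2,0), (2,1))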

# Method 5: an implicit Ordering

package day05

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object SortTest5 {

  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf().setAppName("SortTest5").setMaster("local[4]")
    val sc: SparkContext = new SparkContext(conf)

    val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")

    // Build an RDD from the local collection with parallelize
    val contentRDD: RDD[String] = sc.parallelize(content)
    // Parse each line into a (name, age, faceValue) tuple
    val mapRDD: RDD[(String, Int, Int)] = contentRDD.map(line => {
      val fields: Array[String] = line.split(" ")
      val name: String = fields(0)
      val age: Int = fields(1).toInt
      val faceValue: Int = fields(2).toInt
      (name, age, faceValue)
    })
    // Exploit the built-in tuple comparison rule, but keep the RDD elements unchanged:
    //   Ordering[(Int, Int)]     -- the shape the comparison is ultimately performed on
    //   on[(String, Int, Int)]   -- the shape of the data before comparison
    //   t => (-t._3, t._2)       -- how each record is mapped into the comparable shape
    implicit val rules: Ordering[(String, Int, Int)] = Ordering[(Int, Int)].on[(String, Int, Int)](t => (-t._3, t._2))
    val sorted: RDD[(String, Int, Int)] = mapRDD.sortBy(tp => tp)
    println(sorted.collect().toBuffer)
    sc.stop()
  }

}
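An equivalent, arguably more direct way to build the same implicit rule is Ordering.by (a small sketch; same behavior as the `on` version above):

// Ordering.by derives Ordering[(String, Int, Int)] from the key function
implicit val rules: Ordering[(String, Int, Int)] =
  Ordering.by((t: (String, Int, Int)) => (-t._3, t._2))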

 

