Connecting Spark to MongoDB with Scala


References: MongoDB Connector for Spark, Spark Connector Scala Guide

 

spark-shell --jars "mongo-spark-connector_2.11-2.0.0.jar,mongo-hadoop-core-2.0.2.jar,mongo-java-driver-3.4.2.jar"
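The connector jar, mongo-hadoop-core, and the MongoDB Java driver are put on the classpath with --jars. As a sketch of an alternative launch (assuming access to Maven Central), the connector can be resolved with --packages and the default input/output URIs preset with --conf, in which case the per-read/per-write options below become optional:

spark-shell --packages org.mongodb.spark:mongo-spark-connector_2.11:2.0.0 \
  --conf "spark.mongodb.input.uri=mongodb://172.1.1.1:27017/test.test_table" \
  --conf "spark.mongodb.output.uri=mongodb://172.1.1.1:27017/test.test_table"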

 

import org.apache.spark.sql.SparkSession
import com.mongodb.spark._
import com.mongodb.spark.config._
import org.bson.Document

    // Build the SparkSession ("spark.some.config.option" is just a placeholder);
    // if test_table lives in the Hive metastore, enableHiveSupport() would also be needed.
    val spark = SparkSession.builder()
      .master("local")
      .appName("MongoSparkConnector")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()

val uri = "mongodb://172.1.1.1:27017"

    // Build a sample DataFrame; assumes a table named test_table is visible to Spark SQL
    val userDF = spark.sql("""
      select
        uid,
        name,
        current_date() as version
      from test_table
      limit 100
      """).repartition(8)

// Write to MongoDB: mode("overwrite") replaces any existing data in the target collection
    userDF.write.mode("overwrite").format("com.mongodb.spark.sql").options(
      Map(
        "uri" -> uri,
        "database" -> "test",
        "collection" -> "test_table")).save()

// Read from MongoDB; the DataFrame schema is inferred by sampling documents from the collection
    val df = spark.read.format("com.mongodb.spark.sql").options(
      Map(
        "uri" -> uri,
        "database" -> "test",
        "collection" -> "test_table")).load()

// Alternative approaches: the fully-qualified spark.mongodb.* configuration keys can also be passed as options
    userDF.write.mode("overwrite").format("com.mongodb.spark.sql").options(
      Map(
        "spark.mongodb.input.uri" -> uri,
        "spark.mongodb.output.uri" -> uri,
        "spark.mongodb.output.database" -> "test",
        "spark.mongodb.output.collection" -> "test_table")).save()

    MongoSpark.save(
      userDF.write.mode("overwrite").options(
        Map(
          "spark.mongodb.input.uri" -> uri,
          "spark.mongodb.output.uri" -> uri,
          "spark.mongodb.output.database" -> "test",
          "spark.mongodb.output.collection" -> "test_table")))

    MongoSpark.save(
      userDF.write.mode("overwrite").options(
        Map(
          "uri" -> uri,
          "database" -> "test",
          "collection" -> "test_table")))

    spark.stop()

 

