import org.apache.spark.{SparkConf, SparkContext}

import scala.util.parsing.json.JSON

/**
 * Reads a JSON file line by line, parses each line with
 * `scala.util.parsing.json.JSON.parseFull`, and prints the parse results.
 *
 * User: @別慌
 * Date: 2019-11-24
 * Time: 18:04
 */
object hello {

  def main(args: Array[String]): Unit = {
    // Local Spark context; when submitting to the cluster with spark-submit,
    // the --master flag on the command line overrides setMaster here.
    val conf = new SparkConf()
      .setAppName("WordFreq_Spark")
      .setMaster("local")
    val sc = new SparkContext(conf)

    // Each RDD element is one line of the input file.
    val lines = sc.textFile("D:\\雜七雜八\\瞎畫\\test.json")

    // JSON.parseFull returns Option[Any]: Some(parsed value) on success,
    // None when the line is not valid JSON.
    val parsed = lines.map(JSON.parseFull)

    println(parsed.collect().mkString("\t"))

    // Option is sealed, so Some/None is exhaustive — the original catch-all
    // `case _` branch was unreachable, and `case Some(do02)` shadowed the RDD.
    parsed.foreach {
      case Some(value) => println(value)
      case None        => println("invalid sign")
    }

    sc.stop()

    // Word-count example kept for reference (run against HDFS):
    // val rdd = sc.textFile("hdfs://192.168.199.120:9000/words.txt")
    // val wc = rdd.flatMap(_.split(" "))
    //   .map(s => (s, 1))
    //   .reduceByKey((a, b) => a + b)
    //   .sortBy(_._2, true)
    //
    // for (arg <- wc.collect())
    //   print(arg + " ")
    // println()
    // wc.saveAsTextFile("hdfs://192.168.199.120:9000/tai")
    // sc.stop()
  }
}

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.syllabus</groupId>
    <artifactId>chapter-3</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Extra repository for downloading jar dependencies.
         NOTE: central.maven.org has been decommissioned; the canonical
         Maven Central URL is https://repo1.maven.org/maven2/ -->
    <repositories>
        <repository>
            <id>1</id>
            <name>MAVEN-CENTRE</name>
            <url>https://repo1.maven.org/maven2/</url>
        </repository>
    </repositories>

    <!-- Dependencies -->
    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.12</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.4</version>
            <!-- Exclude the Hadoop and Scala dependencies pulled in by Spark
                 so we can pin our own versions below. -->
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-client</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.scala-lang</groupId>
                    <artifactId>scala-library</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Our own pinned Hadoop version -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.9.0</version>
        </dependency>
    </dependencies>

    <!-- Plugin that compiles the Scala sources -->
    <build>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <configuration>
                            <includes>
                                <include>**/*.scala</include>
                            </includes>
                        </configuration>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
提交到集群中的時候,shell 命令為 ./bin/spark-submit --class hello --master spark://192.168.199.120:7077 --executor-memory 1G --total-executor-cores 3 comspark.jar ; 這里 Spark master 端口為 7077,HDFS 端口為 9000