Note: copy the MySQL driver jar into spark/lib, copy hive-site.xml into the project's resources directory, and do not use a hostname when debugging remotely (the code below uses the IP address directly).
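For reference, a minimal hive-site.xml sketch, assuming the metastore is backed by MySQL on the same host used in the code below; the JDBC URL, database name, user name and password are placeholders and must match your own metastore (the thrift URI mirrors the commented-out hive.metastore.uris setting in the program):

<configuration>
  <!-- Metastore backed by MySQL; all values below are placeholders for this example -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.66.66:3306/hive?createDatabaseIfNotExist=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
  </property>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://192.168.66.66:9083</value>
  </property>
</configuration>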
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext
import java.io.FileNotFoundException
import java.io.IOException

object HiveSelect {
  def main(args: Array[String]) {
    System.setProperty("hadoop.home.dir", "D:\\hadoop") // point at the local Hadoop installation
    val conf = new SparkConf()
      .setAppName("HiveApp")
      .setMaster("spark://192.168.66.66:7077")
      .set("spark.executor.memory", "1g")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .setJars(Seq("D:\\workspace\\scala\\out\\scala.jar")) // ship the application jar to the remote Spark cluster
      //.set("hive.metastore.uris", "thrift://192.168.66.66:9083") // remote Hive metastore address
      //.set("spark.driver.extraClassPath", "D:\\json\\mysql-connector-java-5.1.39.jar")
    val sparkcontext = new SparkContext(conf)
    try {
      val hiveContext = new HiveContext(sparkcontext)
      hiveContext.sql("use siat") // switch to the target database
      hiveContext.sql("DROP TABLE IF EXISTS src") // drop the table if it exists
      hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'") // create the table
      hiveContext.sql("LOAD DATA LOCAL INPATH 'D:\\workspace\\scala\\src.txt' INTO TABLE src") // load the data
      hiveContext.sql("SELECT * FROM src").collect().foreach(println) // query the data
    } catch {
      // more specific exceptions must come before the general ones, otherwise they are unreachable
      case e: FileNotFoundException  => println("Missing file exception")
      case e: IOException            => println("IO Exception")
      case e: NumberFormatException  => println(e)
      case e: ArithmeticException    => println(e)
      case e: IllegalArgumentException => println("illegal arg. exception")
      case e: IllegalStateException  => println("illegal state exception")
      case e: Exception              => println(e)
      case e: Throwable              => println("found an unknown exception " + e)
    } finally {
      sparkcontext.stop()
    }
  }
}
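If you are on Spark 2.x, HiveContext is deprecated in favour of SparkSession with Hive support enabled. A minimal sketch of the same flow, reusing the placeholder master URL, application jar and table from the program above (the hive-site.xml in the project resources is picked up from the classpath):

import org.apache.spark.sql.SparkSession

object HiveSelectSession {
  def main(args: Array[String]): Unit = {
    // Same placeholder master URL and application jar as in the HiveContext version above.
    val spark = SparkSession.builder()
      .appName("HiveApp")
      .master("spark://192.168.66.66:7077")
      .config("spark.executor.memory", "1g")
      .config("spark.jars", "D:\\workspace\\scala\\out\\scala.jar")
      .enableHiveSupport() // reads hive-site.xml from the classpath (project resources)
      .getOrCreate()
    try {
      spark.sql("use siat")
      spark.sql("SELECT * FROM src").collect().foreach(println)
    } finally {
      spark.stop()
    }
  }
}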
Appendix 1: Scala Spark API - http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package
org.apache.spark org.apache.spark.api.java org.apache.spark.api.java.function org.apache.spark.broadcast
org.apache.spark.graphx org.apache.spark.graphx.impl org.apache.spark.graphx.lib org.apache.spark.graphx.util
org.apache.spark.input org.apache.spark.internal org.apache.spark.internal.io org.apache.spark.io org.apache.spark.launcher org.apache.spark.mapred org.apache.spark.metrics.source
org.apache.spark.ml org.apache.spark.ml.attribute org.apache.spark.ml.classification org.apache.spark.ml.clustering org.apache.spark.ml.evaluation org.apache.spark.ml.feature org.apache.spark.ml.fpm org.apache.spark.ml.linalg org.apache.spark.ml.param org.apache.spark.ml.recommendation org.apache.spark.ml.regression org.apache.spark.ml.source.libsvm org.apache.spark.ml.stat org.apache.spark.ml.stat.distribution org.apache.spark.ml.tree org.apache.spark.ml.tuning org.apache.spark.ml.util
org.apache.spark.mllib org.apache.spark.mllib.classification org.apache.spark.mllib.clustering org.apache.spark.mllib.evaluation org.apache.spark.mllib.feature org.apache.spark.mllib.fpm org.apache.spark.mllib.linalg org.apache.spark.mllib.linalg.distributed org.apache.spark.mllib.optimization org.apache.spark.mllib.pmml org.apache.spark.mllib.random org.apache.spark.mllib.rdd org.apache.spark.mllib.recommendation org.apache.spark.mllib.regression org.apache.spark.mllib.stat org.apache.spark.mllib.stat.distribution org.apache.spark.mllib.stat.test org.apache.spark.mllib.tree org.apache.spark.mllib.tree.configuration org.apache.spark.mllib.tree.impurity org.apache.spark.mllib.tree.loss org.apache.spark.mllib.tree.model org.apache.spark.mllib.util
org.apache.spark.partial org.apache.spark.rdd org.apache.spark.scheduler org.apache.spark.scheduler.cluster org.apache.spark.security org.apache.spark.serializer
org.apache.spark.sql org.apache.spark.sql.api.java org.apache.spark.sql.catalog org.apache.spark.sql.expressions org.apache.spark.sql.expressions.javalang org.apache.spark.sql.expressions.scalalang org.apache.spark.sql.hive org.apache.spark.sql.hive.execution org.apache.spark.sql.hive.orc org.apache.spark.sql.jdbc org.apache.spark.sql.sources org.apache.spark.sql.streaming org.apache.spark.sql.types org.apache.spark.sql.util
org.apache.spark.status.api.v1 org.apache.spark.status.api.v1.streaming org.apache.spark.storage
org.apache.spark.streaming org.apache.spark.streaming.api.java org.apache.spark.streaming.dstream org.apache.spark.streaming.flume org.apache.spark.streaming.kafka org.apache.spark.streaming.kinesis org.apache.spark.streaming.receiver org.apache.spark.streaming.scheduler org.apache.spark.streaming.scheduler.rate org.apache.spark.streaming.util
org.apache.spark.ui.env org.apache.spark.ui.exec org.apache.spark.ui.jobs org.apache.spark.ui.storage org.apache.spark.util org.apache.spark.util.random org.apache.spark.util.sketch