Spark記錄-本地Spark讀取Hive數據簡單例子


注意:將mysql的驅動包拷貝到spark/lib下,將hive-site.xml拷貝到項目resources下,遠程調試不要使用主機名

import org.apache.spark._
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext
import java.io.FileNotFoundException
import java.io.IOException

/**
 * Minimal example: a local Spark driver that connects to a standalone
 * cluster and reads/writes a Hive table via HiveContext.
 *
 * Prerequisites (from the surrounding article): copy the MySQL driver jar
 * into spark/lib, put hive-site.xml on the project's resources path, and
 * use an IP address (not a hostname) for remote debugging.
 */
object HiveSelect {

  def main(args: Array[String]): Unit = {
    // Point Spark at a local Hadoop installation (needed on Windows for winutils).
    System.setProperty("hadoop.home.dir", "D:\\hadoop")

    val conf = new SparkConf()
      .setAppName("HiveApp")
      .setMaster("spark://192.168.66.66:7077")
      .set("spark.executor.memory", "1g")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      // Ship the application jar to the remote executors.
      .setJars(Seq("D:\\workspace\\scala\\out\\scala.jar"))
      //.set("hive.metastore.uris", "thrift://192.168.66.66:9083") // remote Hive metastore address
      //.set("spark.driver.extraClassPath", "D:\\json\\mysql-connector-java-5.1.39.jar")

    val sparkcontext = new SparkContext(conf)

    try {
      val hiveContext = new HiveContext(sparkcontext)
      hiveContext.sql("use siat")                 // select the database
      hiveContext.sql("DROP TABLE IF EXISTS src") // drop the table if present
      // Create the table; fields are tab-delimited ('\t' is a real TAB in the DDL).
      hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ")
      // Load data from a local file on the driver machine.
      hiveContext.sql("LOAD DATA LOCAL INPATH 'D:\\workspace\\scala\\src.txt' INTO TABLE src ")
      // Query and print every row.
      hiveContext.sql(" SELECT * FROM src").collect().foreach(println)
    } catch {
      // NOTE(review): the original listed NumberFormatException, Exception,
      // IllegalArgumentException and IllegalStateException AFTER a
      // `case _: Throwable` catch-all, which made them unreachable dead code.
      // Catch cases are matched top-down, so specific types must come first.
      case e: FileNotFoundException    => println("Missing file exception")
      case e: IOException              => println("IO Exception")
      case e: ArithmeticException      => println(e)
      case e: NumberFormatException    => println(e)
      case e: IllegalArgumentException => println("illegal arg. exception")
      case e: IllegalStateException    => println("illegal state exception")
      case e: Exception                => println(e)
      // Catching Throwable is discouraged (swallows fatal errors such as OOM);
      // kept last to preserve the original example's behavior.
      case e: Throwable                => println("found a unknown exception" + e)
    } finally {
      // Always release cluster resources, even on failure.
      sparkcontext.stop()
    }
  }
}

 附錄1:scala-spark api-http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package 

org.apache.spark

org.apache.spark.api.java

org.apache.spark.api.java.function

org.apache.spark.broadcast

org.apache.spark.graphx

org.apache.spark.graphx.impl

org.apache.spark.graphx.lib

org.apache.spark.graphx.util

org.apache.spark.input

org.apache.spark.internal

org.apache.spark.internal.io

org.apache.spark.io

org.apache.spark.launcher

org.apache.spark.mapred

org.apache.spark.metrics.source

org.apache.spark.ml

org.apache.spark.ml.attribute

org.apache.spark.ml.classification

org.apache.spark.ml.clustering

org.apache.spark.ml.evaluation

org.apache.spark.ml.feature

org.apache.spark.ml.fpm

org.apache.spark.ml.linalg

org.apache.spark.ml.param

org.apache.spark.ml.recommendation

org.apache.spark.ml.regression

org.apache.spark.ml.source.libsvm

org.apache.spark.ml.stat

org.apache.spark.ml.stat.distribution

org.apache.spark.ml.tree

org.apache.spark.ml.tuning

org.apache.spark.ml.util

org.apache.spark.mllib

org.apache.spark.mllib.classification

org.apache.spark.mllib.clustering

org.apache.spark.mllib.evaluation

org.apache.spark.mllib.feature

org.apache.spark.mllib.fpm

org.apache.spark.mllib.linalg

org.apache.spark.mllib.linalg.distributed

org.apache.spark.mllib.optimization

org.apache.spark.mllib.pmml

org.apache.spark.mllib.random

org.apache.spark.mllib.rdd

org.apache.spark.mllib.recommendation

org.apache.spark.mllib.regression

org.apache.spark.mllib.stat

org.apache.spark.mllib.stat.distribution

org.apache.spark.mllib.stat.test

org.apache.spark.mllib.tree

org.apache.spark.mllib.tree.configuration

org.apache.spark.mllib.tree.impurity

org.apache.spark.mllib.tree.loss

org.apache.spark.mllib.tree.model

org.apache.spark.mllib.util

org.apache.spark.partial

org.apache.spark.rdd

org.apache.spark.scheduler

org.apache.spark.scheduler.cluster

org.apache.spark.security

org.apache.spark.serializer

org.apache.spark.sql

org.apache.spark.sql.api.java

org.apache.spark.sql.catalog

org.apache.spark.sql.expressions

org.apache.spark.sql.expressions.javalang

org.apache.spark.sql.expressions.scalalang

org.apache.spark.sql.hive

org.apache.spark.sql.hive.execution

org.apache.spark.sql.hive.orc

org.apache.spark.sql.jdbc

org.apache.spark.sql.sources

org.apache.spark.sql.streaming

org.apache.spark.sql.types

org.apache.spark.sql.util

org.apache.spark.status.api.v1

org.apache.spark.status.api.v1.streaming

org.apache.spark.storage

org.apache.spark.streaming

org.apache.spark.streaming.api.java

org.apache.spark.streaming.dstream

org.apache.spark.streaming.flume

org.apache.spark.streaming.kafka

org.apache.spark.streaming.kinesis

org.apache.spark.streaming.receiver

org.apache.spark.streaming.scheduler

org.apache.spark.streaming.scheduler.rate

org.apache.spark.streaming.util

org.apache.spark.ui.env

org.apache.spark.ui.exec

org.apache.spark.ui.jobs

org.apache.spark.ui.storage

org.apache.spark.util

org.apache.spark.util.random

org.apache.spark.util.sketch

  


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM