[root@centos00 ~]$ cd /opt/cdh5.14.2/hadoop-2.6.0-cdh5.14.2/
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start namenode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start datanode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ bin/hdfs dfs -ls /input/dept
Found 3 items
-rw-r--r-- 1 root supergroup 0 2020-08-27 20:44 /input/dept/_SUCCESS
-rw-r--r-- 1 root supergroup 484 2020-08-27 20:44 /input/dept/part-00000-247a5279-306d-4cae-a85b-4d0196f39ebc-c000.snappy.parquet
-rw-r--r-- 1 root supergroup 472 2020-08-27 20:44 /input/dept/part-00001-247a5279-306d-4cae-a85b-4d0196f39ebc-c000.snappy.parquet
[root@centos00 ~]$ cd /opt/cdh5.14.2/hive-1.1.0-cdh5.14.2/
[root@centos00 hive-1.1.0-cdh5.14.2]$ bin/hive --service metastore &
[root@centos00 hive-1.1.0-cdh5.14.2]$ cd ../spark-2.2.1-cdh5.14.2/
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-master.sh
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-slaves.sh
[root@centos00 spark-2.2.1-cdh5.14.2]$ bin/spark-shell --master local[2]
/*
* Method 1: generic load (parquet is the default data source)
*/
scala> val p = Seq("/input/dept")
p: Seq[String] = List(/input/dept)
scala> val df = spark.read.load(p:_*)
df: org.apache.spark.sql.DataFrame = [value: string]
scala> df.show(false)
+-----------------------+
|value |
+-----------------------+
|10 ACCOUNTING NEW YORK|
|20 RESERACH DALLAS |
|30 SALES CHICAGO |
|40 OPREARIONS BOSTON |
+-----------------------+
scala> df.printSchema
root
|-- value: string (nullable = true)
/*
* Method 2: explicit parquet reader
*/
scala> val df2 = spark.read.parquet("/input/dept")
df2: org.apache.spark.sql.DataFrame = [value: string]
scala> df2.show(false)
+----------------------+
|value |
+----------------------+
|10 ACCOUNTING NEW YORK|
|20 RESERACH DALLAS |
|30 SALES CHICAGO |
|40 OPREARIONS BOSTON |
+----------------------+
scala> df2.printSchema
root
|-- value: string (nullable = true)