直接正常读取json格式数据,其中某些列是嵌套的格式,直接使用 "列.属性" 就可以获取对应的值
【1】数据文件 NestJsonFile 内容如下
{"name":"zhangsan","score":100,"infos":{"age":20,"gender":'man'}}
{"name":"lisi","score":70,"infos":{"age":21,"gender":'femal'}}
{"name":"wangwu","score":80,"infos":{"age":23,"gender":'man'}}
{"name":"maliu","score":50,"infos":{"age":16,"gender":'femal'}}
{"name":"tianqi","score":90,"infos":{"age":19,"gender":'man'}}
【2】scala代码实现
package com.it.baizhan.scalacode.sparksql.examples

import org.apache.spark.sql.SparkSession

/**
 * Reads nested JSON data.
 *
 * Spark infers a struct type for nested JSON objects; nested fields are then
 * addressable with dot notation ("column.attribute"), both in SQL and in the
 * DataFrame API.
 */
object ReadNestJsonFile {

  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder()
      .master("local")
      .appName("test")
      .getOrCreate()

    // Ensure the session is released even if reading/querying fails.
    try {
      val df = session.read.json("./data/NestJsonFile")
      df.printSchema()

      /**
       * root
       * |-- infos: struct (nullable = true)
       * |    |-- age: long (nullable = true)
       * |    |-- gender: string (nullable = true)
       * |-- name: string (nullable = true)
       * |-- score: long (nullable = true)
       */

      // createOrReplaceTempView is safe to call repeatedly in the same session,
      // unlike createTempView which throws if the view already exists.
      df.createOrReplaceTempView("t")
      session.sql(
        """
          | select name, score, infos.gender, infos.age
          | from t
        """.stripMargin).show()

      /**
       * +--------+-----+------+---+
       * |    name|score|gender|age|
       * +--------+-----+------+---+
       * |zhangsan|  100|   man| 20|
       * |    lisi|   70| femal| 21|
       * |  wangwu|   80|   man| 23|
       * |   maliu|   50| femal| 16|
       * |  tianqi|   90|   man| 19|
       * +--------+-----+------+---+
       */

      // Equivalent DataFrame API form:
      // import org.apache.spark.sql.functions._
      // df.select(col("name"), col("score"), col("infos.gender"), col("infos.age"))
      //   .show(100, false)
    } finally {
      // Stop the SparkSession to free local executor resources.
      session.stop()
    }
  }

}