[root@centos00 ~]$ cd hadoop-2.6.0-cdh5.14.2/
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start namenode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start datanode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/yarn-daemon.sh start resourcemanager

[root@centos00 ~]$ cd /opt/cdh5.14.2/hive-1.1.0-cdh5.14.2/
[root@centos00 hive-1.1.0-cdh5.14.2]$ bin/hive --service metastore &

[root@centos00 ~]$ cd /opt/cdh5.14.2/spark-2.2.1-cdh5.14.2/
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-master.sh
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-slaves.sh

scala> spark.sql("create table mydemo(id int, name string, gender string)")
res0: org.apache.spark.sql.DataFrame = []

scala> spark.sql("insert into mydemo values(1, 'Jack', 'M'),(2, 'Judy', 'F')")
res1: org.apache.spark.sql.DataFrame = []

scala> spark.sql("select * from mydemo").show(false)
+---+----+------+
|id |name|gender|
+---+----+------+
|1  |Jack|M     |
|2  |Judy|F     |
+---+----+------+

scala> var ds = spark.table("mydemo")
ds: org.apache.spark.sql.DataFrame = [id: int, name: string ... 1 more field]

scala> ds = ds.withColumn("flag",when($"gender" === "M",true).otherwise(false))
ds: org.apache.spark.sql.DataFrame = [id: int, name: string ... 2 more fields]

scala> ds.show(false)
+---+----+------+-----+
|id |name|gender|flag |
+---+----+------+-----+
|1  |Jack|M     |true |
|2  |Judy|F     |false|
+---+----+------+-----+

scala> var df = Seq(
     | (1, "regular"),
     | (2, "Unknown")).toDF("id", "size")
df: org.apache.spark.sql.DataFrame = [id: int, size: string]

scala> df = df.withColumn("flag",when($"size".isin(Array[String]("regular"):_*),0D).otherwise($"size"))
df: org.apache.spark.sql.DataFrame = [id: int, size: string ... 1 more field]

scala> df.show(false)
+---+-------+-------+
|id |size   |flag   |
+---+-------+-------+
|1  |regular|0.0    |
|2  |Unknown|Unknown|
+---+-------+-------+