[root@centos00 ~]$ cd /opt/cdh5.14.2/hadoop-2.6.0-cdh5.14.2/
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start namenode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start datanode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/yarn-daemon.sh start resourcemanager
[root@centos00 ~]$ cd /opt/cdh5.14.2/hive-1.1.0-cdh5.14.2/
[root@centos00 hive-1.1.0-cdh5.14.2]$ bin/hive --service metastore &
[root@centos00 ~]$ cd /opt/cdh5.14.2/spark-2.2.1-cdh5.14.2/
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-master.sh
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-slaves.sh
[root@centos00 spark-2.2.1-cdh5.14.2]$ bin/spark-shell
scala> spark.sql("create table mydemo(id int, name string, gender string)")
res0: org.apache.spark.sql.DataFrame = []
scala> spark.sql("insert into mydemo values(1, 'Jack', 'M'),(2, 'Judy', 'F')")
res1: org.apache.spark.sql.DataFrame = []
scala> spark.sql("select * from mydemo").show(false)
+---+----+------+
|id |name|gender|
+---+----+------+
|1  |Jack|M     |
|2  |Judy|F     |
+---+----+------+
scala> var ds = spark.table("mydemo")
ds: org.apache.spark.sql.DataFrame = [id: int, name: string ... 1 more field]
scala> ds = ds.withColumn("flag",when($"gender" === "M",true).otherwise(false))
ds: org.apache.spark.sql.DataFrame = [id: int, name: string ... 2 more fields]
scala> ds.show(false)
+---+----+------+-----+
|id |name|gender|flag |
+---+----+------+-----+
|1  |Jack|M     |true |
|2  |Judy|F     |false|
+---+----+------+-----+
scala> var df = Seq(
     | (1, "regular"),
     | (2, "Unknown")).toDF("id", "size")
df: org.apache.spark.sql.DataFrame = [id: int, size: string]
scala> df = df.withColumn("flag",when($"size".isin(Array[String]("regular"):_*),0D).otherwise($"size"))
df: org.apache.spark.sql.DataFrame = [id: int, size: string ... 1 more field]
scala> df.show(false)
+---+-------+-------+
|id |size   |flag   |
+---+-------+-------+
|1  |regular|0.0    |
|2  |Unknown|Unknown|
+---+-------+-------+