頭歌(Educoder)實踐教學平台——Hive綜合應用案例


說明

  【適用平台】頭歌(Educoder)實踐教學平台

 

  【適用實訓】

    一、Hive綜合應用案例——學生成績查詢

    二、Hive綜合應用案例——用戶學歷查詢

    三、Hive綜合應用案例——用戶搜索日志分析

  【注意】

    看清楚上面說的實訓是不是和你需要做的實訓是同一個實訓!!!

    如果是同一個實訓,直接複製粘貼代碼,然後點擊測評即可。

  【發文時間】2021-05-26 16:03

  【更新時間】2022-04-12

  【更新內容】

    【新增】Hive綜合應用案例——學生成績查詢——第1關:計算每個班的語文總成績和數學總成績

    【新增】Hive綜合應用案例——學生成績查詢——第2關:查詢選修了3門以上的課程的學生姓名

    【新增】Hive綜合應用案例——用戶搜索日志分析——第1關:2018年點擊量最高的10個網站域名

    【新增】Hive綜合應用案例——用戶搜索日志分析——第2關:同一種搜索詞,哪個網站域名被用戶訪問最多

    【新增】Hive綜合應用案例——用戶搜索日志分析——第3關:每月最火的搜索詞

  【更新說明】

    以上【新增】內容未測試是否能通過!!!

    以上【新增】內容未測試是否能通過!!!

    以上【新增】內容未測試是否能通過!!!

正文

一、Hive綜合應用案例 — 學生成績查詢

  • 第 1 關:計算每個班的語文總成績和數學總成績

---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
set hive.auto.convert.join = false;
set hive.ignore.mapjoin.hint=false;
---------- DO NOT MODIFY ----------

---------- begin ----------
-- Level 1: total Chinese score and total maths score for every class.
-- mydb was dropped above, so the database and both tables are rebuilt here.
create database if not exists mydb;
use mydb;

-- score: one comma-delimited text row per student (name + two subject scores).
create table if not exists score (
    name    string comment '姓名',
    chinese string comment '語文成績',
    maths   string comment '數學成績'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step1_files/score.txt' into table score;

-- class: maps a student name to the class that student belongs to.
create table if not exists class (
    stuname   string comment '姓名',
    classname string comment '所在班級'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step1_files/class.txt' into table class;

-- Task: compute per-class totals for Chinese and maths; a student who scored
-- below 60 in a subject must not be counted.
-- Each subject is aggregated separately with its own ">= 60" filter, then the
-- two aggregates are matched on class name, so only classes that have a total
-- for both subjects are returned.
-- NOTE(review): the threshold is applied per subject, so a student failing
-- only one subject still contributes to the other subject's total -- confirm
-- this is the intended reading of the requirement.
-- The score columns are declared as string and compared with the number 60.
select
    t1.classname,
    t1.chinese,
    t2.maths
from (
    select
        cls.classname   as classname,
        sum(sc.chinese) as chinese
    from class cls
    inner join score sc
        on cls.stuname = sc.name
    where sc.chinese >= 60
    group by cls.classname
) t1
inner join (
    select
        cls.classname as classname,
        sum(sc.maths) as maths
    from class cls
    inner join score sc
        on cls.stuname = sc.name
    where sc.maths >= 60
    group by cls.classname
) t2
    on t1.classname = t2.classname;
---------- end ----------
  • 第 2 關:查詢選修了3門以上的課程的學生姓名

---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
set hive.auto.convert.join = false;
set hive.ignore.mapjoin.hint=false;
---------- DO NOT MODIFY ----------

---------- begin ----------
-- Level 2: names of the students enrolled in three or more courses.
-- mydb was dropped above, so the database and all three tables are rebuilt.
create database if not exists mydb;
use mydb;

-- my_stu: student master data, one comma-delimited text row per student.
create table if not exists my_stu (
    id   string comment '學生id',
    name string comment '姓名',
    sex  string comment '性別',
    age  string comment '年齡',
    col  string comment '所選的系'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_student.txt' into table my_stu;

-- my_score: one row per (student, course) pair with the score obtained.
create table if not exists my_score (
    id       string comment '學生id',
    courseid string comment '課程id',
    score    string comment '成績'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_score.txt' into table my_score;

-- my_course: course id to course name lookup (loaded, not used by the query).
create table if not exists my_course (
    courseid   string comment '課程id',
    coursename string comment '課程名稱'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_course.txt' into table my_course;

-- Count the course rows per student id, keep the ids with a count of at
-- least 3, then attach the student name. The count is returned next to the name.
select
    my_stu.name,
    new_table.c_s
from my_stu
inner join (
    select
        id,
        count(courseid) as c_s
    from my_score
    group by my_score.id
    having count(courseid) >= 3
) new_table
    on my_stu.id = new_table.id;

---------- end ----------
  • 第 3 關:課程選修人數

 ---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
set hive.auto.convert.join = false;
set hive.ignore.mapjoin.hint=false;
---------- DO NOT MODIFY ----------

---------- begin ----------
-- Level 3: number of students enrolled in each course.
-- The code-viewer line numbers that had been pasted in front of every line
-- were removed; with them the script is not valid HiveQL.
-- mydb was dropped above, so the database and all three tables are rebuilt.
create database if not exists mydb;
use mydb;

-- my_stu: student master data, one comma-delimited text row per student.
create table if not exists my_stu (
    id   string comment '學生id',
    name string comment '姓名',
    sex  string comment '性別',
    age  string comment '年齡',
    col  string comment '所選的系'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_student.txt' into table my_stu;

-- my_score: one row per (student, course) pair with the score obtained.
create table if not exists my_score (
    id       string comment '學生id',
    courseid string comment '課程id',
    score    string comment '成績'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_score.txt' into table my_score;

-- my_course: course id to course name lookup.
create table if not exists my_course (
    courseid   string comment '課程id',
    coursename string comment '課程名稱'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_course.txt' into table my_course;

-- Resolve every score row to (student name, course name), then count the rows
-- per course name. Both joins are inner joins, so score rows without a
-- matching student or course are not counted.
select
    t2.coursename,
    count(*)
from (
    select
        stu.name          as name,
        course.coursename as coursename
    from my_score score
    inner join my_stu stu
        on score.id = stu.id
    inner join my_course course
        on score.courseid = course.courseid
) t2
group by t2.coursename;
---------- end ----------
  • 第 4 關:shujuku課程的平均成績

 ---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
set hive.auto.convert.join = false;
set hive.ignore.mapjoin.hint=false;
---------- DO NOT MODIFY ----------

---------- begin ----------
-- Level 4: average score of the course named 'shujuku'.
-- The code-viewer line numbers that had been pasted in front of every line
-- were removed; with them the script is not valid HiveQL.
-- mydb was dropped above, so the database and all three tables are rebuilt.
create database if not exists mydb;
use mydb;

-- my_stu: student master data (loaded, not used by the query below).
create table if not exists my_stu (
    id   string comment '學生id',
    name string comment '姓名',
    sex  string comment '性別',
    age  string comment '年齡',
    col  string comment '所選的系'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_student.txt' into table my_stu;

-- my_score: one row per (student, course) pair; score is declared as string.
create table if not exists my_score (
    id       string comment '學生id',
    courseid string comment '課程id',
    score    string comment '成績'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_score.txt' into table my_score;

-- my_course: course id to course name lookup.
create table if not exists my_course (
    courseid   string comment '課程id',
    coursename string comment '課程名稱'
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/root/data/step2_files/my_course.txt' into table my_course;

-- t1 picks the course id(s) whose name is 'shujuku', t2 averages the scores
-- recorded for those ids, and the final join to my_course puts the course name
-- back next to the average.
select
    t3.coursename,
    t2.avg_score
from (
    select
        t1.courseid      as courseid,
        avg(score.score) as avg_score
    from (
        select courseid
        from my_course
        where my_course.coursename = 'shujuku'
    ) t1
    inner join my_score score
        on t1.courseid = score.courseid
    group by t1.courseid
) t2
inner join my_course t3
    on t2.courseid = t3.courseid;

---------- end ----------

二、Hive綜合應用案例——用戶學歷查詢

  • 第 1 關:查詢每一個用戶從出生到現在的總天數

 ---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
---------- DO NOT MODIFY ----------

---------- begin ----------
-- User level 1: number of days from each user's birth date to 2019-06-10.
-- The code-viewer line numbers that had been pasted in front of every line
-- were removed (with them the script is not valid HiveQL), and the closing
-- "end" marker that the other scripts carry was added.
create database if not exists mydb;
use mydb;

-- usertab: raw user records, one comma-delimited row per user.
-- Every column, including the date column `time`, is declared as string.
create table usertab (
    id               string,
    sex              string,
    time             string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
    fields terminated by ',';

load data local inpath '/root/data.txt' into table usertab;

-- Every '/' in `time` is replaced by '-' before the value is handed to
-- datediff together with the fixed reference date.
-- NOTE(review): presumably `time` is stored like yyyy/MM/dd -- confirm
-- against /root/data.txt.
select
    id,
    datediff('2019-06-10', regexp_replace(time, '/', '-'))
from usertab;
---------- end ----------
  • 第 2 關:同一個地區相同的教育程度的最高收入

 ---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
---------- DO NOT MODIFY ----------

---------- begin ----------
-- User level 2: highest income for each (area, education) combination.
-- The code-viewer line numbers that had been pasted in front of every line
-- were removed; with them the script is not valid HiveQL.
create database if not exists mydb;
use mydb;

-- usertab1: raw user records, one comma-delimited row per user.
create table usertab1 (
    id               int,
    sex              string,
    time             string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
    fields terminated by ',';

-- The file loaded here is /root/data1.txt (the original comment named
-- /root/data.txt, which did not match the statement).
load data local inpath '/root/data1.txt' into table usertab1;

-- Number the rows inside every (area, education) group by income, highest
-- first, and keep only the first row of each group.
-- NOTE(review): income is declared as string, so this ORDER BY sorts on the
-- string value; if the file holds plain numbers of different lengths the
-- first row may not be the numeric maximum -- confirm against the data
-- before adding a cast.
select
    area,
    education,
    income
from (
    select
        area,
        education,
        income,
        row_number() over (
            partition by area, education
            order by income desc
        ) as t1
    from usertab1
) t2
where t2.t1 = 1;

---------- end ----------
  • 第 3 關:統計各級學歷所占總人數百分比

 ---------- DO NOT MODIFY ----------

drop database if exists mydb cascade;
set hive.mapred.mode=nonstrict;
---------- DO NOT MODIFY ----------

---------- begin ----------
-- User level 3: share of the total user count held by each education level,
-- rounded to two decimals and rendered with a trailing '%'.
-- The code-viewer line numbers that had been pasted in front of every line
-- were removed; with them the script is not valid HiveQL.
create database if not exists mydb;
use mydb;

-- usertab2: raw user records, one comma-delimited row per user.
create table usertab2 (
    id               int,
    sex              string,
    time             string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
    fields terminated by ',';

load data local inpath '/root/data.txt' into table usertab2;

-- t1 holds one row per education level with its user count; t2 is a single
-- row holding the overall user count. The two are combined without a join
-- condition (written as an explicit cross join) so that every level can be
-- divided by the overall count. Rows are returned ordered by education.
select
    concat(round(t1.cnted * 100 / t2.cnt, 2), '%'),
    t1.education
from (
    select
        count(*) as cnted,
        education
    from usertab2
    group by education
) t1
cross join (
    select count(*) as cnt
    from usertab2
) t2
order by t1.education;

---------- end ----------

三、Hive綜合應用案例 — 用戶搜索日志分析

  • 第1關:2018年點擊量最高的10個網站域名

---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
---------- DO NOT MODIFY ----------
---------- begin ----------
-- Search-log level 1: the 10 site domains with the most clicks in 2018.
-- mydb was dropped above, so the database and the table are rebuilt here.
create database if not exists mydb;
use mydb;

-- db_search: one search-log row per line, fields separated by a single space.
create table if not exists db_search (
    id      string comment '用戶id',
    key     string comment '搜索關鍵詞',
    ranking int    comment 'url在返回結果中的排名',
    or_der  int    comment '點擊順序',
    url     string comment '網站域名',
    time    string comment '日期'
)
row format delimited
    fields terminated by ' '
    lines terminated by '\n'
stored as textfile;

load data local inpath '/root/data.txt' into table db_search;

-- Keep the rows whose year is 2018, count the rows per domain and return the
-- ten domains with the highest counts, largest first.
select
    url,
    count(*) cnt
from db_search
where year(time)='2018'
group by url
order by cnt desc
limit 10;
---------- end ----------
  • 第2關:同一種搜索詞,哪個網站域名被用戶訪問最多

---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
---------- DO NOT MODIFY ----------
---------- begin ----------
-- Search-log level 2: for every search keyword, the site domain visited most
-- often; the ten keyword/domain pairs with the highest visit counts are shown.
-- mydb was dropped above, so the database and the table are rebuilt here.
create database if not exists mydb;
use mydb;

-- db_search: one search-log row per line, fields separated by a single space.
create table if not exists db_search (
    id      string comment '用戶id',
    key     string comment '搜索關鍵詞',
    ranking int    comment 'url在返回結果中的排名',
    or_der  int    comment '點擊順序',
    url     string comment '網站域名',
    time    string comment '日期'
)
row format delimited
    fields terminated by ' '
    lines terminated by '\n'
stored as textfile;

load data local inpath '/root/data.txt' into table db_search;

-- The inner query counts rows per (keyword, domain) and numbers the domains
-- of each keyword by that count, highest first. The outer query keeps the
-- first-ranked domain of every keyword, sorts by count descending and takes
-- ten rows. row_number() starts at 1, so "rk = 1" selects exactly the rows
-- that the former "rk <= 1" did.
select
    t.key,
    t.url,
    t.cnt
from (
    select
        key,
        url,
        count(*) cnt,
        row_number() over (
            partition by key
            order by count(*) desc
        ) rk
    from db_search
    group by key, url
) t
where t.rk = 1
order by t.cnt desc
limit 10;
---------- end ----------
  • 第3關:每月最火的搜索詞

---------- DO NOT MODIFY ----------
drop database if exists mydb cascade;
---------- DO NOT MODIFY ----------
---------- begin ----------
-- Search-log level 3: the most searched keyword of every year-month.
-- mydb was dropped above, so the database and the table are rebuilt here.
create database if not exists mydb;
use mydb;

-- db_search: one search-log row per line, fields separated by a single space.
create table if not exists db_search (
    id      string comment '用戶id',
    key     string comment '搜索關鍵詞',
    ranking int    comment 'url在返回結果中的排名',
    or_der  int    comment '點擊順序',
    url     string comment '網站域名',
    time    string comment '日期'
)
row format delimited
    fields terminated by ' '
    lines terminated by '\n'
stored as textfile;

load data local inpath '/root/data.txt' into table db_search;

-- The inner query counts rows per (year, month, keyword) and numbers the
-- keywords of each year-month by that count, highest first. The outer query
-- keeps the first-ranked keyword and prints the period as "<year>-<month>".
-- row_number() starts at 1, so "rk = 1" selects exactly the rows that the
-- former "rk <= 1" did.
select
    concat(t.y,'-',t.m),
    t.key,
    t.cnt
from (
    select
        year(time)  y,
        month(time) m,
        key,
        count(*) cnt,
        row_number() over (
            partition by year(time), month(time)
            order by count(*) desc
        ) rk
    from db_search
    group by year(time), month(time), key
) t
where t.rk = 1;
---------- end ----------

 

 

參考:

[1]:Hive綜合應用案例——學生成績查詢

[2]:Hive綜合應用案例——用戶學歷查詢

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM