头歌(Educoder)实践教学平台——Hive综合应用案例


说明

  【适用平台】头歌(Educoder)实践教学平台

 

  【适用实训】

    一、Hive综合应用案例——学生成绩查询

    二、Hive综合应用案例——用户学历查询

    三、Hive综合应用案例——用户搜索日志分析

  【注意】

    请先确认上面列出的实训与你要做的实训是否为同一个实训!!!

    如果是同一个实训,直接复制粘贴代码,然后点击测评即可。

  【发文时间】2021-05-26 16:03

  【更新时间】2022-04-12

  【更新内容】

    【新增】Hive综合应用案例——学生成绩查询——第1关:计算每个班的语文总成绩和数学总成绩

    【新增】Hive综合应用案例——学生成绩查询——第2关:查询选修了3门以上的课程的学生姓名

    【新增】Hive综合应用案例——用户搜索日志分析——第1关:2018年点击量最高的10个网站域名

    【新增】Hive综合应用案例——用户搜索日志分析——第2关:同一种搜索词,哪个网站域名被用户访问最多

    【新增】Hive综合应用案例——用户搜索日志分析——第3关:每月最火的搜索词

  【更新说明】

    以上【新增】内容未测试是否能通过!!!

    以上【新增】内容未测试是否能通过!!!

    以上【新增】内容未测试是否能通过!!!

正文

一、Hive综合应用案例——学生成绩查询

  • 第 1 关:计算每个班的语文总成绩和数学总成绩

---------- do not modify ----------
drop database if exists mydb cascade;
set hive.auto.convert.join = false;
set hive.ignore.mapjoin.hint=false;
---------- do not modify ----------

---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- score: one comma-separated row per student (name, Chinese score, maths score).
create table if not exists score (
    name    string comment '姓名',
    chinese string comment '语文成绩',
    maths   string comment '数学成绩'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step1_files/score.txt into score.
load data local inpath '/root/data/step1_files/score.txt' into table score;

-- class: one comma-separated row per student (name, class name).
create table if not exists class (
    stuname   string comment '姓名',
    classname string comment '所在班级'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step1_files/class.txt into class.
load data local inpath '/root/data/step1_files/class.txt' into table class;

-- Total Chinese score and total maths score per class.
-- Each total only includes scores of at least 60 in that same subject; the two
-- per-subject totals are then matched on the class name, so a class appears
-- only when it has a row in both totals.
select
    t1.classname,
    t1.chinese,
    t2.maths
from (
    -- Chinese total per class, scores >= 60 only.
    select
        c.classname as classname,
        sum(s.chinese) as chinese
    from class c
    inner join score s
        on c.stuname = s.name
    where s.chinese >= 60
    group by c.classname
) as t1
inner join (
    -- Maths total per class, scores >= 60 only.
    select
        c.classname as classname,
        sum(s.maths) as maths
    from class c
    inner join score s
        on c.stuname = s.name
    where s.maths >= 60
    group by c.classname
) as t2
    on t1.classname = t2.classname;
---------- end ----------
  • 第 2 关:查询选修了3门以上的课程的学生姓名

---------- do not modify ----------
drop database if exists mydb cascade;
set hive.auto.convert.join = false;
set hive.ignore.mapjoin.hint=false;
---------- do not modify ----------

---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- my_stu: one comma-separated row per student.
create table if not exists my_stu (
    id   string comment '学生id',
    name string comment '姓名',
    sex  string comment '性别',
    age  string comment '年龄',
    col  string comment '所选的系'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_student.txt into my_stu.
load data local inpath '/root/data/step2_files/my_student.txt' into table my_stu;

-- my_score: one comma-separated row per (student, course) with the score.
create table if not exists my_score (
    id       string comment '学生id',
    courseid string comment '课程id',
    score    string comment '成绩'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_score.txt into my_score.
load data local inpath '/root/data/step2_files/my_score.txt' into table my_score;

-- my_course: one comma-separated row per course.
create table if not exists my_course (
    courseid   string comment '课程id',
    coursename string comment '课程名称'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_course.txt into my_course.
load data local inpath '/root/data/step2_files/my_course.txt' into table my_course;

-- Names of students who have at least 3 course rows in my_score, together
-- with that count. The subquery counts non-null courseid values per student id
-- and keeps only ids whose count is >= 3.
select
    my_stu.name,
    new_table.c_s
from my_stu
inner join (
    select
        id,
        count(courseid) as c_s
    from my_score
    group by my_score.id
    having count(courseid) >= 3
) as new_table
    on my_stu.id = new_table.id;

---------- end ----------
  • 第 3 关:课程选修人数

 ---------- do not modify ----------
drop database if exists mydb cascade;
set hive.auto.convert.join = false;
set hive.ignore.mapjoin.hint=false;
---------- do not modify ----------

---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- my_stu: one comma-separated row per student.
create table if not exists my_stu (
    id   string comment '学生id',
    name string comment '姓名',
    sex  string comment '性别',
    age  string comment '年龄',
    col  string comment '所选的系'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_student.txt into my_stu.
load data local inpath '/root/data/step2_files/my_student.txt' into table my_stu;

-- my_score: one comma-separated row per (student, course) with the score.
create table if not exists my_score (
    id       string comment '学生id',
    courseid string comment '课程id',
    score    string comment '成绩'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_score.txt into my_score.
load data local inpath '/root/data/step2_files/my_score.txt' into table my_score;

-- my_course: one comma-separated row per course.
create table if not exists my_course (
    courseid   string comment '课程id',
    coursename string comment '课程名称'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_course.txt into my_course.
load data local inpath '/root/data/step2_files/my_course.txt' into table my_course;

-- Number of enrolments per course name.
-- t1 pairs every score row with its student (rows whose student id is not in
-- my_stu are dropped by the inner join); t2 attaches the course name; the
-- outer query counts the rows per course name.
select
    t2.coursename,
    count(*)
from (
    select
        t1.name as name,
        course.coursename as coursename
    from (
        select
            stu.name as name,
            score.courseid as courseid
        from my_score score
        inner join my_stu stu
            on score.id = stu.id
    ) as t1
    inner join my_course course
        on t1.courseid = course.courseid
) as t2
group by t2.coursename;
---------- end ----------
  • 第 4 关:shujuku课程的平均成绩

 ---------- do not modify ----------
drop database if exists mydb cascade;
set hive.auto.convert.join = false;
set hive.ignore.mapjoin.hint=false;
---------- do not modify ----------

---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- my_stu: one comma-separated row per student.
create table if not exists my_stu (
    id   string comment '学生id',
    name string comment '姓名',
    sex  string comment '性别',
    age  string comment '年龄',
    col  string comment '所选的系'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_student.txt into my_stu.
load data local inpath '/root/data/step2_files/my_student.txt' into table my_stu;

-- my_score: one comma-separated row per (student, course) with the score.
create table if not exists my_score (
    id       string comment '学生id',
    courseid string comment '课程id',
    score    string comment '成绩'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_score.txt into my_score.
load data local inpath '/root/data/step2_files/my_score.txt' into table my_score;

-- my_course: one comma-separated row per course.
create table if not exists my_course (
    courseid   string comment '课程id',
    coursename string comment '课程名称'
)
row format delimited fields terminated by ','
stored as textfile;

-- Load /root/data/step2_files/my_course.txt into my_course.
load data local inpath '/root/data/step2_files/my_course.txt' into table my_course;

-- Average score of the course named 'shujuku'.
-- t1 looks up the course id(s) with that name, t2 averages the matching score
-- rows per course id, and the final join puts the course name back next to
-- the average.
select
    t3.coursename,
    t2.avg_score
from (
    select
        t1.courseid as courseid,
        avg(score.score) as avg_score
    from (
        select courseid
        from my_course
        where my_course.coursename = 'shujuku'
    ) as t1
    inner join my_score score
        on t1.courseid = score.courseid
    group by t1.courseid
) as t2
inner join my_course t3
    on t2.courseid = t3.courseid;

---------- end ----------

二、Hive综合应用案例——用户学历查询

  • 第 1 关:查询每一个用户从出生到现在的总天数

 ---------- do not modify ----------
drop database if exists mydb cascade;
---------- do not modify ----------

---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- usertab: one comma-separated row per user; every column is kept as text.
create table usertab (
    id               string,
    sex              string,
    time             string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited fields terminated by ',';

-- Load /root/data.txt into usertab.
load data local inpath '/root/data.txt' into table usertab;

-- Days from each user's birth date to 2019-06-10.
-- The slashes in `time` are replaced with dashes before the value is passed
-- to datediff.
select
    id,
    datediff('2019-06-10', regexp_replace(time, '/', '-'))
from usertab;
---------- end ----------
  • 第 2 关:同一个地区相同的教育程度的最高收入

 ---------- do not modify ----------
drop database if exists mydb cascade;
---------- do not modify ----------

---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- usertab1: one comma-separated row per user.
create table usertab1 (
    id               int,
    sex              string,
    time             string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited fields terminated by ',';

-- Load the user data.
-- NOTE(review): the sibling exercises load /root/data.txt; confirm that
-- /root/data1.txt is the file this exercise actually provides.
load data local inpath '/root/data1.txt' into table usertab1;

-- Highest income for each (area, education) pair: rows are numbered within
-- each pair by income descending and only row 1 is kept.
-- NOTE(review): income is declared as string, so this ordering compares text
-- rather than numbers — confirm the values sort correctly as text, or cast
-- them to a numeric type.
select
    area,
    education,
    income
from (
    select
        area,
        education,
        income,
        row_number() over (
            partition by area, education
            order by income desc
        ) as t1
    from usertab1
) as t2
where t2.t1 = 1;

---------- end ----------
  • 第 3 关:统计各级学历所占总人数百分比

 ---------- do not modify ----------
drop database if exists mydb cascade;
set hive.mapred.mode=nonstrict;
---------- do not modify ----------

---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- usertab2: one comma-separated row per user.
create table usertab2 (
    id               int,
    sex              string,
    time             string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited fields terminated by ',';

-- Load /root/data.txt into usertab2.
load data local inpath '/root/data.txt' into table usertab2;

-- Share of users per education level, as a percentage rounded to two decimal
-- places with a trailing '%', ordered by education.
-- t1 holds the user count per education level and t2 the single-row total;
-- the cross join puts the total next to every per-level count.
select
    concat(round(t1.cnted * 100 / t2.cnt, 2), '%'),
    t1.education
from (
    select
        count(*) as cnted,
        education
    from usertab2
    group by education
) as t1
cross join (
    select count(*) as cnt
    from usertab2
) as t2
order by t1.education;

---------- end ----------

三、Hive综合应用案例——用户搜索日志分析

  • 第1关:2018年点击量最高的10个网站域名

---------- do not modify ----------
drop database if exists mydb cascade;
---------- do not modify ----------
---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- db_search: one space-separated search-log record per line.
create table if not exists db_search (
    id      string comment '用户id',
    key     string comment '搜索关键词',
    ranking int    comment 'url在返回结果中的排名',
    or_der  int    comment '点击顺序',
    url     string comment '网站域名',
    time    string comment '日期'
)
row format delimited fields terminated by ' '
lines terminated by '\n'
stored as textfile;

-- Load /root/data.txt into db_search.
load data local inpath '/root/data.txt' into table db_search;

-- The 10 domains with the most log rows in 2018, most-clicked first.
select
    url,
    count(*) as cnt
from db_search
where year(time) = '2018'
group by url
order by cnt desc
limit 10;
---------- end ----------
  • 第2关:同一种搜索词,哪个网站域名被用户访问最多

---------- do not modify ----------
drop database if exists mydb cascade;
---------- do not modify ----------
---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- db_search: one space-separated search-log record per line.
create table if not exists db_search (
    id      string comment '用户id',
    key     string comment '搜索关键词',
    ranking int    comment 'url在返回结果中的排名',
    or_der  int    comment '点击顺序',
    url     string comment '网站域名',
    time    string comment '日期'
)
row format delimited fields terminated by ' '
lines terminated by '\n'
stored as textfile;

-- Load /root/data.txt into db_search.
load data local inpath '/root/data.txt' into table db_search;

-- For every search keyword, the domain with the most log rows; the result is
-- then sorted by that row count descending and cut to the first 10 keywords.
-- Inner query: count rows per (keyword, domain) and number the domains of each
-- keyword from most to least visited. Outer query: keep only number 1.
select
    t.key,
    t.url,
    t.cnt
from (
    select
        key,
        url,
        count(*) as cnt,
        row_number() over (
            partition by key
            order by count(*) desc
        ) as rk
    from db_search
    group by key, url
) t
where t.rk = 1
order by t.cnt desc
limit 10;
---------- end ----------
  • 第3关:每月最火的搜索词

---------- do not modify ----------
drop database if exists mydb cascade;
---------- do not modify ----------
---------- begin ----------
-- Create the mydb database and make it the current database.
create database if not exists mydb;
use mydb;

-- db_search: one space-separated search-log record per line.
create table if not exists db_search (
    id      string comment '用户id',
    key     string comment '搜索关键词',
    ranking int    comment 'url在返回结果中的排名',
    or_der  int    comment '点击顺序',
    url     string comment '网站域名',
    time    string comment '日期'
)
row format delimited fields terminated by ' '
lines terminated by '\n'
stored as textfile;

-- Load /root/data.txt into db_search.
load data local inpath '/root/data.txt' into table db_search;

-- The most-searched keyword of every (year, month), shown as 'year-month'.
-- Inner query: count rows per (year, month, keyword) and number the keywords
-- of each month from most to least searched. Outer query: keep only number 1.
select
    concat(t.y, '-', t.m),
    t.key,
    t.cnt
from (
    select
        year(time) as y,
        month(time) as m,
        key,
        count(*) as cnt,
        row_number() over (
            partition by year(time), month(time)
            order by count(*) desc
        ) as rk
    from db_search
    group by year(time), month(time), key
) t
where t.rk = 1;
---------- end ----------

 

 

参考:

[1]:Hive综合应用案例——学生成绩查询

[2]:Hive综合应用案例——用户学历查询

 

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM