hive常考SQL題

本文轉載自查看原文 2020-05-16 14:29 1178 hive/ 面試

sql執行順序：from -> on -> join -> where -> group by -> 聚集函數 -> having -> having -> select ->distinct -> union -> order by -> limit

（1）分組求最大值

　　1、單表分組最大

//在每一個州中找出最大面積的國家，列出洲份 continent, 國家名字 name 及面積 area
SELECT continent, name, area 
FROM world x
WHERE area >= ALL(    //子查詢相當於查找每一個州的所有area =>笛卡爾乘積，X里的每一個area都會與y的area比較(前提是continent相等)
    SELECT area FROM world y 
　 WHERE y.continent=x.continent
　 AND area>0
)

//有些國家的人口是同洲份的所有其他國的3倍或以上。列出 國家名字name 和 洲份 continent。
SELECT name,continent FROM world x
WHERE x.population / 3 >= ALL(//子查詢相當於找到每州中所有符合的population
    SELECT population FROM world y 
　 WHERE y.continent = x.continent AND population >0 AND y.name != x.name
)

　　2、兩表分組求最大

//每個部門工資最高的員工
方法1
SELECT Department.name AS 'Department',Employee.name AS 'Employee',Salary
FROM Employee JOIN Department ON Employee.DepartmentId = Department.Id
WHERE (Employee.DepartmentId , Salary) IN (
    SELECT 
    DepartmentId, MAX(Salary)
    FROM Employee GROUP BY DepartmentId
)；

方法2
select Department.Name Department,e1.Name Employee,e1.Salary Salary
from Employee e1 join Department on DepartmentId=Department.Id
where Salary >=ALL(
    select Salary
    from Employee e2
    where e1.DepartmentId=e2.DepartmentId
)；

　　3、分組求topN

SQL寫法
SELECT d.Name AS 'Department', e1.Name AS 'Employee', e1.Salary
FROM Employee e1 JOIN Department d ON e1.DepartmentId = d.Id
WHERE 3 > (
    SELECT COUNT(DISTINCT e2.Salary)
    FROM Employee e2
    WHERE e1.Salary < e2.Salary AND e1.DepartmentId = e2.DepartmentId
);

取分組前3的代碼
SELECT e1.Salary
FROM Employee AS e1
WHERE 3 >(
    SELECT count(DISTINCT e2.Salary)
    FROM Employee AS e2
    WHERE e1.Salary <e2.Salary AND e2.DepartmentId = e1.DepartmentId
);

HQL寫法
select name,score
from(
    select name,score,row_number() over(partition by name order by score desc) rank
    from stu_score
)tmp_table
where tmp_table.rank<=3;

（2）利用row_number()和over()分組求topN

select name,score,rank
from (
    select name,score,row_number() over(partition by name order by score desc) rank
    from stu_score
)t
where rank<=3

解析：利用排序函數row_number()和開窗函數over()，其中排序函數row_number()是順序排序，dense_rank()是連續排序，rank()函數是跳躍排序。

（3）利用row_number()和over()求用戶連續登錄最長天數

select uid,max(continuous_days)
from(
    select uid,date_sub(date1-rank),count(1) as continuous_days
    from(
        select uid,date1,row_number() over(partition by uid order by date1) rank
        from user_login
    )
    group by uid,date_sub(date1-rank)
)
group by uid;

解析：其中date_sub(2019-12-31,1)=2019-12-30

（4）求留存率

　　1、先求每天的活躍用戶數，記為A表

select dayNo,count(distinct(uid)) num1
from user_login
group by dayNo

　　2、每天留存數，記為B表

select u.dayNo,count(distinct(s.uid)) num2
from userInfo u1 left join userInfo u2 on u1.uid=u2.uid;
where datediff(u2.dayNo,u1.dayNo)=1
group by u.dayNo

　　3、留層率

select t1.date,t2.num2/t1.num2
from (
    select date,count(distinct(user_id)) num1
    from login 
    group by date
) t1 join (
    select u1.date,count(distinct(u2.user_id)) num2
    from login u1 left join login u2 on u1.user_id=u2.user_id;
    where datediff(u2.date,u1.date)=1
    group by u1.date
) t2 on t1.date=t2.date
union
select date,0.000 as p
from login
where date not in(
　　select t2.date
　　from t2
)

解析：利用A表和B表即可求出哪天的留存率。datediff(2019-12-30,2019-12-31)=1，若求3天留存，則將datediff(2019-12-30,2019-12-31)=3

（5）case when與group by

　　1、利用case when構造多個字段

FName    FCourse    FScore              FName    語文    數學    英語
張三         語文          95.0           張三     95      0       0
張三         數學          80.0           張三     0       80.0    0
張三         英語          70.0    =>     張三     0       0       70
李四         語文          88.0           李四     88.0    0       0           
李四         數學          90.0           李四     0       87.0    0
李四         英語          70.0           李四     0       0       86.0

答案：
FName    語文    數學    英語
張三         95      80      70  
李四         88      87      86
select 
 FName ,
 max(case FCourse when '語文' then FScore else 0 end) as '語文',
 max(case FCourse when '數學' then FScore else 0 end) as '數學',
 max(case FCourse when '英語' then FScore else 0 end) as '英語'
from score
group by FName

　　2、利用case when構造單個字段

解法一：
select FName,
 case 
 when FCourse='數學' then '理科'  else '文科' end as '科別', sum(FScore) as '總分'
from score
group by FName,case when FCourse='數學' then '理科' else '文科' end 
order by FName

解法二
select FName, FType as '科別', sum(FScore) as '總分' 
from (
 select FName, FCourse, FScore, FType = case when FCourse in ('語文','英語') then '文科' else '理科' end 
 from score
) as a
group by FName,FType
order by FName

（6）登錄表：t_login: user_id,login_time,login_ip，支付表：t_pay: user_id,pay_time,pay_money

　　1、用1個SQL統計出登錄總次數、登錄總人數、登錄但未支付的總人數

select count(1),count(distinct a.user_id),
count(distinct case when b.user_id is null then a.user_id end)
from t_login as a left join t_pay as b on a.user_id = b.user_id
或者
select,count1,count2,count(distinct(user_id))
from t_login t3 join (
    select t1.user_id,t2.pay_time,count(login_ip) count1,
    count(distinct(t1.user_id)) count2
    from t_login t1 left join t_pay t2 on t1.user_id=t2.user_id
)t on t.user_id=t3.user_id
where pay_time=null;

　　2、查詢出每個用戶最近的一條登錄數據 user_id，login_time，login_ip

select user_id,login_time,login_ip
(select user_id,login_time,login_ip,
row_number over(partition by user_id order by login_time desc) new_time
from t_login) t
where new_time=1;

　　3、按總支付金額范圍分組統計支付的總人數、支付的平均金額

select
case
when pay_money >= 1 and pay_money <= 100 then '1~100'
when pay_money >= 101 and pay_money <= 1000 then '101~1000'
when pay_money >= 1001 then '1001+' end pay_limits,
count(distinct(user_id)),
avg(pay_money)
from
t_pay
group by
    case
    when pay_money >= 1 and pay_money <= 100 then '1~100'
    when pay_money >= 101 and pay_money <= 1000 then '101~1000'
    when pay_money >= 1001 then '1001+' end

（7）多表join

select t1.name,t2.name,t3.name
from (city t1 join city t2 on t1.id=t2.parentID) join city t3 on t2.id=t3.parentID

（8）利用explode行轉列

===》

解析：explode(col_name)：將hive中的一列的array或者map分成多行，如果是字符串類型則利用split將其變成數組；表movie_info與虛表temp_table利用lateral view進行相關聯，其中temp_table表中的字段為category_type

（9）列轉行

將列表中一個id可能會占用多行轉換為每個user占一行的目標表格式

解析：concat(string1，string2，... )：連接括號內字符串，數量不限；

　　　 concat_ws(separator,string1，string2，... )：連接括號內字符串或者數組，數量不限，連接符為separator。

　　　 collect_set(字段)：此函數只接受基本類型，主要是將字段的值進行去重匯總，產生array類型字段。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 軟考下午題--UML建模練習（一）一些常考的智力題面試常考算法題之並查集問題考博買真題被騙經歷軟考真題之設計模式軟考下午題--UML建模練習（二） PHP和MySQL愛考的10道題軟考下午題--UML建模練習（五）大數據開發-Hive-常用日期函數&&日期連續題sql套路為了考PMP，我做了一個刷題小程序