1. 程式人生 > 實用技巧 >Spark專案實戰從0到1之(13)hive經典50題

Spark專案實戰從0到1之(13)hive經典50題

一.建表和載入資料
1.student表
-- Student table, stored as comma-delimited text rows.
create table if not exists student (
    s_id    int,
    s_name  string,
    s_birth string,
    s_sex   string
)
row format delimited
    fields terminated by ',';

-- Load the local data file into the student table.
load data local inpath '/root/data.txt' into table student;

2.course表
-- Course table, stored as comma-delimited text rows.
create table if not exists course (
    c_id     int,
    c_course string,
    t_id     int
)
row format delimited
    fields terminated by ',';

-- NOTE(review): the course, teacher and score loads below all read the same
-- '/root/data.txt'; presumably each table needs its own file -- confirm.
load data local inpath '/root/data.txt' into table course;

3.teacher表
-- Teacher table, stored as comma-delimited text rows.
create table if not exists teacher (
    t_id   int,
    t_name string
)
row format delimited
    fields terminated by ',';

load data local inpath '/root/data.txt' into table teacher;

4.score表
-- Score table: one row per (student, course) with the score obtained.
create table if not exists score (
    s_id    int,
    c_id    int,
    s_score double
)
row format delimited
    fields terminated by ',';

load data local inpath '/root/data.txt' into table score;

二.查詢"01"課程比"02"課程成績高的學生的資訊及課程分數?
答案①:
-- Students whose course-01 score beats their course-02 score.
-- Alias a = course 01 (required), b = course 02 (optional), c = every score
-- row of the student, so the output has one row per course taken.
-- The "b.s_score is null" branch also keeps students with no course-02 row.
select stu.*, c.*
from student stu
join score a
    on a.c_id = '01' and a.s_id = stu.s_id
left join score b
    on b.c_id = '02' and b.s_id = stu.s_id
join score c
    on c.s_id = stu.s_id
where a.s_score > b.s_score
   or b.s_score is null;

答案②:
-- Same result with the optional course-02 side written first.
select stu.*, c.*
from student stu
left join score a
    on a.c_id = '02' and a.s_id = stu.s_id
join score b
    on b.c_id = '01' and b.s_id = stu.s_id
join score c
    on c.s_id = stu.s_id
where a.s_score < b.s_score
   or a.s_score is null;

三.查詢"01"課程比"02"課程成績低的學生的資訊及課程分數:
答案①:
-- Mirror of the previous question: course 02 is required, course 01 optional.
select stu.*, c.*
from student stu
join score a
    on a.c_id = '02' and a.s_id = stu.s_id
left join score b
    on b.c_id = '01' and b.s_id = stu.s_id
join score c
    on c.s_id = stu.s_id
where a.s_score > b.s_score
   or b.s_score is null;

答案②:
select stu.*, c.*
from student stu
left join score a
    on a.c_id = '01' and a.s_id = stu.s_id
join score b
    on b.c_id = '02' and b.s_id = stu.s_id
join score c
    on c.s_id = stu.s_id
where a.s_score < b.s_score
   or a.s_score is null;

總結:對於二題和三題的查詢連線的方法:誰大就把誰放在左邊,誰小就把誰捨棄。

四.查詢平均成績大於等於60分的同學的學生編號和學生姓名和平均成績?
答案①:
-- Filter groups through the select-list alias.
select
    a.s_id,
    stu.s_name,
    avg(a.s_score) as avgscore
from score a
join student stu
    on a.s_id = stu.s_id
group by a.s_id, stu.s_name
having avgscore >= 60;

答案②:
-- Same filter written with the aggregate expression directly in HAVING.
-- (Selecting "avg(...) >= 60" would only output a true/false column for
-- every student instead of filtering the groups.)
select
    a.s_id,
    stu.s_name,
    avg(a.s_score) as avgscore
from score a
join student stu
    on a.s_id = stu.s_id
group by a.s_id, stu.s_name
having avg(a.s_score) >= 60;

五.查詢平均成績小於60分的同學的學生編號和學生姓名和平均成績?
答案①:
-- First branch: students whose average is below 60.
-- Second branch: students without any score row, reported with a NULL average.
select
    a.s_id,
    stu.s_name,
    avg(a.s_score) as avgscore
from score a
join student stu
    on a.s_id = stu.s_id
group by a.s_id, stu.s_name
having avgscore < 60
union all
select
    stu.s_id,
    stu.s_name,
    NULL as avgscore
from student stu
left join score a
    on stu.s_id = a.s_id
where a.s_score is null;

六.查詢所有同學的學生編號、學生姓名、選課總數、所有課程的總成績
答案:
-- LEFT JOIN keeps students without scores; count(sc.s_id) is 0 for them
-- because count(column) skips NULLs.
select
    stu.s_id,
    stu.s_name,
    count(sc.s_id)  as totalSubjects,
    sum(sc.s_score) as sumScores
from student stu
left join score sc
    on stu.s_id = sc.s_id
group by stu.s_id, stu.s_name;

七.查詢"李"姓老師的數量?
-- Number of teachers whose name starts with the given surname.
select count(1)
from teacher
where t_name like '李%';

八.查詢學過"張三"老師授課的同學的資訊?
-- Students having at least one score row in a course taught by the teacher.
select distinct stu.*
from student stu
join score sc
    on stu.s_id = sc.s_id
join course co
    on sc.c_id = co.c_id
join teacher te
    on co.t_id = te.t_id
where te.t_name = '張三';

九.查詢沒學過"張三"老師授課的同學的資訊? 
-- Q9: students with no score row in the teacher's course.
-- NOTE(review): if the teacher has more than one course, a student who took
-- one of them but not another is still returned -- confirm against the data.
select *
from student stu
join teacher te
    on te.t_name = '張三'
join course co
    on te.t_id = co.t_id
left join score sc
    on stu.s_id = sc.s_id and co.c_id = sc.c_id
where sc.s_score is null;

十.查詢學過編號為"01"並且也學過編號為"02"的課程的同學的資訊?
-- Q10: sc1 only proves that a course-02 row exists; the output carries the
-- student columns plus the course-01 score row.
select stu.*, sc.*
from student stu
join score sc
    on stu.s_id = sc.s_id
join score sc1
    on stu.s_id = sc1.s_id
where sc.c_id = 1
  and sc1.c_id = 2;

十一.查詢學過編號為"01"但是沒有學過編號為"02"的課程的同學的資訊:
-- Q11: has a course-01 row and no course-02 row.
select stu.*
from student stu
join score sc
    on sc.s_id = stu.s_id and sc.c_id = '01'
where not exists (
    select 1
    from score sc1
    where sc1.c_id = '02'
      and stu.s_id = sc1.s_id
);

十二.查詢沒有學全所有課程的同學的資訊?
-- Q12: pair every student with every course, then keep the students for whom
-- at least one pairing has no score row.
select distinct stu.*
from student stu
cross join course co
left join score sc
    on sc.s_id = stu.s_id and sc.c_id = co.c_id
where sc.s_id is null;

十三.查詢至少有一門課與學號為"01"的同學所學相同的同學的資訊?
-- Q13: other students sharing at least one course with student 01.
select distinct stu.*
from student stu
join score sc
    on stu.s_id = sc.s_id
where stu.s_id <> 1
  and sc.c_id in (select c_id from score where s_id = 1);

十四.查詢和"01"號的同學學習的課程完全相同的其他同學的資訊?
-- Q14: build a sorted, comma-joined course list per student and keep the
-- students whose list equals the list of student 01.
select stu.*
from student stu
join (
    select
        s_id,
        concat_ws(',', sort_array(collect_set(cast(c_id as string)))) as courses
    from score
    group by s_id
) mine
    on mine.s_id = stu.s_id
join (
    select
        concat_ws(',', sort_array(collect_set(cast(c_id as string)))) as courses
    from score
    where s_id = 1
) ref
    on ref.courses = mine.courses
where stu.s_id <> 1;

十五.查詢沒學過"張三"老師講授的任一門課程的學生姓名?
-- Q15: same anti-join as Q9, returning only the student columns.
-- NOTE(review): same multi-course caveat as Q9.
select stu.*
from student stu
join teacher te
    on te.t_name = '張三'
join course co
    on co.t_id = te.t_id
left join score sc
    on sc.c_id = co.c_id and sc.s_id = stu.s_id
where sc.s_score is null;

十六.查詢兩門及其以上不及格課程的同學的學號,姓名及其平均成績?
-- Q16: count failing (< 60) scores per student, keep those with two or more,
-- and report the average over all of that student's scores.
select
    stu.s_id,
    stu.s_name,
    round(avg(sc.s_score), 2) as avgscore
from student stu
join score sc
    on sc.s_id = stu.s_id
group by stu.s_id, stu.s_name
having sum(case when sc.s_score < 60 then 1 else 0 end) >= 2;

十七.檢索"01"課程分數小於60,按分數降序排列的學生資訊?
-- Q17: failing course-01 scores, highest first.
select *
from student stu
join score sc
    on sc.s_id = stu.s_id
where sc.c_id = 1
  and sc.s_score < 60
order by sc.s_score desc;

十八.按平均成績從高到低顯示所有學生的所有課程的成績以及平均成績?
-- Q18: every score row plus the per-student average computed as a window
-- aggregate, ordered by that average and then by the individual score.
select
    *,
    round(avg(sc.s_score) over (partition by sc.s_id), 2) as avg1
from score sc
order by avg1 desc, sc.s_score desc;

總結:在這裡啊,round是hive的內建函式,其功能是四捨五入。

十九..查詢各科成績最高分、最低分和平均分:以如下形式顯示:課程ID,課程name,最高分,最低分,平均分,及格率,中等率,優良率,優秀率: -- 及格為>=60,中等為:70-80,優良為:80-90,優秀為:>=90? 
-- Q19: per-course max / min / average plus the share (in percent) of scores
-- in each band. s_score is a DOUBLE, so the bands use half-open ranges;
-- integer BETWEEN bounds would drop values such as 79.5.
select
    co.c_id,
    co.c_course,
    max(sc.s_score),
    min(sc.s_score),
    round(avg(sc.s_score), 3),
    round(sum(case when sc.s_score >= 60 then 1 else 0 end) / count(1) * 100, 3) as `及格率`,
    round(sum(case when sc.s_score >= 70 and sc.s_score < 80 then 1 else 0 end) / count(1) * 100, 3) as `中等率`,
    round(sum(case when sc.s_score >= 80 and sc.s_score < 90 then 1 else 0 end) / count(1) * 100, 3) as `優良率`,
    round(sum(case when sc.s_score >= 90 then 1 else 0 end) / count(1) * 100, 3) as `優秀率`
from score sc
join course co
    on sc.c_id = co.c_id
group by co.c_id, co.c_course;

二十.按各科成績進行排序,並顯示排名:– row_number() over()分組排序功能?
-- Q20: position of each score within its course, best score first.
select
    *,
    row_number() over (partition by c_id order by s_score desc)
from score;

二十一.查詢學生的總成績並進行排名?
-- Q21: total score per student with an explicit rank column (ties share a
-- rank). The aggregate is computed in a subquery so the window function only
-- references a plain column.
select
    t.s_id,
    t.sumScores,
    rank() over (order by t.sumScores desc) as rk
from (
    select
        s_id,
        sum(s_score) as sumScores
    from score
    group by s_id
) t
order by t.sumScores desc;

二十二:查詢不同老師所教不同課程平均分從高到低顯示?
-- Q22: average score per (teacher, course), best course first per teacher.
select
    co.t_id,
    sc.c_id,
    round(avg(sc.s_score), 2) as avgscore
from score sc
join course co
    on sc.c_id = co.c_id
group by co.t_id, sc.c_id
order by co.t_id, avgscore desc;

二十三.查詢所有課程的成績第2名到第3名的學生資訊及該課程成績?
-- Q23: rows ranked 2nd and 3rd within each course.
select *
from (
    select
        *,
        row_number() over (partition by c_id order by s_score desc) as rm
    from score
) a
where a.rm between 2 and 3;

二十四.統計各科成績各分數段人數:課程編號,課程名稱,[100-85],[85-70],[70-60],[0-60]及所佔百分比?
-- Q24: head count and percentage per score band for each course.
-- Bands are half-open so fractional scores (e.g. 84.5) always land in exactly
-- one band. LEFT JOIN keeps score rows whose course has no course-table entry.
select
    sc.c_id,
    co.c_course,
    sum(case when sc.s_score >= 85 then 1 else 0 end)                      as `85score`,
    sum(case when sc.s_score >= 70 and sc.s_score < 85 then 1 else 0 end)  as `70score`,
    sum(case when sc.s_score >= 60 and sc.s_score < 70 then 1 else 0 end)  as `60score`,
    sum(case when sc.s_score < 60 then 1 else 0 end)                       as `0score`,
    count(1)                                                               as totalscore,
    round(sum(case when sc.s_score >= 85 then 1 else 0 end) / count(1) * 100, 2)                     as pct_85,
    round(sum(case when sc.s_score >= 70 and sc.s_score < 85 then 1 else 0 end) / count(1) * 100, 2) as pct_70,
    round(sum(case when sc.s_score >= 60 and sc.s_score < 70 then 1 else 0 end) / count(1) * 100, 2) as pct_60,
    round(sum(case when sc.s_score < 60 then 1 else 0 end) / count(1) * 100, 2)                      as pct_0
from score sc
left join course co
    on co.c_id = sc.c_id
group by sc.c_id, co.c_course;

二十五.查詢學生平均成績及其名次?
-- Q25: average per student, numbered from the highest average down.
select
    *,
    row_number() over (order by a.avgscore desc) as rm
from (
    select
        s_id,
        round(avg(s_score), 2) as avgscore
    from score
    group by s_id
) a;

二十六.查詢各科成績前三名的記錄三個語句?
-- Q26: shows the three ranking functions side by side; the filter uses
-- row_number, so exactly the first three rows per course are kept.
select *
from (
    select
        *,
        row_number() over (partition by c_id order by s_score desc) as rm,
        rank()       over (partition by c_id order by s_score desc) as rk,
        dense_rank() over (partition by c_id order by s_score desc) as drk
    from score
) a
where a.rm < 4;

二十七.查詢每門課程被選修的學生數? 
-- Q27: number of score rows (enrolled students) per course.
select
    c_id,
    count(1) as `學生人數`
from score
group by c_id;

二十八.查詢出只有兩門課程的全部學生的學號和姓名?
-- Q28: students with exactly two score rows.
select
    stu.s_id,
    stu.s_name
from student stu
join score sc
    on sc.s_id = stu.s_id
group by stu.s_id, stu.s_name
having count(1) = 2;

二十九.查詢男生、女生人數?
-- Q29: head count per value of s_sex.
select
    s_sex,
    count(1) as totalstu
from student
group by s_sex;

三十.查詢名字中含有"風"字的學生資訊?
-- Q30: names containing the given character anywhere.
select *
from student
where s_name like '%風%';

三十一.查詢同名同性學生名單,並統計同名人數?
-- Q31: (name, sex) combinations shared by more than one student.
select
    s_name,
    s_sex,
    count(1) as totalstu
from student
group by s_name, s_sex
having totalstu > 1;

三十二.查詢1990年出生的學生名單?
-- Q32: two equivalent string tests on the leading year of s_birth.
select *
from student
where s_birth like '1990%';

-- substr positions are 1-based; start at 1 instead of relying on 0 being
-- treated as 1.
select *
from student
where substr(s_birth, 1, 4) = '1990';

三十三.查詢每門課程的平均成績,結果按平均成績降序排列,平均成績相同時,按課程編號升序排列?
-- Q33: course averages, highest first, ties broken by course id.
select
    c_id,
    round(avg(s_score), 2) as avgscore
from score
group by c_id
order by avgscore desc, c_id asc;

三十四:查詢平均成績大於等於85的所有學生的學號、姓名和平均成績?
-- Q34: the question asks for "greater than or equal to 85" and for the
-- student id, so the filter is >= and s_id is part of the output.
select
    stu.s_id,
    stu.s_name,
    avg(sc.s_score) as avgscore
from student stu
join score sc
    on stu.s_id = sc.s_id
group by stu.s_id, stu.s_name
having avgscore >= 85;

三十五:查詢課程名稱為"數學",且分數低於60的學生姓名和分數?
-- Q35: failing scores in the course with the given name.
select
    stu.s_name,
    sc.s_score
from student stu
join score sc
    on stu.s_id = sc.s_id
join course co
    on sc.c_id = co.c_id and co.c_course = '數學'
where sc.s_score < 60;

三十六:查詢所有學生的課程及分數情況?
-- Q36: driven from student with LEFT JOINs so that students without any
-- score still appear (with NULL course / score columns).
select *
from student stu
left join score sc
    on stu.s_id = sc.s_id
left join course co
    on sc.c_id = co.c_id;

三十七:查詢任何一門課程成績在70分以上的學生姓名、課程名稱和分數?
-- Q37: a per-row condition belongs in WHERE; grouping by the score itself and
-- testing min() in HAVING filtered the same rows in a roundabout way.
select
    stu.s_name,
    co.c_course,
    sc.s_score
from student stu
join score sc
    on stu.s_id = sc.s_id
join course co
    on sc.c_id = co.c_id
where sc.s_score >= 70;

三十八:查詢課程不及格的學生?
-- Q38: one output row per failing score.
select stu.*
from student stu
join score sc
    on stu.s_id = sc.s_id
where sc.s_score < 60;

三十九:查詢課程編號為01且課程成績在80分以上的學生的學號和姓名?
-- Q39: course-01 scores of 80 or more.
select
    stu.s_id,
    stu.s_name,
    sc.c_id,
    sc.s_score
from student stu
join score sc
    on stu.s_id = sc.s_id
where sc.c_id = 1
  and sc.s_score >= 80;

四十:每門課程的學生人數?
-- Q40: enrolment per course, with the course name.
select
    sc.c_id,
    co.c_course,
    count(1) as stunum
from score sc
join course co
    on sc.c_id = co.c_id
group by sc.c_id, co.c_course;

四十一:查詢選修"張三"老師所授課程的學生中,成績最高的學生資訊及其成績? 
-- Q41: top score(s) in each course taught by the teacher. The inner query
-- carries the student columns and the score, not just the rank, so the outer
-- query actually returns the requested information. dense_rank keeps ties.
select *
from (
    select
        stu.*,
        sc.c_id,
        sc.s_score,
        dense_rank() over (partition by sc.c_id order by sc.s_score desc) as drk
    from score sc
    join student stu
        on stu.s_id = sc.s_id
    join course co
        on sc.c_id = co.c_id
    join teacher te
        on co.t_id = te.t_id
    where te.t_name = '張三'
) aa
where aa.drk = 1;

四十二:查詢不同課程成績相同的學生的學生編號、課程編號、學生成績?
①
-- Q42: self-join on student and score; the inequality on c_id stays in WHERE.
select distinct
    sc.s_id,
    sc.c_id,
    sc.s_score
from score sc
join score sc1
    on sc.s_id = sc1.s_id and sc.s_score = sc1.s_score
where sc.c_id <> sc1.c_id;

②
-- Same query using the != spelling of the inequality operator.
select distinct
    sc.s_id,
    sc.c_id,
    sc.s_score
from score sc
join score sc1
    on sc.s_id = sc1.s_id and sc.s_score = sc1.s_score
where sc.c_id != sc1.c_id;

四十三:查詢每門課程成績最好的前三名?
-- Q43: first three rows per course by descending score.
select *
from (
    select
        *,
        row_number() over (partition by c_id order by s_score desc) rn
    from score
) aa
where aa.rn <= 3;

四十四:統計每門課程的學生選修人數(超過5人的課程才統計)?
-- Q44: courses with more than five score rows.
select
    c_id,
    count(*) as stunum
from score
group by c_id
having stunum > 5
order by c_id asc, stunum desc;

四十五:檢索至少選修兩門課程的學生學號?
-- Q45: students with two or more score rows.
select
    s_id,
    count(1) as coursenum
from score
group by s_id
having coursenum >= 2;

四十六:查詢選修了全部課程的學生資訊?
-- Q46: compare the number of distinct known courses each student has a score
-- for with the number of courses in the course table.
select
    stu.s_id,
    stu.s_name
from student stu
join (
    select
        sc.s_id,
        count(distinct sc.c_id) as taken
    from score sc
    join course co
        on co.c_id = sc.c_id
    group by sc.s_id
) t
    on t.s_id = stu.s_id
join (
    select count(distinct c_id) as total
    from course
) all_co
    on all_co.total = t.taken;

四十七:查詢各學生的年齡(週歲)?
-- Q47: year difference, minus one when this year's birthday has not been
-- reached yet.
select
    s_birth,
    year(current_date()) - year(s_birth) -
    (case
        when month(current_date()) > month(s_birth) then 0
        when month(current_date()) = month(s_birth)
             and day(current_date()) >= day(s_birth) then 0
        else 1
     end)
from student;

四十八:查詢本週過生日的學生?
-- Q48: NOTE(review): weekofyear(s_birth) is the week number in the birth
-- year, which can differ from this year's calendar -- confirm this
-- approximation is acceptable.
select *
from student
where weekofyear(s_birth) = weekofyear(current_date());

四十九:查詢下週過生日的學生?
-- Shift the date by 7 days instead of adding 1 to the week number, so the
-- last week of the year rolls over to week 1.
select *
from student
where weekofyear(s_birth) = weekofyear(date_add(current_date(), 7));

四十九:查詢上週過生日的學生?
-- Same idea going backwards: week 1 rolls back to the previous year's last week.
select *
from student
where weekofyear(s_birth) = weekofyear(date_sub(current_date(), 7));

五十:查詢本月過生日的學生?
select *
from student
where month(s_birth) = month(current_date());

五十:查詢上月過生日的學生?
-- add_months handles the January -> December wrap; "month(...) - 1" yields 0.
select *
from student
where month(s_birth) = month(add_months(current_date(), -1));

五十:查詢下月過生日的學生?
-- add_months handles the December -> January wrap; "month(...) + 1" yields 13.
select *
from student
where month(s_birth) = month(add_months(current_date(), 1));

五十一:查詢12月份過生日的學生? 
①
-- Q51, date-function form: students born in December.
select *
from student
where month(s_birth) = 12;

②
-- Q51, string form. Assumes s_birth is 'yyyy-MM-dd' (the month() call above
-- relies on a date-shaped string as well), so the month is the two characters
-- starting at position 6. Compared as a string to avoid implicit casts.
select *
from student
where substring(s_birth, 6, 2) = '12';