sas proc sql 基礎入門 (原創作品,轉載請註明出處 )
阿新 • • 發佈:2019-01-07
/*結構*/
/*
PROC SQL;
SELECT column(s)
FROM table(s) | view(s)
WHERE expression
GROUP BY column(s)
HAVING expression
ORDER BY column(s);
QUIT;
*/
/* 1. Build data set A from SASUSER.STUDENTS and remove the existing labels
      of Student_Name, Student_Company and City_State. */
data a;
  set sasuser.Students;
  /* Assigning a quoted blank is the documented way to clear a variable label. */
  label Student_Name    = ' '
        Student_Company = ' '
        City_State      = ' ';
run;
/* 2. Select every row and every column of data set A.
      The asterisk is intentional here: the example demonstrates "select all". */
proc sql;
  select *
    from a;
quit;
/* 3. List all observations, keeping only the variables Student_Name and
      Student_Company. */
proc sql;
  select Student_Name,
         Student_Company
    from a;
quit;
/* 4. Limit the number of observations.
      CREATE TABLE stores the query result as a new table.
      OUTOBS = 50 restricts the output to the first 50 observations of
      Student_Name and Student_Company. */
proc sql outobs = 50;
  create table student as
    select Student_Name,
           Student_Company
      from a;
quit;  /* PROC SQL is terminated by QUIT; a RUN statement has no effect on it */
/* 5. Rename variables in the output using the form "name AS name1". */
proc sql;
  select Student_Name    as name,
         Student_Company as company
    from a;
quit;
/* 6. Drop observations that are duplicated across every variable. */
proc sql;
  select distinct
         *
    from a;
quit;
/* 7. Drop observations whose (student_name, student_company) pair is
      duplicated. */
proc sql;
  select distinct
         student_name,
         student_company
    from a;
quit;
/* 8. Attach a label (LABEL = 'text') and a display format (FORMAT = fmt.)
      to a selected variable. */
proc sql;
  select student_name label = 'Student Name' format = $35.
    from a;
quit;
/* 9. Sorting: ORDER BY variable, where ASC (ascending) or DESC (descending)
      sets the direction for each sort key. */
proc sql;
  select student_name,
         student_company
    from a
   order by student_name    asc,
            student_company desc;
quit;
/* 10. Select the subset of rows that satisfies a condition.
       BETWEEN ... AND : where salary between 1000 and 3000
       CONTAINS        : where student_name contains 'DE'
                         (can also be written: where student_name ? 'DE')
       IN ( )          : matches any of the values listed in the parentheses
       IS MISSING / IS NULL : rows where the chosen variable has no value
       LIKE            : pattern match; _ stands for exactly one character,
                         % stands for any number of characters */

/* CONTAINS combined with IN. */
proc sql;
  select *
    from a
   where student_name contains 'A'
     and student_company in ('A','B');
quit;

/* Rows where student_company is missing. */
proc sql;
  select *
    from a
   where student_company is missing;
quit;

/* LIKE pattern: 'A', any single character, 'l', then anything. */
proc sql;
  select *
    from a
   where student_name like 'A_l%';
quit;
/* 11. Filtering on a computed column with CALCULATED.
       Either:  where calculated new_salary > 500
       or:      where (salary * 0.1) > 500 */

/* Variant A: refer back to the alias through CALCULATED. */
proc sql;
  select ID,
         (salary * 0.1) as new_salary
    from sasuser.pilots
   where calculated new_salary > 500;
quit;

/* Variant B: repeat the expression in the WHERE clause. */
proc sql;
  select ID,
         (salary * 0.1) as new_salary
    from sasuser.pilots
   where (salary * 0.1) > 500;
quit;
/* 12. Derive a new variable from several conditions with CASE.
       Operators usable in a CASE condition: = < > NOT NE AND OR IN,
       BETWEEN ... AND, CONTAINS, ?, IS NULL, IS MISSING, LIKE. */
proc sql;
  select salary,
         case
           when salary between 0 and 1000        then 'LOW'
           /* Upper bands are open on the left so that fractional salaries
              such as 1000.5 or 2000.5 do not fall through to the ELSE branch. */
           when salary > 1000 and salary <= 2000 then 'MEDIUM'
           when salary > 2000 and salary <= 3000 then 'HIGH'
           /* NOTE(review): negative and missing salaries also land here,
              as they did before - confirm that this is intended. */
           else 'VERY HIGH'
         end as new_flag
    from sasuser.pilots;
quit;
/* 13. Counting and summarising data.
       COUNT is comparable to PROC FREQ; MAX, MIN and SUM are comparable to
       PROC MEANS.
       Summary functions: AVG/MEAN, COUNT/FREQ/N, SUM, MAX, MIN, NMISS, STD,
       VAR, T, USS (uncorrected sum of squares), CSS (corrected sum of
       squares), RANGE. */

/* Count per ID group. firstname is selected but not grouped, so PROC SQL
   remerges the summary value onto the detail rows. */
proc sql;
  select ID,
         firstname,
         count(distinct ID) as t_salary  /* DISTINCT counts each unique ID once */
    from sasuser.pilots
   group by ID
   order by firstname;
quit;

/* Maximum salary per firstname group. ID is selected but not grouped, so
   the group maximum is remerged onto the detail rows. */
proc sql;
  select ID,
         firstname,
         max(salary) as M_salary
    from sasuser.pilots
   group by firstname
   order by firstname;
quit;

/* Whole-table statistics of salary (no GROUP BY, so a single result row). */
proc sql;
  select max(salary)   as M_salary,
         uss(salary)   as uss_s,
         css(salary)   as css_s,
         range(salary) as r_s,
         avg(salary)   as a_s,
         sum(salary)   as s_s,
         std(salary)   as std_s
    from sasuser.pilots;
quit;
/* 14. Counting missing values with NMISS.
       Sample data: one numeric variable ID per line; a period denotes a
       missing value. */
data miss;
  input id;
  datalines;
1
.
2
3
4
.
.
5
6
.
78
.
1
.
78
;
run;
/* Store the number of missing IDs, the number of non-missing IDs and their
   sum (taken from the two calculated columns) in table miss_sts. */
proc sql;
  create table miss_sts as
    select nmiss(id) as N_missings,
           count(id) as N,
           calculated N_missings + calculated N as total
      from miss;
quit;

/* The same two counts, reported separately for each value of id. */
proc sql;
  select id,
         count(id),
         nmiss(id)
    from miss
   group by id;
quit;
/* 15. Removing or keeping a column with data set options on the new table.
       drop = variable name
       keep = variable name */
proc sql;
  create table student1 (drop = city_state) as
    select *
      from sasuser.students;
quit;
/* 16. Deleting rows in place with DELETE FROM. */

/* Take a copy of SASUSER.PILOTS so that the delete below acts on the copy. */
data pilots;
  set sasuser.pilots;
run;

/* Remove every row of the copy whose salary exceeds 100000. */
proc sql;
  delete
    from pilots
   where salary > 100000;
quit;
/* 17. Using HAVING to filter on a per-group summary value. */
/* firstname is selected but not grouped, so the count is remerged onto the
   detail rows; only groups with more than one non-missing salary are kept. */
proc sql;
  select id,
         firstname,
         count(salary) as s_s
    from sasuser.pilots
   group by id
  having s_s >1;
quit;
/* 18. Nested queries (subqueries). */

/* Build table pilots from the SASUSER.PILOTS rows with salary above 100000. */
proc sql;
  create table pilots as
    select *
      from sasuser.pilots
     where salary > 100000;
quit;

/* List the IDs in SASUSER.PILOTS that do not appear in table pilots. */
proc sql;
  select id
    from sasuser.pilots
   where id not in (select id from pilots);
quit;  /* PROC SQL is terminated by QUIT; a RUN statement has no effect on it */