1. 程式人生 > 實用技巧 >C# 獲取windows下端口占用與程序名稱

C# 獲取windows下端口占用與程序名稱

hive筆記

#hive基本語法
-----------------------------------------------------
#http://dblab.xmu.edu.cn/blog/2440-2/
-- 不區分大小寫
--建表  if not exists 
CREATE TABLE if not exists page_view(viewTime INT, userid BIGINT,
                page_url STRING, referrer_url STRING,
                friends ARRAY<BIGINT>, properties MAP<STRING, STRING>,
                ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)	--分割槽
CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS	--分桶
ROW FORMAT DELIMITED	--行預設分隔符
        FIELDS TERMINATED BY '1'	--欄位分隔符
        COLLECTION ITEMS TERMINATED BY '2'
        MAP KEYS TERMINATED BY '3'
STORED AS SEQUENCEFILE;	--以二進位制格式進行儲存資料在hdfs上


SHOW TABLES;
SHOW TABLES 'page.*';
SHOW PARTITIONS page_view;  --列出表的分割槽,無分割槽則丟擲錯誤。

DESCRIBE page_view;  --列出表的列和列的型別
DESCRIBE EXTENDED page_view;  --列出表的列和表的其他屬性。這會列印很多資訊,且輸出的風格不是很友好,通常用於除錯。
DESCRIBE EXTENDED page_view PARTITION (ds='2016-08-08');   --列出列和分割槽的所有屬性。這也會打印出許多資訊,通常也是用於除錯。

ALTER TABLE old_table_name RENAME TO new_table_name;  --修改表名
ALTER TABLE old_table_name REPLACE COLUMNS (col1 TYPE, ...);  --修改列名(列型別需一致),不修改的列需附上,否則丟失
ALTER TABLE tab1 ADD COLUMNS (c1 INT COMMENT 'a new int column', c2 STRING DEFAULT 'def val');  --增加列

DROP TABLE if exists pv_users;  --刪表
ALTER TABLE pv_users DROP PARTITION (ds='2016-08-08')  --刪除分割槽

--載入資料到Hive表 -> 外部表
CREATE EXTERNAL TABLE page_view_stg(viewTime INT, userid BIGINT,
                page_url STRING, referrer_url STRING,
                ip STRING COMMENT 'IP Address of the User',
                country STRING COMMENT 'country of origination')
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '44' 	--‘44’是逗號,‘12’是換頁符
LINES TERMINATED BY '12'
STORED AS TEXTFILE
LOCATION '/user/data/staging/page_view';

hadoop dfs -put /tmp/pv_2016-06-08.txt /user/data/staging/page_view

FROM page_view_stg pvs
INSERT OVERWRITE TABLE page_view PARTITION(dt='2016-06-08', country='US')
SELECT pvs.viewTime, pvs.userid, pvs.page_url, pvs.referrer_url, null, null, pvs.ip
WHERE pvs.country = 'US';


--載入資料到Hive表 -> 直接(格式一致),路徑引數可以是一個目錄(不含子目錄),一個檔案,或一個萬用字元
LOAD DATA LOCAL INPATH /tmp/pv_2016-06-08_us.txt INTO TABLE page_view PARTITION(date='2016-06-08', country='US')


--select
INSERT OVERWRITE TABLE user_active
SELECT user.*
FROM user
WHERE user.active = 1;


INSERT OVERWRITE TABLE xyz_com_page_views
SELECT page_views.*
FROM page_views
WHERE page_views.date >= '2008-03-01' AND page_views.date <= '2008-03-31' AND
      page_views.referrer_url like '%xyz.com';

--join
INSERT OVERWRITE TABLE pv_users
SELECT pv.*, u.gender, u.age
FROM user u JOIN page_view pv 
ON (pv.userid = u.id)
WHERE pv.date = '2008-03-03';


INSERT OVERWRITE TABLE pv_users
SELECT pv.*, u.gender, u.age
FROM user u FULL OUTER JOIN page_view pv 
ON (pv.userid = u.id)
WHERE pv.date = '2008-03-03';


INSERT OVERWRITE TABLE pv_friends
SELECT pv.*, u.gender, u.age, f.friends
FROM page_view pv JOIN user u ON (pv.userid = u.id) JOIN friend_list f ON (u.id = f.uid)
WHERE pv.date = '2008-03-03';


--union all
INSERT OVERWRITE TABLE actions_users
SELECT u.id, actions.date
FROM (
    SELECT av.uid AS uid
    FROM action_video av
    WHERE av.date = '2008-06-03'

    UNION ALL

    SELECT ac.uid AS uid
    FROM action_comment ac
    WHERE ac.date = '2008-06-03'
) actions JOIN users u 
ON(u.id = actions.uid);


--聚合(不可distinct作用於不同的列)
INSERT OVERWRITE TABLE pv_gender_agg
SELECT pv_users.gender, count(DISTINCT pv_users.userid), count(*), sum(DISTINCT pv_users.userid)
FROM pv_users
GROUP BY pv_users.gender;


--多表/檔案插入
FROM pv_users
INSERT OVERWRITE TABLE pv_gender_sum  --插入hive
    SELECT pv_users.gender, count_distinct(pv_users.userid)
    GROUP BY pv_users.gender

INSERT OVERWRITE DIRECTORY '/user/data/tmp/pv_age_sum'  --插入HDFS檔案系統
    SELECT pv_users.age, count_distinct(pv_users.userid)
    GROUP BY pv_users.age;


--動態分割槽插入
FROM page_view_stg pvs  --不便於維護
INSERT OVERWRITE TABLE page_view PARTITION(dt='2008-06-08', country='US')
       SELECT pvs.viewTime, pvs.userid, pvs.page_url, pvs.referrer_url, null, null, pvs.ip WHERE pvs.country = 'US'
INSERT OVERWRITE TABLE page_view PARTITION(dt='2008-06-08', country='CA')
       SELECT pvs.viewTime, pvs.userid, pvs.page_url, pvs.referrer_url, null, null, pvs.ip WHERE pvs.country = 'CA'
INSERT OVERWRITE TABLE page_view PARTITION(dt='2008-06-08', country='UK')
       SELECT pvs.viewTime, pvs.userid, pvs.page_url, pvs.referrer_url, null, null, pvs.ip WHERE pvs.country = 'UK';

FROM page_view_stg pvs  --多分割槽插入,目前只允許最後一列放置動態分割槽列,層級次序
INSERT OVERWRITE TABLE page_view PARTITION(dt='2008-06-08', country)
       SELECT pvs.viewTime, pvs.userid, pvs.page_url, pvs.referrer_url, null, null, pvs.ip, pvs.country


--匯出到本地檔案
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/pv_gender_sum'
SELECT pv_gender_sum.*
FROM pv_gender_sum;


--抽樣
--TABLESAMPLE(BUCKET 3 OUT OF 16) 表示第3個桶和第19個桶
--TABLESAMPLE(BUCKET 3 OUT OF 64 ON userid) 表示第3個桶的一半
INSERT OVERWRITE TABLE pv_gender_sum_sample
SELECT pv_gender_sum.*
FROM pv_gender_sum TABLESAMPLE(BUCKET 3 OUT OF 32);  --32個桶中選擇第三個,必須是CLUSTERED BY修飾的列


--表的array、map列操作
CREATE TABLE array_table (int_array_column ARRAY<INT>);
SELECT pv.userid, pv.friends[2], size(pv.friends)
FROM page_view pv;

INSERT OVERWRITE page_views_map
SELECT pv.userid, pv.properties['page type'], size(pv.properties)
FROM page_views pv;