
Data Definition and Description


create table employee (
name string,
work_place array<string>,         -- accessed as array_name[0]
gender_age struct<gender:string, age:int>,  -- struct<col_name:type, ...>  similar to an HBase column family; accessed as struct_name.col_name
skills_score map<string, int>,    -- map_name[key]
apart_title map<string, array<string>>
)
row format delimited
fields terminated by "|"
collection items terminated by ","
map keys terminated by ":";

!table employee   -- not needed
!column employee  -- not needed
describe formatted employee;  -- use this instead; the output is more readable
load data local inpath "/home/centos/hive essential/ch03/employee.txt" overwrite into table employee; 
# query the whole array 
select work_place from employee; 
select work_place[0] as col_1, work_place[1] as col_2, work_place[2] as col_3 from employee;
# query the whole struct and its fields
select gender_age from employee;
select gender_age.gender , gender_age.age from employee;
# query the whole map and each key as a column:
select skills_score from employee;
select name, 
skills_score["DB"] as DB, 
skills_score["Perl"] as Perl,
skills_score["Python"] as Python,
skills_score["Sales"] as Sales,
skills_score["HR"] as HR
from employee;
# query the composite type
select apart_title from employee;
select name,
apart_title["Product"] as Product,
apart_title["Test"] as Test,
apart_title["COE"] as COE,
apart_title["Sales"] as Sales
from employee;

DDL
Database operations
create database if not exists myhivebook
comment "cho3 hive database in practice"  --添加描述
location "/hdfs/hive"   --hdfs上的路徑
with dbproperties ("name"="MengRui", "date"="2018-08-20");

show databases;
describe database myhivebook;   -- print information about the specified database

use myhivebook;

drop database if exists myhivebook; -- drop an empty database
drop database if exists myhivebook cascade; -- drop a database that still contains tables

alter database myhivebook   -- set database properties
set dbproperties ("edited by"="dog");
alter database myhivebook
set owner user dog;

Table operations
create external table external_employee (
name string,
work_place array<string>,         -- accessed as array_name[0]
gender_age struct<gender:string, age:int>,  -- struct<col_name:type, ...>  accessed as struct_name.col_name
skills_score map<string, int>,    -- map_name[key]
apart_title map<string, array<string>>
)
comment "this is a external table" --屬性位置固定,否則會報錯
row format delimited
fields terminated by "|"
collection items terminated by ","
map keys terminated by ":"
stored as textfile
location "/user/ch03/employee"; -- this path must not contain other subdirectories, otherwise queries fail; if the path does not exist, Hive creates it automatically
    
load data local inpath "/home/centos/hive essential/ch03/employee.txt" overwrite into table external_employee;
create temporary table temporary_name... ???
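A minimal sketch of the temporary-table syntax the note above points at, assuming the employee table defined earlier; the name temp_employee is illustrative. A temporary table is visible only to the current session and is dropped automatically when the session ends.

create temporary table temp_employee as   -- illustrative name; session-scoped
select name, work_place from employee;    -- dropped automatically when the session closes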

--CTAS copy metadata and data to new table
create table ctas_employee as 
select * from external_employee;

-- CTAS with a CTE (Common Table Expression)
-- From the male employees, select the name "Michael"; also select the names of all female employees
create table cte_employee as  -- CTAS
with r1 as (select name from r2 where name = "Michael"), -- CTE
r2 as (select name from employee where gender_age.gender = "Male"),
r3 as (select name from employee where gender_age.gender = "Female")
select * from r1 union all select * from r3;
select * from cte_employee;

-- create an empty table
-- create table empty_ctas_employee as  -- CTAS launches mappers and is slow; not recommended
-- select * from employee where 1 = 2;
create table empty_like_employee  -- LIKE replicates only the metadata
like employee; --like [table or view]

-- count the rows
select count(*) as row_counts from employee;

-- completely drop an internal table: removes the metadata and moves the data to the Trash
drop table if exists empty_ctas_employee; 

-- remove all the rows from an internal table
truncate table cte_employee;

ALTER only changes the metadata
-- alter table rename
alter table internal_employee rename to empty_employee;

alter table employee set -- add or update table properties
tblproperties("comment" = "this is internal table");

alter table employee set
serdeproperties("field.delim" = "$");

alter table employee set
location "hdfs://mycluster/user/hive/warehouse/new_employee";  -- 設置路徑,hive不會自動創建路徑,路徑必須為hdfs中的絕對路徑

alter table external_employee partition(year = 2012, month = 1, day = 1) enable no_drop;    -- prevent the partition from being dropped
alter table external_employee enable offline;      -- prevent the data (not the metadata) of the partitions from being queried

alter table employee concatenate;  -- merge small files into larger files; only the RCFile and ORC file formats are supported at the moment

alter table employee set fileformat rcfile;    -- set the file format
alter table employee set fileformat textfile;

--check column type
desc employee;
 
alter table empty_employee  -- the operations below only change metadata; the data files must match the updated schema
change column name employee_name string  -- rename the column (old_name new_name type)
after work_place;  -- move the column after work_place

alter table empty_employee 
add columns (wife string);  -- add a new column

alter table empty_employee 
replace columns(wife string);  -- replace all existing columns with this single column

Partitioned tables
-- create a partitioned table; it greatly reduces query time and bandwidth
create table partition_employee (
name string,
work_place array<string>,         -- accessed as array_name[0]
gender_age struct<gender:string, age:int>,  -- struct<col_name:type, ...>  accessed as struct_name.col_name
skills_score map<string, int>,    -- map_name[key]
apart_title map<string, array<string>>
)
partitioned by (year int, month int)
row format delimited
fields terminated by "|"          
collection items terminated by ","
map keys terminated by ":";
-- check the partitions
show partitions partition_employee;  
-- 1) a newly created table has no partitions; add them manually
alter table partition_employee add
partition (year = 2017, month = 07)
partition (year = 2017, month = 08);
--2)load data into partitions
load data local inpath "/home/centos/hive essential/ch03/employee.txt" -- local: load from the local file system
overwrite into table partition_employee 
partition (year = 2017, month = 7);

-- before querying the partitioned data, first set:
set hive.strict.checks.large.query=false;
set hive.mapred.mode=nonstrict;
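A hedged usage sketch reusing the partition loaded above: the settings allow a full scan, but the usual pattern is to filter on the partition columns so Hive prunes the partitions it reads.

select name, work_place
from partition_employee
where year = 2017 and month = 7;   -- partition pruning: only files under year=2017/month=7 are scanned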

-- drop the partition
alter table partition_employee
drop if exists partition (year = 2017, month = 7);

Bucketed tables
-- 1) prepare another dataset and table for the bucketed table
create table employee_id (
name string,
employee_id int,   -- bucket column 
work_place array<string>,         -- accessed as array_name[0]
gender_age struct<gender:string, age:int>,  -- struct<col_name:type, ...>  accessed as struct_name.col_name
skills_score map<string, int>,    -- map_name[key]
apart_title map<string, array<string>>
)
row format delimited
fields terminated by "|"
collection items terminated by ","
map keys terminated by ":";
load data local inpath "/home/centos/hive essential/ch03/employee_id.txt"
overwrite into table employee_id;
-- 2) create the bucketed table
create table employee_id_buckets (
name string,
employee_id int,   -- bucket column
work_place array<string>,         -- accessed as array_name[0]
gender_age struct<gender:string, age:int>,  -- struct<col_name:type, ...>  accessed as struct_name.col_name
skills_score map<string, int>,    -- map_name[key]
apart_title map<string, array<string>>
)
clustered by (employee_id) into 2 buckets  -- bucket size: close to two HDFS blocks of data (~256 MB); number of buckets: a power of two (2^N)
row format delimited
fields terminated by "|"
collection items terminated by ","           --tuple1,tuple2,...
map keys terminated by ":";

-- 3) settings for bucketing
set mapred.reduce.tasks = 2;  -- the number of reducers should equal the number of buckets
set hive.enforce.bucketing = true;  -- let Hive enforce bucketing on insert

-- 4)populate data into buckets
insert overwrite table employee_id_buckets     -- must use INSERT rather than LOAD: only INSERT hashes rows into buckets
select * from employee_id;
-- 5) verify the buckets in the HDFS
dfs -ls /user/hive/warehouse/employee_id_buckets;

Views
Views reduce query complexity and improve data security.
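No view example appears above, so here is a minimal sketch assuming the employee table from the earlier examples; the view name employee_skills is illustrative.

create view employee_skills as
select name, skills_score["DB"] as db_score    -- expose only selected columns to hide the rest
from employee;

select * from employee_skills;    -- queried like a table; the underlying query runs at query time
drop view if exists employee_skills;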

References

Programming Hive
Apache Hive Essentials
