1. 程式人生 > >Oracle實驗三 基於大資料集的資料庫操作

Oracle實驗三 基於大資料集的資料庫操作

一、實驗內容

    1.以常用“名字大全”與“百家姓”資料集為基礎,生成不小於1千萬條stud記錄,要求,姓名的重複率不超過10%,學號以ABCD16EFGH為格式模板,即其中16是固定的,AB為從01到80,CD為從01到90,EF為01到50,GH為01到32;性別中,男、女佔比為99%到99.5%。TEL與E-mail不作要求,但不能全空。Birthday要求從‘19940101’到‘19990731’分佈。要求記錄ORACLE資料檔案的大小變化。(需要編制過程)

    2.分別測試stud有主鍵與沒有主鍵情形下生成記錄的時間。

    3.建立基於name的索引index_name,測試建立的時間與建立索引前後查詢某一姓名及某一姓的時間長度。

    4.測試索引index_name建立前後,分姓(簡單地理解為姓名的第1,2位)的記錄數統計時間。

    5.按學號首位建立10個分割槽分別為part_0到part_9,測試建立分割槽前後分首位統計人數與分專業(EF位)統計人數的時間差別。

二、實驗分析

    1.實驗思路

    分別完成姓名表,學號表,學生其他資訊表的建立和資料生成,最後將三張表資料同時插入學生資訊表。

    2.實驗過程

    0)寫一個記錄系統時間的過程,方便記錄時間差

    1)匯入基本的姓名表

    2)建立姓名各個字的檢視

    3)將檢視進行笛卡爾積插入學生姓名錶

    4)生成學生學號表

    5)建立相關隨機函式:性別,電話,郵箱,出生日期

    6)生成學生其他資訊表資料

    7)將三張表資料同時插入學生資訊表

    8)記錄各個操作的時間

    3.生成姓名表分析

    對原始的姓名建立3個檢視,分別選出第一、二、三個字,其分別有572、1504和1200條記錄,對於生成千萬條資料,隨機選出500、1200和20條記錄就可達到實驗要求。

    4.生成學生學號分析

-- Student number layout ABCD16EFGH, built two digits at a time:
--   GH = seat within class (01..32), EF = class (01..50),
--   CD = major (01..90),             AB = college (01..80); "16" is fixed.
-- Each stage is a single set-based statement rather than a row-by-row loop,
-- and the unused outer declaration of i (shadowed by the loop index) is gone.

-- Working tables used while the numbers are assembled.
drop table t_student_sno_gh_j432;
drop table t_student_sno_ef_j432;
drop table t_student_sno_cd_j432;
drop table t_student_sno_ab_j432;

create table t_student_sno_gh_j432(sno number(10));
create table t_student_sno_ef_j432(sno number(10));
create table t_student_sno_cd_j432(sno number(10));
create table t_student_sno_ab_j432(sno varchar2(10));

begin
    -- GH: 1..32
    insert into t_student_sno_gh_j432 (sno)
    select level
    from dual
    connect by level <= 32;

    -- 16EFGH = 160000 + EF * 100 + GH
    insert /*+ append */ into t_student_sno_ef_j432 (sno)
    select 160000 + ef.n * 100 + gh.sno
    from (select level as n from dual connect by level <= 50) ef
    cross join t_student_sno_gh_j432 gh;
    -- A direct-path insert has to be committed before the table is read again.
    commit;

    -- CD16EFGH = CD * 1000000 + 16EFGH
    insert /*+ append */ into t_student_sno_cd_j432 (sno)
    select cd.n * 1000000 + ef.sno
    from (select level as n from dual connect by level <= 90) cd
    cross join t_student_sno_ef_j432 ef;
    commit;

    -- ABCD16EFGH = AB * 100000000 + CD16EFGH, zero-padded to 10 characters
    insert /*+ append */ into t_student_sno_ab_j432 (sno)
    select to_char(ab.n * 100000000 + cd.sno, 'fm0000000000')
    from (select level as n from dual connect by level <= 80) ab
    cross join t_student_sno_cd_j432 cd;
    commit;
end;
/

    5.生成學生其他資訊表分析

    1)建立隨機生成性別函式f_getSex_j432,在性別表中插入143條記錄,其中男女佔142條,還有一條為“其他”,每次呼叫函式會隨機返回一條記錄,可使男女佔比為99.3%,達到實驗要求。

    2)另外建立隨機產生電話函式f_getTel_j432、隨機產生郵箱函式f_getEmail_j432、隨機產生生日函式f_getBirthday_j432

    3)通過批處理產生大量資料

-- Template table: a small set of pre-generated attribute rows.
CREATE TABLE t_sequence_id (
    id       NUMBER(10),
    sex      VARCHAR2(32),
    tel      VARCHAR2(32),
    email    VARCHAR2(32),
    birthday DATE
);

-- Bulk table: holds the ten-million-plus rows of non-name, non-number
-- student attributes.
DROP TABLE t_stud_other_information_j432;
CREATE TABLE t_stud_other_information_j432 (
    id       NUMBER(16),
    sex      VARCHAR2(32),
    tel      VARCHAR2(32),
    email    VARCHAR2(32),
    birthday DATE
);

 

-- Fill the template with 10,000 rows (ids 0..9999); every attribute comes
-- from one call to the matching random generator function.
BEGIN
    DELETE FROM t_sequence_id;

    <<fill_template>>
    FOR row_id IN 0 .. 9999 LOOP
        INSERT INTO t_sequence_id (id, sex, tel, email, birthday)
        VALUES (
            row_id,
            f_getsex_j432,
            f_gettel_j432,
            f_getemail_j432,
            f_getbirthday_j432
        );
    END LOOP fill_template;
END;
/

--select * from t_sequence_id;

 

-- Copy the 10,000-row template 1,200 times (12,000,000 rows in total).
-- Each batch shifts the template ids by batch_no * 10000 and is committed
-- right after its direct-path insert.
BEGIN
    DELETE FROM t_stud_other_information_j432;

    FOR batch_no IN 1 .. 1200 LOOP
        INSERT /*+ append */ INTO t_stud_other_information_j432
            (id, sex, tel, email, birthday)
        SELECT
            batch_no * 10000 + tpl.id,
            tpl.sex,
            tpl.tel,
            tpl.email,
            tpl.birthday
        FROM t_sequence_id tpl;

        COMMIT;
    END LOOP;
END;
/

6.生成完整學生資訊表分析

-- Pair the three source tables row by row: each one is numbered with ROWNUM
-- and joined on that number, which avoids a Cartesian product.
INSERT INTO t_student_j432 (sno, sname, sex, tel, email, birthday)
SELECT
    ids.sno,
    names.sname,
    info.sex,
    info.tel,
    info.email,
    info.birthday
FROM (SELECT ROWNUM AS rn, sno FROM t_student_sno_ab_j432) ids
INNER JOIN (SELECT ROWNUM AS rn, sname FROM t_stu_name_j432) names
    ON names.rn = ids.rn
INNER JOIN (
    SELECT ROWNUM AS rn, sex, tel, email, birthday
    FROM t_stud_other_information_j432
) info
    ON info.rn = names.rn;

7.記錄各個操作時間差分析

-- Log table: one row per recorded event, with the timestamp kept as text.
create table t_record_time_j432(things varchar2(128),time varchar2(32));

-- p_record_time_j432: append one (label, current timestamp) row to
-- t_record_time_j432.
--   input : free-text label describing the event being timed.
-- The row is written by a single INSERT, so calling the procedure again with
-- the same label adds a new row and leaves earlier timestamps untouched.
-- The procedure does not commit; the caller's transaction decides.
create or replace procedure p_record_time_j432(input varchar2) as
begin
    insert into t_record_time_j432 (things, time)
    values (input, to_char(systimestamp, 'yyyy-mm-dd hh24:mi:ss.ff'));
end;
/
show error

 

三、實驗結果

本次記錄中得到以下實驗資料:

匯入原始姓名錶用時為69.970000秒

學生資訊表沒有主鍵生成時間為40.811000秒

學生資訊表有主鍵生成時間為144.011000秒

沒有姓名索引,查詢一條姓名的時間為2.108000秒

有姓名索引,查詢一條姓名的時間為1.680000秒

沒有分割槽,按學號首位ID統計人數的時間為2.295000秒

有分割槽,按學號首位ID統計人數的時間為1.780000秒

 

實驗過程記錄:(整個記錄儲存在txt檔案內,使用Notepad++檢視)

 

 

 

四、實驗小結

    本次實驗主要遇到的問題是數量級太大,效率低。最開始的時候,是想建立整張學生資訊表,先匯入姓名,對整張表做一個遊標,不斷生成其他資訊,55分鐘大約生成160萬資訊,太慢。然後查詢資料可以進行批處理,以及可以先拆開生成不同的表格資料,最後再統一插入學生資訊表,可大大加快效率,實現實驗要求。

    另一個問題就是記錄時間,我在網上查到可以用表格自帶的時間戳,我用過後發現誤差太大,然後我想到獲取毫秒級的系統時間,在每次操作前後都獲取一次系統時間,通過分析時間差來達到實驗目的,所以就有了實驗原始碼之初的記錄時間的過程。

五、完整程式碼

-- Precise event recorder: stores the system timestamp for each step so that
-- elapsed times can be derived afterwards.
-- To rerun from scratch, drop t_record_time_j432 first.
create table t_record_time_j432(things varchar2(128),time varchar2(32));

-- p_record_time_j432: append one (label, current timestamp) row to
-- t_record_time_j432.
--   input : free-text label describing the event being timed.
-- The row is written by a single INSERT, so calling the procedure again with
-- the same label adds a new row and leaves earlier timestamps untouched.
-- The procedure does not commit; the caller's transaction decides.
create or replace procedure p_record_time_j432(input varchar2) as
begin
    insert into t_record_time_j432 (things, time)
    values (input, to_char(systimestamp, 'yyyy-mm-dd hh24:mi:ss.ff'));
end;
/
show error
-- Smoke-test the recorder, then display what it logged.
EXECUTE p_record_time_j432('第一次執行記錄時間過程')
COLUMN things FORMAT a64
COLUMN time FORMAT a32
SELECT things, time
FROM t_record_time_j432;

-- 1. Base table for the raw names.
DROP TABLE sname;
CREATE TABLE sname (
    sname VARCHAR(32)
);

-- 2. Run the external insert script and log when the import starts and ends.
EXECUTE p_record_time_j432('開始匯入原始姓名')
@D:\YDDUONG\大學課程學習資料\大三課程\orcale\ODB\name_insert.txt
COMMIT;
EXECUTE p_record_time_j432('匯入原始姓名結束')
-- 3. Build a large enough pool of names.
--    One view per character position; each draws a random sample of the
--    distinct characters found at that position in sname. The random ORDER BY
--    is part of the view definition, so the sample is re-drawn every time the
--    view is queried.
--    DISTINCT runs in an inner query so the random ORDER BY is not in the same
--    query block as DISTINCT. NULLs (names too short to have a character at
--    that position) are dropped so they cannot produce shortened, duplicate
--    names.
drop view v_name1_j432;
drop view v_name2_j432;
drop view v_name3_j432;
drop view v_name12_j432;
drop view v_name123_j432;

-- Up to 500 random first characters.
create view v_name1_j432 as
select na1
from (
    select na1
    from (select distinct substr(sname, 1, 1) as na1 from sname)
    where na1 is not null
    order by dbms_random.value
)
where rownum <= 500;

-- Up to 1200 random second characters.
create view v_name2_j432 as
select na2
from (
    select na2
    from (select distinct substr(sname, 2, 1) as na2 from sname)
    where na2 is not null
    order by dbms_random.value
)
where rownum <= 1200;

-- Up to 20 random third characters.
create view v_name3_j432 as
select na3
from (
    select na3
    from (select distinct substr(sname, 3, 1) as na3 from sname)
    where na3 is not null
    order by dbms_random.value
)
where rownum <= 20;

-- Every first/second character combination (two-character names).
create view v_name12_j432 as
select na1 || na2 as na12
from v_name1_j432
cross join v_name2_j432;

-- Every two-character name extended by each third character.
create view v_name123_j432 as
select na12 || na3 as na123
from v_name12_j432
cross join v_name3_j432;

-- 4. Table holding the generated student names.
DROP TABLE t_stu_name_j432;
CREATE TABLE t_stu_name_j432 (
    sname VARCHAR(16)
);

-- 5. Load the two-character names first, then the three-character names.
-- NOTE(review): the log label below says 120200000 rows, but the view caps
-- (500 * 1200 plus 500 * 1200 * 20) allow at most 12,600,000 — confirm with
-- select count(*) from t_stu_name_j432.
INSERT INTO t_stu_name_j432 (sname)
SELECT na12
FROM v_name12_j432;

INSERT INTO t_stu_name_j432 (sname)
SELECT na123
FROM v_name123_j432;

EXECUTE p_record_time_j432('完成為姓名錶生成120200000條記錄')

-- 6. Generate the student-number table.
--    Layout ABCD16EFGH, built two digits at a time:
--      GH = seat within class (01..32), EF = class (01..50),
--      CD = major (01..90),             AB = college (01..80); "16" is fixed.
--    Each stage is a single set-based statement rather than a row-by-row loop,
--    and the unused outer declaration of i (shadowed by the loop index) is gone.

-- Working tables used while the numbers are assembled.
drop table t_student_sno_gh_j432;
drop table t_student_sno_ef_j432;
drop table t_student_sno_cd_j432;
drop table t_student_sno_ab_j432;
create table t_student_sno_gh_j432(sno number(10));
create table t_student_sno_ef_j432(sno number(10));
create table t_student_sno_cd_j432(sno number(10));
create table t_student_sno_ab_j432(sno varchar2(10));

begin
    -- GH: 1..32
    insert into t_student_sno_gh_j432 (sno)
    select level
    from dual
    connect by level <= 32;

    -- 16EFGH = 160000 + EF * 100 + GH
    insert /*+ append */ into t_student_sno_ef_j432 (sno)
    select 160000 + ef.n * 100 + gh.sno
    from (select level as n from dual connect by level <= 50) ef
    cross join t_student_sno_gh_j432 gh;
    -- A direct-path insert has to be committed before the table is read again.
    commit;

    -- CD16EFGH = CD * 1000000 + 16EFGH
    insert /*+ append */ into t_student_sno_cd_j432 (sno)
    select cd.n * 1000000 + ef.sno
    from (select level as n from dual connect by level <= 90) cd
    cross join t_student_sno_ef_j432 ef;
    commit;

    -- ABCD16EFGH = AB * 100000000 + CD16EFGH, zero-padded to 10 characters
    insert /*+ append */ into t_student_sno_ab_j432 (sno)
    select to_char(ab.n * 100000000 + cd.sno, 'fm0000000000')
    from (select level as n from dual connect by level <= 80) ab
    cross join t_student_sno_cd_j432 cd;
    commit;
end;
/
exec p_record_time_j432('完成11520000條學號ID生成')

-- 7.1 Random generators for sex, tel, email and birthday.
--     f_getSex_j432 returns a random sex label.
--     A whole number 1..143 is drawn uniformly: 1..71 -> '男', 72..142 -> '女',
--     143 -> '其它', so male + female make up 142/143 (about 99.3%), inside the
--     required 99%..99.5% band.
--     Returns: varchar2, one of '男', '女', '其它' (the spelling accepted by the
--     ck_student_sex check constraint).
create or replace function f_getSex_j432 return varchar2
is
    f_numb pls_integer;
begin
    -- dbms_random.value(low, high) yields low <= x < high, so the upper bound
    -- must be 144 for trunc() to be able to produce 143.
    f_numb := trunc(dbms_random.value(1, 144));

    return case
               when f_numb <= 71 then '男'
               when f_numb <= 142 then '女'
               else '其它'
           end;
end;
/
show error

-- 7.2 Random phone numbers.
--     t_tel_j432 holds the three-digit prefixes that f_getTel_j432 picks from.
CREATE TABLE t_tel_j432 (
    tel VARCHAR2(4)
);

INSERT ALL
    INTO t_tel_j432 (tel) VALUES ('132')
    INTO t_tel_j432 (tel) VALUES ('135')
    INTO t_tel_j432 (tel) VALUES ('156')
    INTO t_tel_j432 (tel) VALUES ('151')
    INTO t_tel_j432 (tel) VALUES ('138')
    INTO t_tel_j432 (tel) VALUES ('139')
    INTO t_tel_j432 (tel) VALUES ('183')
    INTO t_tel_j432 (tel) VALUES ('187')
    INTO t_tel_j432 (tel) VALUES ('153')
    INTO t_tel_j432 (tel) VALUES ('150')
    INTO t_tel_j432 (tel) VALUES ('186')
    INTO t_tel_j432 (tel) VALUES ('188')
SELECT 1 FROM dual;
-- f_getTel_j432: build a random phone number.
-- Picks one random prefix row from t_tel_j432 and appends 8 characters cut
-- from the text form of a dbms_random.value draw.
-- Returns: varchar2, prefix followed by the 8-character tail.
CREATE OR REPLACE FUNCTION f_getTel_j432 RETURN VARCHAR2
IS
    v_prefix VARCHAR2(4);
    v_tail   VARCHAR2(16);
    v_phone  VARCHAR2(12);
BEGIN
    -- Shuffle the prefixes and keep the first row.
    SELECT tel
    INTO v_prefix
    FROM (
        SELECT tel
        FROM t_tel_j432
        ORDER BY dbms_random.value
    )
    WHERE ROWNUM = 1;

    -- Characters 3..10 of the random value's text form
    -- (presumably digits after the decimal point — confirm).
    SELECT SUBSTR(CAST(dbms_random.value AS VARCHAR2(32)), 3, 8)
    INTO v_tail
    FROM dual;

    v_phone := v_prefix || v_tail;
    RETURN v_phone;
END;
/
SHOW ERRORS

-- 7.3 Random e-mail addresses.
--     t_email_j432 holds the provider names that f_getEmail_j432 picks from.
--     'qq' is stored 12 times next to six single entries, so a uniformly
--     random row is 'qq' in 12 out of 18 cases.
CREATE TABLE t_email_j432 (
    email VARCHAR2(16)
);

INSERT ALL
    INTO t_email_j432 (email) VALUES ('126')
    INTO t_email_j432 (email) VALUES ('139')
    INTO t_email_j432 (email) VALUES ('sohu')
    INTO t_email_j432 (email) VALUES ('sina')
    INTO t_email_j432 (email) VALUES ('163')
    INTO t_email_j432 (email) VALUES ('foxmail')
SELECT 1 FROM dual;

INSERT INTO t_email_j432 (email)
SELECT 'qq'
FROM dual
CONNECT BY LEVEL <= 12;

-- f_getemail_j432: build a random e-mail address.
-- The local part is 11 characters cut from the text form of a
-- dbms_random.value draw; the provider is one random row of t_email_j432.
-- Returns: varchar2 of the form <local>@<provider>.com
CREATE OR REPLACE FUNCTION f_getemail_j432 RETURN VARCHAR2
IS
    v_local    VARCHAR2(16);
    v_provider VARCHAR2(16);
    v_address  VARCHAR2(32);
BEGIN
    -- Characters 3..13 of the random value's text form.
    SELECT SUBSTR(CAST(dbms_random.value AS VARCHAR2(32)), 3, 11)
    INTO v_local
    FROM dual;

    -- Shuffle the providers and keep the first row.
    SELECT email
    INTO v_provider
    FROM (
        SELECT email
        FROM t_email_j432
        ORDER BY dbms_random.value
    )
    WHERE ROWNUM = 1;

    v_address := v_local || '@' || v_provider || '.com';
    RETURN v_address;
END;
/
SHOW ERRORS

-- 7.4 Random birthdays.
--     f_getBirthday_j432 returns a date drawn uniformly from the closed range
--     1994-01-01 .. 1999-07-31, the range the experiment requires and the
--     ck_student_birthday check constraint enforces.
--     Returns: date with no time-of-day component.
create or replace function f_getBirthday_j432 return date
is
    c_first constant date := date '1994-01-01';
    c_last  constant date := date '1999-07-31';
begin
    -- dbms_random.value(low, high) yields low <= x < high, so the span is
    -- (c_last - c_first + 1) days to make c_last itself reachable.
    return c_first + trunc(dbms_random.value(0, c_last - c_first + 1));
end;
/
show error
exec p_record_time_j432('性別、手機號、郵箱、出生日期等隨機函式生成完畢')

-- 7.5 Generate every student attribute except number and name.
-- Template table: a small set of pre-generated attribute rows.
CREATE TABLE t_sequence_id (
    id       NUMBER(10),
    sex      VARCHAR2(32),
    tel      VARCHAR2(32),
    email    VARCHAR2(32),
    birthday DATE
);

-- Bulk table: holds the ten-million-plus rows of non-name, non-number
-- student attributes.
DROP TABLE t_stud_other_information_j432;
CREATE TABLE t_stud_other_information_j432 (
    id       NUMBER(16),
    sex      VARCHAR2(32),
    tel      VARCHAR2(32),
    email    VARCHAR2(32),
    birthday DATE
);

-- Fill the template with 10,000 rows (ids 0..9999); every attribute comes
-- from one call to the matching random generator function.
BEGIN
    DELETE FROM t_sequence_id;

    <<fill_template>>
    FOR row_id IN 0 .. 9999 LOOP
        INSERT INTO t_sequence_id (id, sex, tel, email, birthday)
        VALUES (
            row_id,
            f_getsex_j432,
            f_gettel_j432,
            f_getemail_j432,
            f_getbirthday_j432
        );
    END LOOP fill_template;
END;
/
--select * from t_sequence_id;

-- Copy the 10,000-row template 1,200 times (12,000,000 rows in total).
-- Each batch shifts the template ids by batch_no * 10000 and is committed
-- right after its direct-path insert.
BEGIN
    DELETE FROM t_stud_other_information_j432;

    FOR batch_no IN 1 .. 1200 LOOP
        INSERT /*+ append */ INTO t_stud_other_information_j432
            (id, sex, tel, email, birthday)
        SELECT
            batch_no * 10000 + tpl.id,
            tpl.sex,
            tpl.tel,
            tpl.email,
            tpl.birthday
        FROM t_sequence_id tpl;

        COMMIT;
    END LOOP;
END;
/
EXECUTE p_record_time_j432('完成生成12000000條其他資訊生成')

-- Timed run 1: build the student table WITHOUT a primary key.
EXECUTE p_record_time_j432('學生資訊表沒有主鍵,生成11520000條記錄,開始時間')

DROP TABLE t_student_j432;
CREATE TABLE t_student_j432 (
    sno      VARCHAR2(10),  -- student number
    sname    VARCHAR2(32),
    sex      VARCHAR2(32),
    tel      VARCHAR2(32),
    email    VARCHAR2(32),
    birthday DATE
);

-- Assemble the full student rows: number, name and other attributes are
-- paired by the ROWNUM each source row receives.
INSERT INTO t_student_j432 (sno, sname, sex, tel, email, birthday)
SELECT
    ids.sno,
    names.sname,
    info.sex,
    info.tel,
    info.email,
    info.birthday
FROM (SELECT ROWNUM AS rn, sno FROM t_student_sno_ab_j432) ids
INNER JOIN (SELECT ROWNUM AS rn, sname FROM t_stu_name_j432) names
    ON names.rn = ids.rn
INNER JOIN (
    SELECT ROWNUM AS rn, sex, tel, email, birthday
    FROM t_stud_other_information_j432
) info
    ON info.rn = names.rn;

EXECUTE p_record_time_j432('學生資訊表沒有主鍵,生成11520000條記錄,完成時間')

-- Timed run 2: build the student table WITH a primary key on sno.
EXECUTE p_record_time_j432('學生資訊表有主鍵,生成11520000條記錄,開始時間')

DROP TABLE t_student_j432;
CREATE TABLE t_student_j432 (
    sno      VARCHAR2(10) PRIMARY KEY,  -- student number
    sname    VARCHAR2(32),
    sex      VARCHAR2(32),
    tel      VARCHAR2(32),
    email    VARCHAR2(32),
    birthday DATE
);

-- Same row-by-ROWNUM pairing of number, name and other attributes.
INSERT INTO t_student_j432 (sno, sname, sex, tel, email, birthday)
SELECT
    ids.sno,
    names.sname,
    info.sex,
    info.tel,
    info.email,
    info.birthday
FROM (SELECT ROWNUM AS rn, sno FROM t_student_sno_ab_j432) ids
INNER JOIN (SELECT ROWNUM AS rn, sname FROM t_stu_name_j432) names
    ON names.rn = ids.rn
INNER JOIN (
    SELECT ROWNUM AS rn, sex, tel, email, birthday
    FROM t_stud_other_information_j432
) info
    ON info.rn = names.rn;

EXECUTE p_record_time_j432('學生資訊表有主鍵,生成11520000條記錄,完成時間')

-- Extra integrity rules on the student table: allowed sex labels, a minimal
-- e-mail shape, and the permitted birthday window.
ALTER TABLE t_student_j432
    ADD CONSTRAINT ck_student_sex
    CHECK (sex IN ('男', '女', '其它'));

ALTER TABLE t_student_j432
    ADD CONSTRAINT ck_student_email
    CHECK (email LIKE '%@%.%');

ALTER TABLE t_student_j432
    ADD CONSTRAINT ck_student_birthday
    CHECK (
        birthday >= TO_DATE('19940101', 'yyyymmdd')
        AND birthday <= TO_DATE('19990731', 'yyyymmdd')
    );
-- Show 100 randomly chosen students, the total row count, and the timing log.
SET LINESIZE 300
SET PAGESIZE 1000
COLUMN sno FORMAT a11
COLUMN sname FORMAT a8
COLUMN sex FORMAT a6
COLUMN email FORMAT a26
COLUMN tel FORMAT a12
ALTER SESSION SET nls_date_format = 'yyyy-mm-dd';

SELECT *
FROM (
    SELECT *
    FROM t_student_j432
    ORDER BY dbms_random.value()
)
WHERE ROWNUM <= 100;

SELECT COUNT(*)
FROM t_student_j432;

COLUMN things FORMAT a64
COLUMN time FORMAT a32
SELECT *
FROM t_record_time_j432;
-- Baseline timings BEFORE the name index exists.

-- Exact-name lookup.
EXECUTE p_record_time_j432('沒有姓名索引,查詢一條姓名的開始時間')
SELECT *
FROM t_student_j432
WHERE sname = '周平環';
EXECUTE p_record_time_j432('沒有姓名索引,查詢一條姓名的結束時間')

-- All rows for one surname.
EXECUTE p_record_time_j432('沒有姓名索引,查詢某一姓氏人數的開始時間')
SELECT *
FROM t_student_j432
WHERE sname LIKE '周%';
EXECUTE p_record_time_j432('沒有姓名索引,查詢某一姓氏人數的結束時間')

-- Head count for one surname.
EXECUTE p_record_time_j432('沒有姓名索引,統計某一姓氏人數的開始時間')
SELECT COUNT(*)
FROM t_student_j432
WHERE sname LIKE '周%';
EXECUTE p_record_time_j432('沒有姓名索引,統計某一姓氏人數的結束時間')

-- Head count for one second character.
EXECUTE p_record_time_j432('沒有姓名索引,統計某一姓名第二個字相同人數的開始時間')
SELECT COUNT(*)
FROM t_student_j432
WHERE sname LIKE '_平%';
EXECUTE p_record_time_j432('沒有姓名索引,統計某一姓名第二個字相同人數的結束時間')
-- Create the name index (timed), then repeat the same four queries.
DROP INDEX i_stu_sname_j432;
EXECUTE p_record_time_j432('開始建立姓名索引')
CREATE INDEX i_stu_sname_j432
    ON t_student_j432 (sname);
EXECUTE p_record_time_j432('完成建立姓名索引')

-- Exact-name lookup.
EXECUTE p_record_time_j432('有姓名索引,查詢一條姓名,開始時間')
SELECT *
FROM t_student_j432
WHERE sname = '周平環';
EXECUTE p_record_time_j432('有姓名索引,查詢一條姓名,結束時間')

-- All rows for one surname.
EXECUTE p_record_time_j432('有姓名索引,查詢某一姓氏人數的開始時間')
SELECT *
FROM t_student_j432
WHERE sname LIKE '周%';
EXECUTE p_record_time_j432('有姓名索引,查詢某一姓氏人數的結束時間')

-- Head count for one surname.
EXECUTE p_record_time_j432('有姓名索引,統計某一姓氏人數的開始時間')
SELECT COUNT(*)
FROM t_student_j432
WHERE sname LIKE '周%';
EXECUTE p_record_time_j432('有姓名索引,統計某一姓氏人數的結束時間')

-- Head count for one second character.
EXECUTE p_record_time_j432('有姓名索引,統計某一姓名第二個字相同人數的開始時間')
SELECT COUNT(*)
FROM t_student_j432
WHERE sname LIKE '_平%';
EXECUTE p_record_time_j432('有姓名索引,統計某一姓名第二個字相同人數的結束時間')
-- Baseline timings on the unpartitioned table.

-- Head count by first digit of the student number.
EXECUTE p_record_time_j432('沒有分割槽,按學號首位ID統計人數的開始時間')
SELECT COUNT(*)
FROM t_student_j432
WHERE sno LIKE '5%';
EXECUTE p_record_time_j432('沒有分割槽,按學號首位ID統計人數的結束時間')

-- Head count by the EF digits (characters 7-8 of the student number).
EXECUTE p_record_time_j432('沒有分割槽,按專業統計人數的開始時間')
SELECT COUNT(*)
FROM t_student_j432
WHERE sno LIKE '______01%';
EXECUTE p_record_time_j432('沒有分割槽,按專業統計人數的結束時間')

-- Rebuild the student table, range-partitioned by the first digit of sno
-- (part_0 .. part_9).
-- The bounds are single-character strings, so the split depends only on the
-- first character: every sno starting with digit d lands in part_d, with no
-- dependence on the digits that follow.
drop table t_student_j432;
create table t_student_j432(
	sno varchar2(10) primary key,-- student number
	sname varchar2(32),
	sex varchar2(32),
	tel varchar2(32),
	email varchar2(32),
	birthday date)partition by range(sno)(
partition part_0 values less than ('1'),
partition part_1 values less than ('2'),
partition part_2 values less than ('3'),
partition part_3 values less than ('4'),
partition part_4 values less than ('5'),
partition part_5 values less than ('6'),
partition part_6 values less than ('7'),
partition part_7 values less than ('8'),
partition part_8 values less than ('9'),
partition part_9 values less than (maxvalue));

-- Reload: number, name and other attributes are paired by the ROWNUM each
-- source row receives.
insert into t_student_j432(sno, sname, sex, tel, email, birthday)
select ids.sno, names.sname, info.sex, info.tel, info.email, info.birthday
from (select rownum as rn, sno from t_student_sno_ab_j432) ids
inner join (select rownum as rn, sname from t_stu_name_j432) names
	on names.rn = ids.rn
inner join (select rownum as rn, sex, tel, email, birthday from t_stud_other_information_j432) info
	on info.rn = names.rn;

-- Re-add the integrity rules that were dropped with the old table.
alter table t_student_j432 add constraint ck_student_sex check(sex in('男','女','其它'));
alter table t_student_j432 add constraint ck_student_email check(email like '%@%.%');
alter table t_student_j432 add constraint ck_student_birthday check(birthday>=to_date('19940101','yyyymmdd') and birthday<=to_date('19990731','yyyymmdd'));

-- Same two counts on the partitioned table, then the full timing log.

-- Head count by first digit of the student number.
EXECUTE p_record_time_j432('有分割槽,按學號首位ID統計人數的開始時間')
SELECT COUNT(*)
FROM t_student_j432
WHERE sno LIKE '5%';
EXECUTE p_record_time_j432('有分割槽,按學號首位ID統計人數的結束時間')

-- Head count by the EF digits (characters 7-8 of the student number).
EXECUTE p_record_time_j432('有分割槽,按專業統計人數的開始時間')
SELECT COUNT(*)
FROM t_student_j432
WHERE sno LIKE '______01%';
EXECUTE p_record_time_j432('有分割槽,按專業統計人數的結束時間')

-- Print every recorded event in timestamp order and close the spool file.
COLUMN things FORMAT a64
COLUMN time FORMAT a32
SELECT *
FROM t_record_time_j432
ORDER BY time;
SPOOL OFF