shell 指令碼多程序建立 mysql 測試資料
# ------------------------------------------------------------------
#建立 taskNum * perFileRecord 條測試資料,taskNum 為程序併發數,最好與你的cpu個數一致
#資料欄位:| id | sex | age | name | email | start_date | end_date |
#id int 唯一,sex enum,age 0~65($RANDOM/500),name、email 基本唯一,start_date 為 0~655 天前的日期($RANDOM/50),日期格式為 YYYYMMDD
#!/usr/bin/env bash
# genfile.sh - generate tab-separated MySQL test records with parallel workers.
#
# Usage: bash genfile.sh taskNum(1-99) perFileRecord(>0) [offset]
#   taskNum        number of background workers; worker i writes <i>.txt
#   perFileRecord  number of records produced by each worker
#   offset         optional value added to every id (defaults to 0)
#
# Result: record_<YYYY-MM-DD>.txt in the current directory, one record per
# line, columns separated by tabs:
#   id  sex  age  name  email  start_date  end_date
#
# numProcessor=`grep "processor" /proc/cpuinfo | wc -l`

stime=$(date +%s)
taskNum=$1
perFileRecord=$2
offset=$3

# NOTE(review): the mail domain was obscured in the published script, so this
# is a placeholder; name(8) + "@" + domain must fit the email varchar(16)
# column. Replace it with the real domain if it is known.
emailDomain="db.test"

# Print a message in bold red.
colorEcho(){
    echo -e "\e[31;1m$1\e[0m"
}

# Print the usage text and abort with exit status 1.
usage(){
    colorEcho " Usage:"
    colorEcho " bash fileName taskNum(1-99) perFileRecord(>0) [offset(>0)]"
    exit 1
}

# Validate the arguments, normalise them to base-10 integers and remove the
# per-worker files left over from a previous run.
#   $1 taskNum, $2 perFileRecord, $3 offset (may be empty)
initEnv(){
    [[ $1 =~ ^[0-9]{1,2}$ && $2 =~ ^[0-9]+$ && ${3:-0} =~ ^[0-9]+$ ]] || usage
    # Force base 10 so inputs such as "08" are not rejected as invalid octal.
    taskNum=$((10#$1))
    perFileRecord=$((10#$2))
    offset=$((10#${3:-0}))
    # taskNum=0 would leave nothing to merge (cat would then wait on stdin).
    (( taskNum >= 1 && perFileRecord >= 1 )) || usage
    rm -f [0-9]*.txt
}

# Write one record per id in [$1, $2] to stdout.
#   sex         male for odd ids, female for even ids
#   age         $RANDOM/500, i.e. 0..65
#   name/email  first 8 characters of a kernel-generated UUID
#   start_date  $RANDOM/50 (0..655) days ago, formatted YYYYMMDD
#   end_date    start_date + age days, but never later than today
createRecord(){
    local start=$1 end=$2
    local id sex age name emailName rand1000 rand900 start_date end_date
    for (( id = start; id <= end; id++ )); do
        if (( id % 2 )); then sex=male; else sex=female; fi
        age=$((RANDOM / 500))
        emailName=$(head -c8 /proc/sys/kernel/random/uuid)
        #emailName=`openssl passwd -stdin <<<""|head -c8`
        name=$emailName
        rand1000=$((RANDOM / 50))
        # Days-ago value for end_date: the difference, clamped at 0 (today).
        # The previous code printed the age itself instead of the difference.
        rand900=$((rand1000 - age))
        (( rand900 > 0 )) || rand900=0
        start_date=$(date -d "$rand1000 days ago" +%Y%m%d)
        end_date=$(date -d "$rand900 days ago" +%Y%m%d)
        printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            "$id" "$sex" "$age" "$name" "$emailName@$emailDomain" \
            "$start_date" "$end_date"
    done
}

# Start taskNum background workers, each generating its own id range into
# <i>.txt, then wait for all of them to finish.
process(){
    local i st ed
    for (( i = 1; i <= taskNum; i++ )); do
        st=$(( (i - 1) * perFileRecord + 1 + offset ))
        ed=$(( i * perFileRecord + offset ))
        echo "$st $ed"
        createRecord "$st" "$ed" > "$i.txt" &
    done
    wait
}

# Concatenate the per-worker files in worker order and print a summary.
finally(){
    local file line size etime diffTime i
    local parts=()
    # Evaluate the date once so the merged file and the summary always agree.
    file=record_$(date -I).txt
    for (( i = 1; i <= taskNum; i++ )); do
        parts+=("$i.txt")
    done
    cat "${parts[@]}" > "$file"
    echo "————————————————————————————————————————————————————————————"
    line=$((taskNum * perFileRecord))
    size=$(ls -hl "$file" | awk '{print $5}')
    etime=$(date +%s)
    diffTime=$((etime - stime))
    echo "--------------->> FileName: $file"
    echo "--------------->> FileLine: $line"
    echo "--------------->> FileSize: $size"
    echo "--------------->> CostTime: $diffTime"
    echo "————————————————————————————————————————————————————————————"
}

initEnv "$taskNum" "$perFileRecord" "$offset"
process
finally
#!/usr/bin/env bash
# mgr.sh - helper for the record file of the current day.
#
# Usage: bash mgr.sh [-c int(1-99) | -k processName]
#   -c N     make N copies of record_<date>.txt, concatenate them and renumber
#            the first column 1..lines into record_<date>.txt_merge
#   -k NAME  force-kill every process whose command line matches NAME

param1=$1
param2=$2
fileName=record_$(date -I).txt

# Print a message in bold red.
colorEcho(){
    echo -e "\e[31;1m$1\e[0m"
}

# Print the usage text and abort with exit status 1.
usage(){
    colorEcho " Usage:"
    colorEcho " bash fileName [-c int(1-99)|-k processName]"
    exit 1
}

# Copy the record file param2 times in parallel, then merge the copies into
# "<fileName>_merge" with the first (id) column replaced by the line number.
cpMergeFiles(){
    local i count=$((10#$param2))
    local copies=()
    rm -f "${fileName}_"*
    for (( i = 1; i <= count; i++ )); do
        copies+=("${fileName}_$i")
        cp "$fileName" "${fileName}_$i" &
    done
    wait
    # Read the explicit list of copies rather than a glob: a glob expanded
    # while the pipeline starts could also match the _merge output file.
    cat "${copies[@]}" | awk -F'\t' -v OFS='\t' '{$1=NR; print}' > "${fileName}_merge"
    echo "copy finished, and FileName is: ${fileName}_merge"
}

# Fill the array "targets" with the PIDs whose command line matches param2,
# leaving out this script itself and PIDs that can no longer be signalled
# (already exited, or not permitted for the current user).
findTargets(){
    targets=()
    local pid
    for pid in $(pgrep -f -- "$param2"); do
        (( pid == $$ )) && continue
        kill -0 "$pid" 2>/dev/null && targets+=("$pid")
    done
}

# Force-kill all processes matching param2 and report the match count before
# and after. "pkill -f" is not used here because this script's own command
# line ("bash mgr.sh -k <name>") matches the pattern too, so it would kill
# itself before the remaining commands run.
killTask(){
    local targets
    findTargets
    echo "$param2 is number: ${#targets[@]}"
    echo "will kill..."
    (( ${#targets[@]} )) && kill -9 "${targets[@]}" 2>/dev/null
    echo "kill finished..."
    findTargets
    echo "$param2 is number: ${#targets[@]}"
}

# Dispatch on the first argument; anything unrecognised prints the usage text.
execTask(){
    if [[ $param1 == "-c" && $param2 =~ ^[0-9]{1,2}$ ]] && (( 10#$param2 >= 1 )); then
        cpMergeFiles
    elif [[ $param1 == "-k" && -n $param2 ]]; then
        killTask
    else
        usage
    fi
}

execTask
在一臺 2 核的機器上測試結果如下:
# ------------------------------------------------------------------
# Create the test table; choose whichever storage engine you need.
# start_date and end_date hold dates as YYYYMMDD integers.
CREATE TABLE t_test (
    id         INT,
    sex        ENUM('male', 'female'),
    age        TINYINT UNSIGNED,
    name       CHAR(8),
    email      VARCHAR(16),
    start_date INT,
    end_date   INT
)
ENGINE = InnoDB
DEFAULT CHARSET = UTF8
AUTO_INCREMENT = 1;
# ------------------------------------------------------------------
# Load the generated file into t_test.
# The generator writes one record per line with tab-separated columns, so the
# field terminator must be a tab and the line terminator a newline.
USE test;
LOAD DATA LOCAL INFILE "/root/record.txt"
INTO TABLE t_test
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
(id, sex, age, name, email, start_date, end_date);
# mysqlimport -f -proot -uroot -h127.0.0.1 --fields-terminated-by='\t' database_name /home/Jun/scripts/table_name.txt
# ------------------------------------------------------------------
# 1. Remove the primary-key attribute only; the column itself is kept.
ALTER TABLE t_test
    MODIFY COLUMN id INT,
    DROP PRIMARY KEY;
# 2. Turn an existing column into an auto-increment primary key.
ALTER TABLE t_test
    CHANGE COLUMN id id INT PRIMARY KEY AUTO_INCREMENT;
# 3. Create a composite index.
CREATE INDEX id_sex_age ON t_test (id, sex, age);
# 4. Drop the composite index.
DROP INDEX id_sex_age ON t_test;
# 5. List the indexes of the table.
SHOW INDEXES FROM t_test;
# ------------------------------------------------------------------
# Create a related table from 1000 randomly chosen rows of t_test.
# The column list names every column of t_test, in definition order.
CREATE TABLE t_test1 AS
SELECT
    id,
    sex,
    age,
    name,
    email,
    start_date,
    end_date
FROM t_test
ORDER BY RAND()
LIMIT 1000;
# ------------------------------------------------------------------
#最終資料如下
mysql> select * from t_test limit 5;
+------+--------+------+----------+-----------------+------------+----------+
| id | sex | age | name | email | start_date | end_date |
+------+--------+------+----------+-----------------+------------+----------+
| 1 | male | 52 | e67bcd5f | [email protected] | 20101120 | 20110111 |
| 2 | female | 2 | cc92a107 | [email protected] | 20120123 | 20120125 |
| 3 | male | 35 | 2ec6b1d8 | [email protected] | 20120406 | 20120511 |
| 4 | female | 31 | 4c67aade | [email protected] | 20120302 | 20120402 |
| 5 | male | 19 | b518a6a3 | [email protected] | 20120814 | 20120830 |
+------+--------+------+----------+-----------------+------------+----------+
5 rows in set (0.00 sec)
mysql>
附另外一種思路:
#如下是Mysql隨機產生的R、F和M三個指標的數值程式碼,共產生10萬行的虛擬記錄:
# Create the database if it is missing, so the script can be re-run, then
# make it the default database.
CREATE DATABASE IF NOT EXISTS sample;
USE sample;
# (Re)create the table that receives the generated R/F/M values.
DROP TABLE IF EXISTS data_rfm;
CREATE TABLE data_rfm (
    id        INT UNSIGNED AUTO_INCREMENT,
    Recency   MEDIUMINT UNSIGNED,
    Frequency SMALLINT UNSIGNED,
    Monetary  DECIMAL(10, 2),
    PRIMARY KEY (id)
);
# Drop the stored procedure if it already exists.
DROP PROCEDURE IF EXISTS randnum_p;
# Switch the client statement delimiter so that semicolons inside the
# procedure body do not end the CREATE PROCEDURE statement early.
DELIMITER //
# randnum_p(n): insert n rows of random values into data_rfm.
#   Recency    integer in 1..365
#   Frequency  integer in 1..150
#   Monetary   whole amount in 200..3699
# All rows are written inside one explicit transaction that is committed at
# the end, so the session autocommit setting of the caller is left unchanged.
# Nothing is inserted when n is NULL or less than 1.
CREATE PROCEDURE randnum_p(n INT)
BEGIN
    DECLARE i INT DEFAULT 1;
    # The v_ prefix keeps the local variables distinct from the column names.
    DECLARE v_recency MEDIUMINT DEFAULT 0;
    DECLARE v_frequency SMALLINT DEFAULT 0;
    DECLARE v_monetary DECIMAL(10, 2) DEFAULT 0.00;

    START TRANSACTION;
    WHILE i <= n DO
        SET v_recency = FLOOR(1 + 365 * RAND());
        SET v_frequency = FLOOR(1 + 150 * RAND());
        SET v_monetary = FLOOR(200 + 3500 * RAND());
        # id is omitted so that AUTO_INCREMENT assigns it.
        INSERT INTO data_rfm
            (Recency, Frequency, Monetary)
        VALUES
            (v_recency, v_frequency, v_monetary);
        SET i = i + 1;
    END WHILE;
    COMMIT;
END//
# Restore the default statement delimiter.
DELIMITER ;
# Call the procedure to generate 100000 rows.
CALL randnum_p(100000);
推薦閱讀:
[1] 在Java中如何偽造資料
http://blog.sina.com.cn/s/blog_e59371cc0102v34x.html
[2] 實戰: RFM模型使用,MySQL 儲存過程偽造資料