1. 程式人生 > 實用技巧 > datax 將 mysql 資料匯入 hive 表

datax將mysql資料匯入hive表

環境:CDH 5.12.1 版本,MySQL 5.7

1、mysql表結構

2、mysql表資料(user)

3、下載datax

wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz

4、在datax的job目錄編寫一個mysql2hive.json檔案

a) 下面是全量匯入

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": [
                            "id",
                            "name",
                            "age",
                            "create_time"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "jdbc:mysql://192.168.75.101:3306/test"
                                ],
                                "table": [
                                    "user"
                                ]
                            }
                        ],
                        "password": "yang156122",
                        "username": "root",
                        "where": ""
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "column": [
                            {
                                "name": "id",
                                "type": "INT"
                            },
                            {
                                "name": "name",
                                "type": "STRING"
                            },
                            {
                                "name": "age",
                                "type": "INT"
                            },
                            {
                                "name": "create_time",
                                "type": "TIMESTAMP"
                            }
                        ],
                        "compress": "gzip",
                        "defaultFS": "hdfs://192.168.75.101:8020",
                        "fieldDelimiter": "\t",
                        "fileName": "user",
                        "fileType": "text",
                        "path": "/user/datax/data/ceshi",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
View Code

b) 下面是按指定的時間,增量匯入

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": [
                            "id",
                            "name",
                            "age",
                            "create_time"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "jdbc:mysql://192.168.75.101:3306/test"
                                ],
                                "table": [
                                    "user"
                                ]
                            }
                        ],
                        "password": "yang156122",
                        "username": "root",
                        "where": "create_time >= '2020-10-21'"
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "column": [
                            {
                                "name": "id",
                                "type": "INT"
                            },
                            {
                                "name": "name",
                                "type": "STRING"
                            },
                            {
                                "name": "age",
                                "type": "INT"
                            },
                            {
                                "name": "create_time",
                                "type": "TIMESTAMP"
                            }
                        ],
                        "compress": "gzip",
                        "defaultFS": "hdfs://192.168.75.101:8020",
                        "fieldDelimiter": "\t",
                        "fileName": "user",
                        "fileType": "text",
                        "path": "/user/datax/data/ceshi",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
View Code

c)動態傳參,增量匯入(推薦看這個)

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": [
                            "id",
                            "name",
                            "age",
                            "create_time"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "jdbc:mysql://192.168.75.101:3306/test"
                                ],
                                "table": [
                                    "user"
                                ]
                            }
                        ],
                        "password": "yang156122",
                        "username": "root",
                        "where": "create_time >= '$date'"
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "column": [
                            {
                                "name": "id",
                                "type": "INT"
                            },
                            {
                                "name": "name",
                                "type": "STRING"
                            },
                            {
                                "name": "age",
                                "type": "INT"
                            },
                            {
                                "name": "create_time",
                                "type": "TIMESTAMP"
                            }
                        ],
                        "compress": "gzip",
                        "defaultFS": "hdfs://192.168.75.101:8020",
                        "fieldDelimiter": "\t",
                        "fileName": "user",
                        "fileType": "text",
                        "path": "/user/datax/data/ceshi",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}

5、建立hive的表

-- Recreate the Hive target table for the DataX import.
-- FIELDS TERMINATED BY '\t' must match the hdfswriter "fieldDelimiter"
-- setting in mysql2hive.json, and the column order must match its
-- writer "column" list (id, name, age, create_time).
DROP TABLE IF EXISTS default.user;
CREATE TABLE default.user (
    id          INT,
    name        STRING,
    age         INT,
    create_time TIMESTAMP
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';

6、如果是增量匯入(包括動態傳參),每執行一次datax,都要進行load data

-- Move the DataX output files from the HDFS staging path into the Hive table.
-- NOTE: LOAD DATA INPATH *moves* (not copies) the files, so this must be
-- re-run after every incremental DataX run (see step 6 above).
load data inpath '/user/datax/data/ceshi' into table default.user ;

7、這一步,僅針對動態傳參,增量匯入(可以忽略步驟6),vim start.sh

#!/bin/bash
# start.sh - run the DataX incremental job for yesterday's data, then load
# the resulting HDFS files into the Hive table default.user.
#
# Fixes the pasted original, where the assignment "a=" was split across two
# lines (the stray "a" was appended to the echo arguments and "=`date ...`"
# then failed with "command not found"), and all commands were crammed onto
# a single line with no separators.
echo "獲取前一天的時間,時間格式為2020-10-21"
# Yesterday's date in UTC, e.g. 2020-10-21; substituted into the
# "$date" placeholder of the reader's "where" clause via -Ddate.
a=$(date -d yesterday -u +%Y-%m-%d)
echo "開始啦"
python /root/data/soft/datax/datax/bin/datax.py -p "-Ddate=${a}" /root/data/soft/datax/datax/job/mysql2hive.json
# Give HDFS a moment to finalize the written files before loading.
sleep 10
echo "開始將資料入hive表"
# LOAD DATA INPATH moves the staged files into the Hive table.
hive -e "load data inpath '/user/datax/data/ceshi' into table default.user;"

僅供參考.....如有問題,請留言....