HDP 3.1 平台,把csv文件动态load到hive分区表
- 上传csv文件到HDFS
[d0pang@hadoop1 ~]$ hdfs dfs -put visit.csv /user/d0pang/
- 创建临时表visit_tmp
因为csv文件第一行为标题行,建表时需要增加设置 TBLPROPERTIES("skip.header.line.count"="1")
-- Staging table for the raw CSV file; dropped and recreated on each run.
-- (Original used `//` comments, which are not valid HiveQL — `--` is.)
DROP TABLE IF EXISTS visit_tmp;  -- IF EXISTS: no error on the first run
CREATE TABLE `visit_tmp`(
    `visit_nbr` int,
    `store_nbr` smallint,
    `visit_date` date,
    `tot_visit_amt` decimal(19,2),
    `tot_retail_price` decimal(19,2))
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
-- The CSV's first line is a header row; skip it when reading.
TBLPROPERTIES("skip.header.line.count"="1");
- 创建正式表visit
-- Final table: same columns as visit_tmp, partitioned by load date `ts`.
-- `STORED AS ORC` replaces the deprecated spelling `ORCFILE`.
CREATE TABLE IF NOT EXISTS `visit`(
    `visit_nbr` int,
    `store_nbr` smallint,
    `visit_date` date,
    `tot_visit_amt` decimal(19,2),
    `tot_retail_price` decimal(19,2)
)
PARTITIONED BY (ts date)
STORED AS ORC;
- 进入beeline, 把csv文件先load到临时表visit_tmp
-- LOAD DATA INPATH moves (not copies) the HDFS file into the table's warehouse
-- location; OVERWRITE replaces any existing data in visit_tmp.
-- NOTE(review): the table is qualified as d0pang_crypto.visit_tmp here, but the
-- CREATE above is unqualified — confirm the session's current database matches.
load data inpath '/user/d0pang/visit.csv' overwrite into table d0pang_crypto.visit_tmp;
- 进入beeline, 把临时表数据转入正式表
-- Enable dynamic partitioning so the partition value is taken from the SELECT
-- (nonstrict: no static partition column is required in the INSERT).
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions=100000;
SET hive.exec.max.dynamic.partitions.pernode=100000;

-- Copy the staging rows into the partitioned table. Hive maps the SELECT list
-- by position: the five data columns first, then the last column (visit_date)
-- supplies the dynamic `ts` partition value.
INSERT OVERWRITE TABLE visit PARTITION (ts)
SELECT
    t.visit_nbr,
    t.store_nbr,
    t.visit_date,
    t.tot_visit_amt,
    t.tot_retail_price,
    t.visit_date
FROM visit_tmp t;
-- Verify which partitions the dynamic insert created.
SHOW PARTITIONS visit;
Leave a Reply