-- Web access-log table stored in ORC format; every column is kept as a string.
-- NOTE(review): the delimiter clause is kept from the original DDL; ORC is not
-- a delimited text format, so it presumably has no effect on how the data is
-- stored -- confirm before relying on it.
CREATE TABLE log_orc (
    track_time  STRING,
    url         STRING,
    session_id  STRING,
    referer     STRING,
    ip          STRING,
    end_user_id STRING,
    city_id     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS ORC;
Hive 的压缩格式
在 mapper 输出与 reducer 输出阶段进行压缩
-- Session-level compression settings for Hive on MapReduce.
-- Each SET is on its own line: a `--` comment runs to the end of the line,
-- so a statement written after `--` on the same line is never executed.

-- Map side: compress intermediate data and map output with Snappy.
set hive.exec.compress.intermediate=true;
set mapreduce.map.output.compress=true;
set mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;

-- Reduce side: compress the query output written by Hive.
set hive.exec.compress.output=true;

-- Final output: compress the job's output files with Snappy.
set mapreduce.output.fileoutputformat.compress=true;
set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;

-- Use block-level compression for the final output.
set mapreduce.output.fileoutputformat.compress.type=BLOCK;
创建一个使用 Snappy 压缩、以 ORC 格式存储的表
-- Web access-log table stored in ORC format with Snappy compression.
-- Same column layout as log_orc; every column is kept as a string.
-- The compression codec is selected through the "orc.compress" table
-- property; the key must be spelled exactly, with no embedded spaces.
-- NOTE(review): the delimiter clause is kept from the original DDL; ORC is not
-- a delimited text format, so it presumably has no effect on how the data is
-- stored -- confirm before relying on it.
CREATE TABLE log_orc_snappy (
    track_time  STRING,
    url         STRING,
    session_id  STRING,
    referer     STRING,
    ip          STRING,
    end_user_id STRING,
    city_id     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ("orc.compress" = "SNAPPY");