-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathhms_dump_create.hql
99 lines (89 loc) · 3.6 KB
/
hms_dump_create.hql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
-- Table Schema for the External HMS DUMP output from the
-- sqoop job.
--
-- Ensure the 'target-hdfs-dir' specified in the sqoop script matches
-- the default location for the table.
--
-- IE: target-hdfs-dir /warehouse/tablespace/external/hive/${DB}.db/hms_dump_${ENV}
--
-- This script should be run AFTER the sqoop job has run so that the sqoop job
-- can create the directory.
--
-- Variables:
-- DB - The database you placed the hms dump table.
-- ENV - IE: dev,qa,prod. Used to support multiple
-- environment dump files in the same database.
-- EXTERNAL_WAREHOUSE_DIR - Root of the external Hive warehouse on HDFS,
-- IE: /warehouse/tablespace/external/hive. Used to build the
-- LOCATION clause of every external table created below.
CREATE DATABASE IF NOT EXISTS ${DB};
USE ${DB};
-- Raw HMS metadata dump written by the sqoop job (see header).
-- Every column is STRING: sqoop lands the extract as delimited text,
-- so typing/casting is deferred to the queries that read this table.
-- NOTE(review): the column names suggest one row per
-- (database, table, table-param, partition) combination joined out of
-- the metastore RDBMS — confirm against the sqoop query.
CREATE EXTERNAL TABLE IF NOT EXISTS hms_dump_${ENV} (
-- Database-level attributes.
DB_NAME STRING,
DB_DEFAULT_LOC STRING,
DB_OWNER STRING,
-- Table-level attributes.
TBL_ID STRING,
TBL_NAME STRING,
TBL_OWNER STRING,
TBL_TYPE STRING,
TBL_INPUT_FORMAT STRING,
TBL_OUTPUT_FORMAT STRING,
TBL_LOCATION STRING,
TBL_NUM_BUCKETS STRING,
TBL_SERDE_SLIB STRING,
-- Table parameter key/value pair (one pair per row).
TBL_PARAM_KEY STRING,
TBL_PARAM_VALUE STRING,
-- Partition-level attributes (empty/NULL for unpartitioned tables --
-- presumably; verify against the sqoop extract).
PART_ID STRING,
PART_NAME STRING,
PART_INPUT_FORMAT STRING,
PART_OUTPUT_FORMAT STRING,
PART_LOCATION STRING,
PART_NUM_BUCKETS STRING,
PART_SERDE_SLIB STRING,
TBL_CREATE_TIME STRING
)
-- '\002' marks NULLs in the sqoop text output; fields use the default delimiter.
ROW FORMAT DELIMITED NULL DEFINED AS '\002'
STORED AS TEXTFILE
LOCATION '${EXTERNAL_WAREHOUSE_DIR}/${DB}.db/hms_dump_${ENV}'
-- Purge-on-drop so dropping the external table also removes the dump files.
TBLPROPERTIES (
"external.table.purge" = "true");
-- Per-directory HDFS sizing report (tab-separated text dropped into the
-- table location; presumably produced by an external 'hdfs dfs -count'
-- style job — confirm with the companion scripts).
CREATE EXTERNAL TABLE IF NOT EXISTS dir_size_${ENV} (
num_of_folders INT,
num_of_files INT,
size BIGINT,
directory STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '${EXTERNAL_WAREHOUSE_DIR}/${DB}.db/dir_size_${ENV}'
TBLPROPERTIES (
"external.table.purge" = "true");
-- Lists of HDFS paths, partitioned by the report "section" that
-- produced them (tab-separated text in the table location).
CREATE EXTERNAL TABLE IF NOT EXISTS paths_${ENV} (
path STRING
)
PARTITIONED BY (section STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '${EXTERNAL_WAREHOUSE_DIR}/${DB}.db/paths_${ENV}'
TBLPROPERTIES (
"external.table.purge" = "true");
-- Pre-register the static 'managed_deltas' partition so the managed-table
-- directories where delta records were found have a place to land.
ALTER TABLE paths_${ENV} ADD IF NOT EXISTS PARTITION (section = "managed_deltas");
-- Reference list of SerDe / storage-handler class names that the analysis
-- queries treat as "known" when examining TBL_SERDE_SLIB values.
CREATE TABLE IF NOT EXISTS known_serdes_${ENV} (
serde_name STRING
);
-- The rest of this script is idempotent (IF NOT EXISTS everywhere), but a
-- bare INSERT would append a duplicate copy of these seed rows on every
-- re-run. Truncate the (managed) table first so re-running the script
-- always leaves exactly one copy of the reference data.
TRUNCATE TABLE known_serdes_${ENV};
INSERT INTO TABLE
known_serdes_${ENV} (serde_name)
VALUES ("org.apache.hadoop.hive.ql.io.orc.OrcSerde")
, ("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")
, ("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")
, ("org.apache.hadoop.hive.hbase.HBaseSerDe")
, ("org.apache.hive.storage.jdbc.JdbcSerDe")
-- NOTE(review): the next two are storage handlers, not SerDes; kept
-- because the dump's TBL_SERDE_SLIB column may carry either.
, ("org.apache.hadoop.hive.druid.DruidStorageHandler")
, ("org.apache.phoenix.hive.PhoenixStorageHandler")
, ("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
, ("org.apache.hadoop.hive.serde2.RegexSerDe")
, ("parquet.hive.serde.ParquetHiveSerDe")
, ("org.apache.hadoop.hive.serde2.OpenCSVSerde")
, ("org.apache.hive.hcatalog.data.JsonSerDe");
-- One HDFS location per line; presumably locations referenced by the
-- metastore dump that were not found on the filesystem — confirm with
-- the job that populates this directory.
CREATE EXTERNAL TABLE IF NOT EXISTS missing_dirs_${ENV} (hdfs_loc STRING)
ROW FORMAT DELIMITED
STORED AS TEXTFILE
LOCATION '${EXTERNAL_WAREHOUSE_DIR}/${DB}.db/missing_dirs_${ENV}'
TBLPROPERTIES ("external.table.purge" = "true");
-- Managed summary table holding one row of warehouse-wide counts,
-- filled in by the reporting queries that read hms_dump_${ENV}.
CREATE TABLE IF NOT EXISTS hive_stats_count_${ENV} (
TOTAL_DBS INT, -- databases seen in the dump
TOTAL_TABLES INT, -- all tables, every type
TOTAL_MANAGED_TABLES INT, -- MANAGED_TABLE entries
TOTAL_MANAGED_TABLES_APPSHIVE INT, -- managed tables; apps/hive split -- TODO confirm criteria
TOTAL_MANAGED_TABLES_USERHIVE INT, -- managed tables; user/hive split -- TODO confirm criteria
TOTAL_EXTERNAL_TABLES INT, -- EXTERNAL_TABLE entries
TOTAL_VIRTUAL_VIEWS INT -- VIRTUAL_VIEW entries
);