-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmissing_table_dirs.hql
64 lines (61 loc) · 2.4 KB
/
missing_table_dirs.hql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
-- Hive Migration Scripts will fail against tables without supporting
-- filesystem directories.
--
-- List all distinct Tbl and Partition Directories
-- Used the resulting 'hdfs_path' items to validate existence.
--
-- With this list, find the missing directories and do
-- one or the following:
--
-- 1. Create the missing directory (Hive needs RWX permissions on the directory)
-- 2. Remove the Table Schema
--
--
-- Variables:
-- DB - The database you placed the hms dump table.
-- ENV - IE: dev,qa,prod. Used to support multiple
-- environment dump files in the same database.
USE ${DB};
WITH TBL_LOCATIONS AS (
SELECT
db_name
, tbl_name
, tbl_type
, NULL AS part_name
, concat('test ', '-e ',
regexp_extract(tbl_location, 'hdfs://([^/]+)(.*)', 2)) AS hdfs_path_check
, count(1)
FROM
hms_dump_${ENV}
WHERE
part_name IS NULL
AND tbl_type != 'VIRTUAL_VIEW'
GROUP BY db_name, tbl_name, tbl_type, part_name
, regexp_extract(tbl_location, 'hdfs://([^/]+)(.*)', 2)
)
, PART_LOCATIONS AS (
SELECT
db_name
, tbl_name
, tbl_type
, part_name
, concat('test ', '-e ',
regexp_extract(part_location, 'hdfs://([^/]+)(.*)', 2)) AS hdfs_path_check
, count(1)
FROM
hms_dump_${ENV}
WHERE
part_name IS NOT NULL
AND tbl_type != 'VIRTUAL_VIEW'
GROUP BY db_name, tbl_name, tbl_type, part_name
, regexp_extract(part_location, 'hdfs://([^/]+)(.*)', 2)
)
SELECT
hdfs_path_check
FROM
TBL_LOCATIONS
UNION ALL
SELECT
hdfs_path_check
FROM
PART_LOCATIONS;