Wednesday, April 3, 2019

Managing Main and Stage table records using HIVE


1. Create Main table:-

CREATE TABLE kiran.main_table
(
name string,
age string,
city string,
date_ string
)
ROW FORMAT DELIMITED FIELDS
TERMINATED BY '\u0005';

2. Insert few records into main table:-

insert into kiran.main_table (name, age, city, date_) VALUES ("kiran", "37", "Hyd", "12-12-2012");
insert into kiran.main_table (name, age, city, date_) VALUES ("teja", "27", "Chennai", "02-12-2012");
insert into kiran.main_table (name, age, city, date_) VALUES ("avvaru", "17", "Bangalore", "06-12-2012");

+------------------+-----------------+------------------+-------------------+--+
| main_table.name  | main_table.age  | main_table.city  | main_table.date_  |
+------------------+-----------------+------------------+-------------------+--+
| kiran            | 37              | Hyd              | 12-12-2012        |
| teja             | 27              | Chennai          | 02-12-2012        |
| avvaru           | 17              | Bangalore        | 06-12-2012        |
+------------------+-----------------+------------------+-------------------+--+

3.Create Stage table:-

CREATE TABLE kiran.stage_table
(
name string,
age string,
city string
)
ROW FORMAT DELIMITED FIELDS
TERMINATED BY '\u0005';

4. Insert few records into table:-

insert into kiran.stage_table (name, age, city) VALUES ("Sree", "17", "Hyd");
insert into kiran.stage_table (name, age, city) VALUES ("murari", "47", "Chennai");
insert into kiran.stage_table (name, age, city) VALUES ("avvaru", "17", "Bangalore");

+-------------------+------------------+-------------------+--+
| stage_table.name  | stage_table.age  | stage_table.city  |
+-------------------+------------------+-------------------+--+
| Sree              | 17               | Hyd               |
| murari            | 47               | Chennai           |
| avvaru            | 17               | Bangalore         |
+-------------------+------------------+-------------------+--+


5. Insert Non Duplicate from stage to main:
INSERT INTO TABLE kiran.main_table SELECT c.name, c.age, c.city FROM kiran.stage_table c LEFT OUTER JOIN
(SELECT c.name, c.age, c.city FROM kiran.stage_table c INNER JOIN kiran.main_table o ON (o.name = c.name) AND (o.age = c.age) AND (o.city = c.city)) k
WHERE k.name != c.name;

6. Find Duplicates from Stage:-
SELECT c.name, c.age, c.city FROM kiran.stage_table c INNER JOIN kiran.main_table o ON (o.name = c.name) WHERE (o.age = c.age) AND (o.city = c.city);

7. Find Non - Duplicates from Stage:-
SELECT c.name, c.age, c.city FROM kiran.stage_table c LEFT OUTER JOIN (SELECT c.name, c.age, c.city FROM kiran.stage_table c INNER JOIN kiran.main_table o ON (o.name = c.name)) k WHERE k.name != c.name;