1. Create Main table:-
CREATE TABLE kiran.main_table
(
name string,
age string,
city string,
date_ string
)
ROW FORMAT DELIMITED FIELDS
TERMINATED BY '\u0005';
2. Insert few records into main table:-
insert into kiran.main_table (name, age, city, date_) VALUES ("kiran", "37", "Hyd", "12-12-2012");
insert into kiran.main_table (name, age, city, date_) VALUES ("teja", "27", "Chennai", "02-12-2012");
insert into kiran.main_table (name, age, city, date_) VALUES ("avvaru", "17", "Bangalore", "06-12-2012");
+------------------+-----------------+------------------+-------------------+--+
| main_table.name | main_table.age | main_table.city | main_table.date_ |
+------------------+-----------------+------------------+-------------------+--+
| kiran | 37 | Hyd | 12-12-2012 |
| teja | 27 | Chennai | 02-12-2012 |
| avvaru | 17 | Bangalore | 06-12-2012 |
+------------------+-----------------+------------------+-------------------+--+
3.Create Stage table:-
CREATE TABLE kiran.stage_table
(
name string,
age string,
city string
)
ROW FORMAT DELIMITED FIELDS
TERMINATED BY '\u0005';
4. Insert few records into table:-
insert into kiran.stage_table (name, age, city) VALUES ("Sree", "17", "Hyd");
insert into kiran.stage_table (name, age, city) VALUES ("murari", "47", "Chennai");
insert into kiran.stage_table (name, age, city) VALUES ("avvaru", "17", "Bangalore");
+-------------------+------------------+-------------------+--+
| stage_table.name | stage_table.age | stage_table.city |
+-------------------+------------------+-------------------+--+
| Sree | 17 | Hyd |
| murari | 47 | Chennai |
| avvaru | 17 | Bangalore |
+-------------------+------------------+-------------------+--+
5. Insert Non Duplicate from stage to main:
INSERT INTO TABLE kiran.main_table SELECT c.name, c.age, c.city FROM kiran.stage_table c LEFT OUTER JOIN
(SELECT c.name, c.age, c.city FROM kiran.stage_table c INNER JOIN kiran.main_table o ON (o.name = c.name) AND (o.age = c.age) AND (o.city = c.city)) k
WHERE k.name != c.name;
6. Find Duplicates from Stage:-
SELECT c.name, c.age, c.city FROM kiran.stage_table c INNER JOIN kiran.main_table o ON (o.name = c.name) WHERE (o.age = c.age) AND (o.city = c.city);
7. Find Non - Duplicates from Stage:-
SELECT c.name, c.age, c.city FROM kiran.stage_table c LEFT OUTER JOIN (SELECT c.name, c.age, c.city FROM kiran.stage_table c INNER JOIN kiran.main_table o ON (o.name = c.name)) k WHERE k.name != c.name;