SlideShare a Scribd company logo
1 of 7
Nag Arvind Gudiseva
1
HIVE PERFORMANCE OPTIMIZATIONS
SERDE
ORIGINAL
-- Baseline DDL: a text table with explicitly declared field and line terminators.
-- The terminators must be written as escape sequences: a bare 't' would make the
-- letter t the field separator, and Hive expects '\n' as the line terminator.
CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_nda_detail (
    RUN_ID            BIGINT,
    GUID              STRING,
    CIP_COLLECTION_ID BIGINT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
OPTIMISED
-- Same three-column table, declared by naming the SerDe class directly
-- instead of using ROW FORMAT DELIMITED; no delimiter properties are given.
CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_nda_detail (
    RUN_ID            BIGINT,
    GUID              STRING,
    CIP_COLLECTION_ID BIGINT
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
OCTAL
ORIGINAL
-- "Before" example for the octal-delimiter tip: the field and line terminators
-- are given as the placeholder strings '<FSP>' and '<RSP>'.
-- NOTE(review): this statement carries both ROW FORMAT DELIMITED and
-- ROW FORMAT SERDE; Hive's DDL grammar appears to accept only one row-format
-- clause per table, so confirm whether it runs as written.
CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_nda_detail (RUN_ID BIGINT, GUID STRING,
CIP_COLLECTION_ID BIGINT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '<FSP>'
LINES TERMINATED BY '<RSP>'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
Nag Arvind Gudiseva
2
OPTIMISED
-- Text table with complex-typed columns, using control characters written as
-- octal escapes for the delimiters: \001 between fields, \002 between
-- collection items, and \003 between a map key and its value.
-- Without the backslash the literal is the digit string "001", not the control
-- character, so the escapes are required.
CREATE TABLE employees (
    name         STRING,
    salary       FLOAT,
    subordinates ARRAY<STRING>,
    deductions   MAP<STRING, FLOAT>,
    address      STRUCT<street: STRING, city: STRING, state: STRING, zip: INT>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
INTERMEDIATE TABLES, BUCKETING AND COMPRESSION
ORIGINAL
-- Unoptimised baseline: joins the two source tables directly on
-- FAMILY / PLATFORM / ROLE / CIP_COLLECTION_ID and overwrites the
-- (CIP_COLLECTION_ID, ELEMENT_TYPE) partitions of the target table.
-- The partition values are not fixed in the PARTITION clause, so the last two
-- SELECT columns supply them.
INSERT OVERWRITE TABLE NDA_CNR_DEVICE_JOIN_MAP_FH
PARTITION (CIP_COLLECTION_ID, ELEMENT_TYPE)
SELECT
    T1.CIP_DEVICE_ID AS D2,
    T2.CIP_DEVICE_ID AS D1,
    T2.DV_CNT_BY_GRP,
    T2.FH_LVL_GRP,
    T2.FAMILY,
    T2.PLATFORM,
    T2.ROLE,
    T2.LEVEL_1,
    T2.LEVEL_2,
    T2.LEVEL_3,
    T2.IS_POLICY,
    T2.IS_GRP_POLICY,
    T2.ELEMENT,
    T1.CIP_COLLECTION_ID,
    -- (page break in the source deck: Nag Arvind Gudiseva, page 3)
    T2.ELEMENT_TYPE
FROM NDA_POLICY_DV_DETAILS_FH T1
JOIN NDA_CNR_DEVICE_ELEMENT_MAP_FH T2
    ON (T1.FAMILY = T2.FAMILY
        AND T1.PLATFORM = T2.PLATFORM
        AND T1.ROLE = T2.ROLE
        AND T1.CIP_COLLECTION_ID = T2.CIP_COLLECTION_ID)
WHERE (T1.CIP_COLLECTION_ID = 255 AND T2.ELEMENT_TYPE = 'FTS');
OPTIMISED
-- Ask Hive to honour the target table's CLUSTERED BY / SORTED BY definition
-- when inserting. Each SET needs its own terminator to run in a script.
-- NOTE(review): these two properties are reported as removed in Hive 2.x, where
-- the behaviour is always on; confirm against the target Hive version.
SET hive.enforce.bucketing=true;
SET hive.enforce.sorting=true;
-- Intermediate table for the policy/device side of the join.
-- Bucketed and sorted on the join keys (FAMILY, PLATFORM, ROLE) into 50
-- buckets, stored as ORC with the orc.compress property set to SNAPPY.
CREATE TABLE IF NOT EXISTS NDA_POLICY_DV_DETAILS_FH_TEMP (
    FAMILY            STRING,
    PLATFORM          STRING,
    ROLE              STRING,
    D2                BIGINT,
    CIP_COLLECTION_ID BIGINT
)
CLUSTERED BY (FAMILY, PLATFORM, ROLE)
    SORTED BY (FAMILY, PLATFORM, ROLE)
    INTO 50 BUCKETS
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY");
-- Load the policy/device intermediate table with the rows of one collection,
-- distributing and sorting them by the join keys.
-- The filter literal uses plain ASCII quotes; typographic quotes (‘ ’) are not
-- valid string delimiters in HiveQL.
INSERT OVERWRITE TABLE NDA_POLICY_DV_DETAILS_FH_TEMP
SELECT
    T1.FAMILY,
    T1.PLATFORM,
    T1.ROLE,
    T1.CIP_DEVICE_ID AS D2,
    -- (page break in the source deck: Nag Arvind Gudiseva, page 4)
    T1.CIP_COLLECTION_ID
FROM NDA_POLICY_DV_DETAILS_FH T1
WHERE (T1.CIP_COLLECTION_ID = '3159')
CLUSTER BY T1.FAMILY, T1.PLATFORM, T1.ROLE;
-- Intermediate table for the device/element side of the join.
-- Uses the same bucketing layout as NDA_POLICY_DV_DETAILS_FH_TEMP: clustered
-- and sorted on (FAMILY, PLATFORM, ROLE) into 50 buckets, ORC with the
-- orc.compress property set to SNAPPY.
CREATE TABLE IF NOT EXISTS NDA_CNR_DEVICE_ELEMENT_MAP_FH_TEMP (
    FAMILY        STRING,
    PLATFORM      STRING,
    ROLE          STRING,
    D1            BIGINT,
    DV_CNT_BY_GRP BIGINT,
    FH_LVL_GRP    STRING,
    LEVEL_1       STRING,
    LEVEL_2       STRING,
    LEVEL_3       STRING,
    IS_POLICY     STRING,
    IS_GRP_POLICY STRING,
    ELEMENT       STRING,
    ELEMENT_TYPE  STRING
)
CLUSTERED BY (FAMILY, PLATFORM, ROLE)
    SORTED BY (FAMILY, PLATFORM, ROLE)
    INTO 50 BUCKETS
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY");
-- Load the device/element intermediate table with the 'FTS' rows of one
-- collection, distributing and sorting them by the join keys.
INSERT OVERWRITE TABLE NDA_CNR_DEVICE_ELEMENT_MAP_FH_TEMP
SELECT
    T2.FAMILY,
    T2.PLATFORM,
    T2.ROLE,
    T2.CIP_DEVICE_ID AS D1,
    -- (page break in the source deck: Nag Arvind Gudiseva, page 5)
    T2.DV_CNT_BY_GRP,
    T2.FH_LVL_GRP,
    T2.LEVEL_1,
    T2.LEVEL_2,
    T2.LEVEL_3,
    T2.IS_POLICY,
    T2.IS_GRP_POLICY,
    T2.ELEMENT,
    T2.ELEMENT_TYPE
FROM NDA_CNR_DEVICE_ELEMENT_MAP_FH T2
WHERE (T2.CIP_COLLECTION_ID = '3159' AND T2.ELEMENT_TYPE = 'FTS')
CLUSTER BY T2.FAMILY, T2.PLATFORM, T2.ROLE;
-- Final join-result table, partitioned by collection id and element type.
CREATE TABLE IF NOT EXISTS NDA_CNR_DEVICE_JOIN_MAP_FH (
    D2            BIGINT,
    D1            BIGINT,
    DV_CNT_BY_GRP BIGINT,
    FH_LVL_GRP    STRING,
    FAMILY        STRING,
    PLATFORM      STRING,
    ROLE          STRING,
    LEVEL_1       STRING,
    LEVEL_2       STRING,
    LEVEL_3       STRING,
    IS_POLICY     STRING,
    IS_GRP_POLICY STRING,
    ELEMENT       STRING
)
PARTITIONED BY (
    CIP_COLLECTION_ID BIGINT,
    ELEMENT_TYPE      STRING
);
Nag Arvind Gudiseva
6
-- Optimised join: reads the two pre-filtered, bucketed intermediate tables and
-- joins them on (FAMILY, PLATFORM, ROLE) only.
-- The last two SELECT columns supply the partition values
-- (CIP_COLLECTION_ID, ELEMENT_TYPE) of the target table.
INSERT OVERWRITE TABLE NDA_CNR_DEVICE_JOIN_MAP_FH
PARTITION (CIP_COLLECTION_ID, ELEMENT_TYPE)
SELECT
    T1.D2,
    T2.D1,
    T2.DV_CNT_BY_GRP,
    T2.FH_LVL_GRP,
    T2.FAMILY,
    T2.PLATFORM,
    T2.ROLE,
    T2.LEVEL_1,
    T2.LEVEL_2,
    T2.LEVEL_3,
    T2.IS_POLICY,
    T2.IS_GRP_POLICY,
    T2.ELEMENT,
    T1.CIP_COLLECTION_ID,
    T2.ELEMENT_TYPE
FROM NDA_POLICY_DV_DETAILS_FH_TEMP T1
INNER JOIN NDA_CNR_DEVICE_ELEMENT_MAP_FH_TEMP T2
    ON  T1.FAMILY   = T2.FAMILY
    AND T1.PLATFORM = T2.PLATFORM
    AND T1.ROLE     = T2.ROLE;
SET COMMANDS
Use the below SET commands judiciously where required:
-- Optional session-level tuning properties; apply only the ones a given job needs.
-- Every statement is terminated with ';' so the list can be run as a script.
SET mapred.max.split.size=10857600;
SET mapred.min.split.size=10857600;
SET mapred.map.tasks=50;
SET mapred.reduce.tasks=100;
-- (page break in the source deck: Nag Arvind Gudiseva, page 7)
SET mapred.job.map.memory.mb=5120;
SET mapred.job.reduce.memory.mb=5120;
SET mapred.map.child.java.opts=-Xmx4096m;
SET mapred.reduce.child.java.opts=-Xmx4096m;
SET io.sort.mb=1000;
-- NOTE(review): mapred.reduce.tasks is set a second time here; if the whole
-- list is run in order, this value (50) replaces the 100 set above.
SET mapred.reduce.tasks=50;
SET hive.mapred.reduce.tasks.speculative.execution=false;
SET hive.auto.convert.join=false;
SET hive.rpc.query.plan=true;
OTHER OPTIMIZATIONS
APACHE TEZ
Supported by the Hortonworks distribution. Other vendors like Cloudera, MapR, etc. have placed more emphasis on
Spark. Note: Spark 2.0 is undergoing a major transformation with syntactical changes.
APACHE PIG
Pig is a data flow scripting language. Its best use case is ETL operations. Use Pig for creating data
pipelines and Hive for final long-running batch queries.

More Related Content

Viewers also liked

Ecc Hungary introduction ADR bodies
Ecc Hungary introduction ADR bodiesEcc Hungary introduction ADR bodies
Ecc Hungary introduction ADR bodiesconsumerenergy
 
Монтаж черепицы icopal
Монтаж черепицы icopalМонтаж черепицы icopal
Монтаж черепицы icopalAl Maks
 
The Expat's Way Principles
The Expat's Way PrinciplesThe Expat's Way Principles
The Expat's Way PrinciplesAneesah Bakker
 
bakalarka pro moji babicku
bakalarka pro moji babickubakalarka pro moji babicku
bakalarka pro moji babickucalan7
 
Евгений Минченко. Новые предвыборные технологии
Евгений Минченко. Новые предвыборные технологииЕвгений Минченко. Новые предвыборные технологии
Евгений Минченко. Новые предвыборные технологииprasu1995
 
Tabla de contenido
Tabla de contenidoTabla de contenido
Tabla de contenidokode99
 
MiS SharePoint 2010-SSRS, Power View & PowerPivot 2012
MiS SharePoint 2010-SSRS, Power View & PowerPivot 2012MiS SharePoint 2010-SSRS, Power View & PowerPivot 2012
MiS SharePoint 2010-SSRS, Power View & PowerPivot 2012Sunny U Okoro
 
Logic tl(01 k5)
Logic tl(01 k5)Logic tl(01 k5)
Logic tl(01 k5)hieusy
 
2012 Presentazione Minerva
2012 Presentazione Minerva2012 Presentazione Minerva
2012 Presentazione MinervaFabio Cerino
 
Presentation learning
Presentation learningPresentation learning
Presentation learningsarachebli91
 
Filming photographs
Filming photographsFilming photographs
Filming photographsrturner93
 
Scuola S.B. Capitanio - Bergamo - Italy
Scuola S.B. Capitanio - Bergamo - ItalyScuola S.B. Capitanio - Bergamo - Italy
Scuola S.B. Capitanio - Bergamo - Italygiovanni quartini
 
Giao trinh logic dc(trần väƒn toă n)
Giao trinh logic dc(trần väƒn toă n)Giao trinh logic dc(trần väƒn toă n)
Giao trinh logic dc(trần väƒn toă n)hieusy
 
How to deal with panic attacks?
How to deal with panic attacks?How to deal with panic attacks?
How to deal with panic attacks?madfinn13
 
Harmonizing Data for the Warehouse
Harmonizing Data for the WarehouseHarmonizing Data for the Warehouse
Harmonizing Data for the WarehouseKalido
 

Viewers also liked (18)

Ecc Hungary introduction ADR bodies
Ecc Hungary introduction ADR bodiesEcc Hungary introduction ADR bodies
Ecc Hungary introduction ADR bodies
 
Монтаж черепицы icopal
Монтаж черепицы icopalМонтаж черепицы icopal
Монтаж черепицы icopal
 
The Expat's Way Principles
The Expat's Way PrinciplesThe Expat's Way Principles
The Expat's Way Principles
 
bakalarka pro moji babicku
bakalarka pro moji babickubakalarka pro moji babicku
bakalarka pro moji babicku
 
Potentials of web standards for automation control in manufacturing systems
Potentials of web standards for automation control in manufacturing systemsPotentials of web standards for automation control in manufacturing systems
Potentials of web standards for automation control in manufacturing systems
 
Vo horegionoord
Vo horegionoordVo horegionoord
Vo horegionoord
 
Naskah soal uas statistik 2014
Naskah soal uas statistik 2014Naskah soal uas statistik 2014
Naskah soal uas statistik 2014
 
Евгений Минченко. Новые предвыборные технологии
Евгений Минченко. Новые предвыборные технологииЕвгений Минченко. Новые предвыборные технологии
Евгений Минченко. Новые предвыборные технологии
 
Tabla de contenido
Tabla de contenidoTabla de contenido
Tabla de contenido
 
MiS SharePoint 2010-SSRS, Power View & PowerPivot 2012
MiS SharePoint 2010-SSRS, Power View & PowerPivot 2012MiS SharePoint 2010-SSRS, Power View & PowerPivot 2012
MiS SharePoint 2010-SSRS, Power View & PowerPivot 2012
 
Logic tl(01 k5)
Logic tl(01 k5)Logic tl(01 k5)
Logic tl(01 k5)
 
2012 Presentazione Minerva
2012 Presentazione Minerva2012 Presentazione Minerva
2012 Presentazione Minerva
 
Presentation learning
Presentation learningPresentation learning
Presentation learning
 
Filming photographs
Filming photographsFilming photographs
Filming photographs
 
Scuola S.B. Capitanio - Bergamo - Italy
Scuola S.B. Capitanio - Bergamo - ItalyScuola S.B. Capitanio - Bergamo - Italy
Scuola S.B. Capitanio - Bergamo - Italy
 
Giao trinh logic dc(trần väƒn toă n)
Giao trinh logic dc(trần väƒn toă n)Giao trinh logic dc(trần väƒn toă n)
Giao trinh logic dc(trần väƒn toă n)
 
How to deal with panic attacks?
How to deal with panic attacks?How to deal with panic attacks?
How to deal with panic attacks?
 
Harmonizing Data for the Warehouse
Harmonizing Data for the WarehouseHarmonizing Data for the Warehouse
Harmonizing Data for the Warehouse
 

Similar to Hive performance optimizations

A BigBench Implementation in the Hadoop Ecosystem
A BigBench Implementation in the Hadoop EcosystemA BigBench Implementation in the Hadoop Ecosystem
A BigBench Implementation in the Hadoop EcosystemTilmann Rabl
 
クラウドDWHとしても進化を続けるPivotal Greenplumご紹介
クラウドDWHとしても進化を続けるPivotal Greenplumご紹介クラウドDWHとしても進化を続けるPivotal Greenplumご紹介
クラウドDWHとしても進化を続けるPivotal Greenplumご紹介Masayuki Matsushita
 
Rob Sullivan at Heroku's Waza 2013: Your Database -- A Story of Indifference
Rob Sullivan at Heroku's Waza 2013: Your Database -- A Story of IndifferenceRob Sullivan at Heroku's Waza 2013: Your Database -- A Story of Indifference
Rob Sullivan at Heroku's Waza 2013: Your Database -- A Story of IndifferenceHeroku
 
How to generate a 100+ page website using parameterisation in R
How to generate a 100+ page website using parameterisation in RHow to generate a 100+ page website using parameterisation in R
How to generate a 100+ page website using parameterisation in RPaul Bradshaw
 
Internationalizing CakePHP Applications
Internationalizing CakePHP ApplicationsInternationalizing CakePHP Applications
Internationalizing CakePHP ApplicationsPierre MARTIN
 
PerlApp2Postgresql (2)
PerlApp2Postgresql (2)PerlApp2Postgresql (2)
PerlApp2Postgresql (2)Jerome Eteve
 
Getting Started with PL/Proxy
Getting Started with PL/ProxyGetting Started with PL/Proxy
Getting Started with PL/ProxyPeter Eisentraut
 
Андрей Козлов (Altoros): Оптимизация производительности Cassandra
Андрей Козлов (Altoros): Оптимизация производительности CassandraАндрей Козлов (Altoros): Оптимизация производительности Cassandra
Андрей Козлов (Altoros): Оптимизация производительности CassandraOlga Lavrentieva
 
SCALE 15x Minimizing PostgreSQL Major Version Upgrade Downtime
SCALE 15x Minimizing PostgreSQL Major Version Upgrade DowntimeSCALE 15x Minimizing PostgreSQL Major Version Upgrade Downtime
SCALE 15x Minimizing PostgreSQL Major Version Upgrade DowntimeJeff Frost
 
Postgresql Database Administration- Day4
Postgresql Database Administration- Day4Postgresql Database Administration- Day4
Postgresql Database Administration- Day4PoguttuezhiniVP
 
Deep dive to PostgreSQL Indexes
Deep dive to PostgreSQL IndexesDeep dive to PostgreSQL Indexes
Deep dive to PostgreSQL IndexesIbrar Ahmed
 
56 Query Optimization
56 Query Optimization56 Query Optimization
56 Query OptimizationMYXPLAIN
 
SWP - A Generic Language Parser
SWP - A Generic Language ParserSWP - A Generic Language Parser
SWP - A Generic Language Parserkamaelian
 
Data Structure in C (Lab Programs)
Data Structure in C (Lab Programs)Data Structure in C (Lab Programs)
Data Structure in C (Lab Programs)Saket Pathak
 
Индексируем базу: как делать хорошо и не делать плохо Winter saint p 2021 m...
Индексируем базу: как делать хорошо и не делать плохо   Winter saint p 2021 m...Индексируем базу: как делать хорошо и не делать плохо   Winter saint p 2021 m...
Индексируем базу: как делать хорошо и не делать плохо Winter saint p 2021 m...Андрей Новиков
 
[Pgday.Seoul 2019] Citus를 이용한 분산 데이터베이스
[Pgday.Seoul 2019] Citus를 이용한 분산 데이터베이스[Pgday.Seoul 2019] Citus를 이용한 분산 데이터베이스
[Pgday.Seoul 2019] Citus를 이용한 분산 데이터베이스PgDay.Seoul
 
ETL Patterns with Postgres
ETL Patterns with PostgresETL Patterns with Postgres
ETL Patterns with PostgresMartin Loetzsch
 
Data Modeling, Normalization, and De-Normalization | PostgresOpen 2019 | Dimi...
Data Modeling, Normalization, and De-Normalization | PostgresOpen 2019 | Dimi...Data Modeling, Normalization, and De-Normalization | PostgresOpen 2019 | Dimi...
Data Modeling, Normalization, and De-Normalization | PostgresOpen 2019 | Dimi...Citus Data
 

Similar to Hive performance optimizations (20)

A BigBench Implementation in the Hadoop Ecosystem
A BigBench Implementation in the Hadoop EcosystemA BigBench Implementation in the Hadoop Ecosystem
A BigBench Implementation in the Hadoop Ecosystem
 
クラウドDWHとしても進化を続けるPivotal Greenplumご紹介
クラウドDWHとしても進化を続けるPivotal Greenplumご紹介クラウドDWHとしても進化を続けるPivotal Greenplumご紹介
クラウドDWHとしても進化を続けるPivotal Greenplumご紹介
 
Rob Sullivan at Heroku's Waza 2013: Your Database -- A Story of Indifference
Rob Sullivan at Heroku's Waza 2013: Your Database -- A Story of IndifferenceRob Sullivan at Heroku's Waza 2013: Your Database -- A Story of Indifference
Rob Sullivan at Heroku's Waza 2013: Your Database -- A Story of Indifference
 
How to generate a 100+ page website using parameterisation in R
How to generate a 100+ page website using parameterisation in RHow to generate a 100+ page website using parameterisation in R
How to generate a 100+ page website using parameterisation in R
 
Internationalizing CakePHP Applications
Internationalizing CakePHP ApplicationsInternationalizing CakePHP Applications
Internationalizing CakePHP Applications
 
PerlApp2Postgresql (2)
PerlApp2Postgresql (2)PerlApp2Postgresql (2)
PerlApp2Postgresql (2)
 
Getting Started with PL/Proxy
Getting Started with PL/ProxyGetting Started with PL/Proxy
Getting Started with PL/Proxy
 
Андрей Козлов (Altoros): Оптимизация производительности Cassandra
Андрей Козлов (Altoros): Оптимизация производительности CassandraАндрей Козлов (Altoros): Оптимизация производительности Cassandra
Андрей Козлов (Altoros): Оптимизация производительности Cassandra
 
SCALE 15x Minimizing PostgreSQL Major Version Upgrade Downtime
SCALE 15x Minimizing PostgreSQL Major Version Upgrade DowntimeSCALE 15x Minimizing PostgreSQL Major Version Upgrade Downtime
SCALE 15x Minimizing PostgreSQL Major Version Upgrade Downtime
 
Postgresql Database Administration- Day4
Postgresql Database Administration- Day4Postgresql Database Administration- Day4
Postgresql Database Administration- Day4
 
Deep dive to PostgreSQL Indexes
Deep dive to PostgreSQL IndexesDeep dive to PostgreSQL Indexes
Deep dive to PostgreSQL Indexes
 
56 Query Optimization
56 Query Optimization56 Query Optimization
56 Query Optimization
 
SWP - A Generic Language Parser
SWP - A Generic Language ParserSWP - A Generic Language Parser
SWP - A Generic Language Parser
 
Data Structure in C (Lab Programs)
Data Structure in C (Lab Programs)Data Structure in C (Lab Programs)
Data Structure in C (Lab Programs)
 
Индексируем базу: как делать хорошо и не делать плохо Winter saint p 2021 m...
Индексируем базу: как делать хорошо и не делать плохо   Winter saint p 2021 m...Индексируем базу: как делать хорошо и не делать плохо   Winter saint p 2021 m...
Индексируем базу: как делать хорошо и не делать плохо Winter saint p 2021 m...
 
[Pgday.Seoul 2019] Citus를 이용한 분산 데이터베이스
[Pgday.Seoul 2019] Citus를 이용한 분산 데이터베이스[Pgday.Seoul 2019] Citus를 이용한 분산 데이터베이스
[Pgday.Seoul 2019] Citus를 이용한 분산 데이터베이스
 
ETL Patterns with Postgres
ETL Patterns with PostgresETL Patterns with Postgres
ETL Patterns with Postgres
 
Hadoop pig
Hadoop pigHadoop pig
Hadoop pig
 
Pig latin
Pig latinPig latin
Pig latin
 
Data Modeling, Normalization, and De-Normalization | PostgresOpen 2019 | Dimi...
Data Modeling, Normalization, and De-Normalization | PostgresOpen 2019 | Dimi...Data Modeling, Normalization, and De-Normalization | PostgresOpen 2019 | Dimi...
Data Modeling, Normalization, and De-Normalization | PostgresOpen 2019 | Dimi...
 

More from Nag Arvind Gudiseva

Git as version control for Analytics project
Git as version control for Analytics projectGit as version control for Analytics project
Git as version control for Analytics projectNag Arvind Gudiseva
 
Creating executable JAR from Eclipse IDE
Creating executable JAR from Eclipse IDECreating executable JAR from Eclipse IDE
Creating executable JAR from Eclipse IDENag Arvind Gudiseva
 
Adding Idea IntelliJ projects to Subversion Version Control
Adding Idea IntelliJ projects to Subversion Version ControlAdding Idea IntelliJ projects to Subversion Version Control
Adding Idea IntelliJ projects to Subversion Version ControlNag Arvind Gudiseva
 
Apache Drill with Oracle, Hive and HBase
Apache Drill with Oracle, Hive and HBaseApache Drill with Oracle, Hive and HBase
Apache Drill with Oracle, Hive and HBaseNag Arvind Gudiseva
 
Hadoop 2.0 cluster setup on ubuntu 14.04 (64 bit)
Hadoop 2.0 cluster setup on ubuntu 14.04 (64 bit)Hadoop 2.0 cluster setup on ubuntu 14.04 (64 bit)
Hadoop 2.0 cluster setup on ubuntu 14.04 (64 bit)Nag Arvind Gudiseva
 
Order Review Solution Application (Version 2.0)
Order Review Solution Application (Version 2.0)Order Review Solution Application (Version 2.0)
Order Review Solution Application (Version 2.0)Nag Arvind Gudiseva
 
MSC Temporary Passwords reset tool
MSC Temporary Passwords reset toolMSC Temporary Passwords reset tool
MSC Temporary Passwords reset toolNag Arvind Gudiseva
 
Store Support Operations - Training on MSC Application
Store Support Operations - Training on MSC ApplicationStore Support Operations - Training on MSC Application
Store Support Operations - Training on MSC ApplicationNag Arvind Gudiseva
 
Store Support Operations - Training on MSC Application
Store Support Operations - Training on MSC ApplicationStore Support Operations - Training on MSC Application
Store Support Operations - Training on MSC ApplicationNag Arvind Gudiseva
 

More from Nag Arvind Gudiseva (13)

Elasticsearch security
Elasticsearch securityElasticsearch security
Elasticsearch security
 
Elasticsearch Security Strategy
Elasticsearch Security StrategyElasticsearch Security Strategy
Elasticsearch Security Strategy
 
Git as version control for Analytics project
Git as version control for Analytics projectGit as version control for Analytics project
Git as version control for Analytics project
 
Exception Handling in Scala
Exception Handling in ScalaException Handling in Scala
Exception Handling in Scala
 
Creating executable JAR from Eclipse IDE
Creating executable JAR from Eclipse IDECreating executable JAR from Eclipse IDE
Creating executable JAR from Eclipse IDE
 
Adding Idea IntelliJ projects to Subversion Version Control
Adding Idea IntelliJ projects to Subversion Version ControlAdding Idea IntelliJ projects to Subversion Version Control
Adding Idea IntelliJ projects to Subversion Version Control
 
Apache Drill with Oracle, Hive and HBase
Apache Drill with Oracle, Hive and HBaseApache Drill with Oracle, Hive and HBase
Apache Drill with Oracle, Hive and HBase
 
ElasticSearch Hands On
ElasticSearch Hands OnElasticSearch Hands On
ElasticSearch Hands On
 
Hadoop 2.0 cluster setup on ubuntu 14.04 (64 bit)
Hadoop 2.0 cluster setup on ubuntu 14.04 (64 bit)Hadoop 2.0 cluster setup on ubuntu 14.04 (64 bit)
Hadoop 2.0 cluster setup on ubuntu 14.04 (64 bit)
 
Order Review Solution Application (Version 2.0)
Order Review Solution Application (Version 2.0)Order Review Solution Application (Version 2.0)
Order Review Solution Application (Version 2.0)
 
MSC Temporary Passwords reset tool
MSC Temporary Passwords reset toolMSC Temporary Passwords reset tool
MSC Temporary Passwords reset tool
 
Store Support Operations - Training on MSC Application
Store Support Operations - Training on MSC ApplicationStore Support Operations - Training on MSC Application
Store Support Operations - Training on MSC Application
 
Store Support Operations - Training on MSC Application
Store Support Operations - Training on MSC ApplicationStore Support Operations - Training on MSC Application
Store Support Operations - Training on MSC Application
 

Recently uploaded

Carero dropshipping via API with DroFx.pptx
Carero dropshipping via API with DroFx.pptxCarero dropshipping via API with DroFx.pptx
Carero dropshipping via API with DroFx.pptxolyaivanovalion
 
Week-01-2.ppt BBB human Computer interaction
Week-01-2.ppt BBB human Computer interactionWeek-01-2.ppt BBB human Computer interaction
Week-01-2.ppt BBB human Computer interactionfulawalesam
 
VidaXL dropshipping via API with DroFx.pptx
VidaXL dropshipping via API with DroFx.pptxVidaXL dropshipping via API with DroFx.pptx
VidaXL dropshipping via API with DroFx.pptxolyaivanovalion
 
04242024_CCC TUG_Joins and Relationships
04242024_CCC TUG_Joins and Relationships04242024_CCC TUG_Joins and Relationships
04242024_CCC TUG_Joins and Relationshipsccctableauusergroup
 
꧁❤ Greater Noida Call Girls Delhi ❤꧂ 9711199171 ☎️ Hard And Sexy Vip Call
꧁❤ Greater Noida Call Girls Delhi ❤꧂ 9711199171 ☎️ Hard And Sexy Vip Call꧁❤ Greater Noida Call Girls Delhi ❤꧂ 9711199171 ☎️ Hard And Sexy Vip Call
꧁❤ Greater Noida Call Girls Delhi ❤꧂ 9711199171 ☎️ Hard And Sexy Vip Callshivangimorya083
 
Log Analysis using OSSEC sasoasasasas.pptx
Log Analysis using OSSEC sasoasasasas.pptxLog Analysis using OSSEC sasoasasasas.pptx
Log Analysis using OSSEC sasoasasasas.pptxJohnnyPlasten
 
Market Analysis in the 5 Largest Economic Countries in Southeast Asia.pdf
Market Analysis in the 5 Largest Economic Countries in Southeast Asia.pdfMarket Analysis in the 5 Largest Economic Countries in Southeast Asia.pdf
Market Analysis in the 5 Largest Economic Countries in Southeast Asia.pdfRachmat Ramadhan H
 
Call me @ 9892124323 Cheap Rate Call Girls in Vashi with Real Photo 100% Secure
Call me @ 9892124323  Cheap Rate Call Girls in Vashi with Real Photo 100% SecureCall me @ 9892124323  Cheap Rate Call Girls in Vashi with Real Photo 100% Secure
Call me @ 9892124323 Cheap Rate Call Girls in Vashi with Real Photo 100% SecurePooja Nehwal
 
April 2024 - Crypto Market Report's Analysis
April 2024 - Crypto Market Report's AnalysisApril 2024 - Crypto Market Report's Analysis
April 2024 - Crypto Market Report's Analysismanisha194592
 
Midocean dropshipping via API with DroFx
Midocean dropshipping via API with DroFxMidocean dropshipping via API with DroFx
Midocean dropshipping via API with DroFxolyaivanovalion
 
Low Rate Call Girls Bhilai Anika 8250192130 Independent Escort Service Bhilai
Low Rate Call Girls Bhilai Anika 8250192130 Independent Escort Service BhilaiLow Rate Call Girls Bhilai Anika 8250192130 Independent Escort Service Bhilai
Low Rate Call Girls Bhilai Anika 8250192130 Independent Escort Service BhilaiSuhani Kapoor
 
Call Girls in Sarai Kale Khan Delhi 💯 Call Us 🔝9205541914 🔝( Delhi) Escorts S...
Call Girls in Sarai Kale Khan Delhi 💯 Call Us 🔝9205541914 🔝( Delhi) Escorts S...Call Girls in Sarai Kale Khan Delhi 💯 Call Us 🔝9205541914 🔝( Delhi) Escorts S...
Call Girls in Sarai Kale Khan Delhi 💯 Call Us 🔝9205541914 🔝( Delhi) Escorts S...Delhi Call girls
 
Edukaciniai dropshipping via API with DroFx
Edukaciniai dropshipping via API with DroFxEdukaciniai dropshipping via API with DroFx
Edukaciniai dropshipping via API with DroFxolyaivanovalion
 
Halmar dropshipping via API with DroFx
Halmar  dropshipping  via API with DroFxHalmar  dropshipping  via API with DroFx
Halmar dropshipping via API with DroFxolyaivanovalion
 
Delhi Call Girls CP 9711199171 ☎✔👌✔ Whatsapp Hard And Sexy Vip Call
Delhi Call Girls CP 9711199171 ☎✔👌✔ Whatsapp Hard And Sexy Vip CallDelhi Call Girls CP 9711199171 ☎✔👌✔ Whatsapp Hard And Sexy Vip Call
Delhi Call Girls CP 9711199171 ☎✔👌✔ Whatsapp Hard And Sexy Vip Callshivangimorya083
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...apidays
 
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Serviceranjana rawat
 
Data-Analysis for Chicago Crime Data 2023
Data-Analysis for Chicago Crime Data  2023Data-Analysis for Chicago Crime Data  2023
Data-Analysis for Chicago Crime Data 2023ymrp368
 

Recently uploaded (20)

Carero dropshipping via API with DroFx.pptx
Carero dropshipping via API with DroFx.pptxCarero dropshipping via API with DroFx.pptx
Carero dropshipping via API with DroFx.pptx
 
Week-01-2.ppt BBB human Computer interaction
Week-01-2.ppt BBB human Computer interactionWeek-01-2.ppt BBB human Computer interaction
Week-01-2.ppt BBB human Computer interaction
 
VidaXL dropshipping via API with DroFx.pptx
VidaXL dropshipping via API with DroFx.pptxVidaXL dropshipping via API with DroFx.pptx
VidaXL dropshipping via API with DroFx.pptx
 
04242024_CCC TUG_Joins and Relationships
04242024_CCC TUG_Joins and Relationships04242024_CCC TUG_Joins and Relationships
04242024_CCC TUG_Joins and Relationships
 
꧁❤ Greater Noida Call Girls Delhi ❤꧂ 9711199171 ☎️ Hard And Sexy Vip Call
꧁❤ Greater Noida Call Girls Delhi ❤꧂ 9711199171 ☎️ Hard And Sexy Vip Call꧁❤ Greater Noida Call Girls Delhi ❤꧂ 9711199171 ☎️ Hard And Sexy Vip Call
꧁❤ Greater Noida Call Girls Delhi ❤꧂ 9711199171 ☎️ Hard And Sexy Vip Call
 
꧁❤ Aerocity Call Girls Service Aerocity Delhi ❤꧂ 9999965857 ☎️ Hard And Sexy ...
꧁❤ Aerocity Call Girls Service Aerocity Delhi ❤꧂ 9999965857 ☎️ Hard And Sexy ...꧁❤ Aerocity Call Girls Service Aerocity Delhi ❤꧂ 9999965857 ☎️ Hard And Sexy ...
꧁❤ Aerocity Call Girls Service Aerocity Delhi ❤꧂ 9999965857 ☎️ Hard And Sexy ...
 
Log Analysis using OSSEC sasoasasasas.pptx
Log Analysis using OSSEC sasoasasasas.pptxLog Analysis using OSSEC sasoasasasas.pptx
Log Analysis using OSSEC sasoasasasas.pptx
 
Market Analysis in the 5 Largest Economic Countries in Southeast Asia.pdf
Market Analysis in the 5 Largest Economic Countries in Southeast Asia.pdfMarket Analysis in the 5 Largest Economic Countries in Southeast Asia.pdf
Market Analysis in the 5 Largest Economic Countries in Southeast Asia.pdf
 
Call me @ 9892124323 Cheap Rate Call Girls in Vashi with Real Photo 100% Secure
Call me @ 9892124323  Cheap Rate Call Girls in Vashi with Real Photo 100% SecureCall me @ 9892124323  Cheap Rate Call Girls in Vashi with Real Photo 100% Secure
Call me @ 9892124323 Cheap Rate Call Girls in Vashi with Real Photo 100% Secure
 
April 2024 - Crypto Market Report's Analysis
April 2024 - Crypto Market Report's AnalysisApril 2024 - Crypto Market Report's Analysis
April 2024 - Crypto Market Report's Analysis
 
Delhi 99530 vip 56974 Genuine Escort Service Call Girls in Kishangarh
Delhi 99530 vip 56974 Genuine Escort Service Call Girls in  KishangarhDelhi 99530 vip 56974 Genuine Escort Service Call Girls in  Kishangarh
Delhi 99530 vip 56974 Genuine Escort Service Call Girls in Kishangarh
 
Midocean dropshipping via API with DroFx
Midocean dropshipping via API with DroFxMidocean dropshipping via API with DroFx
Midocean dropshipping via API with DroFx
 
Low Rate Call Girls Bhilai Anika 8250192130 Independent Escort Service Bhilai
Low Rate Call Girls Bhilai Anika 8250192130 Independent Escort Service BhilaiLow Rate Call Girls Bhilai Anika 8250192130 Independent Escort Service Bhilai
Low Rate Call Girls Bhilai Anika 8250192130 Independent Escort Service Bhilai
 
Call Girls in Sarai Kale Khan Delhi 💯 Call Us 🔝9205541914 🔝( Delhi) Escorts S...
Call Girls in Sarai Kale Khan Delhi 💯 Call Us 🔝9205541914 🔝( Delhi) Escorts S...Call Girls in Sarai Kale Khan Delhi 💯 Call Us 🔝9205541914 🔝( Delhi) Escorts S...
Call Girls in Sarai Kale Khan Delhi 💯 Call Us 🔝9205541914 🔝( Delhi) Escorts S...
 
Edukaciniai dropshipping via API with DroFx
Edukaciniai dropshipping via API with DroFxEdukaciniai dropshipping via API with DroFx
Edukaciniai dropshipping via API with DroFx
 
Halmar dropshipping via API with DroFx
Halmar  dropshipping  via API with DroFxHalmar  dropshipping  via API with DroFx
Halmar dropshipping via API with DroFx
 
Delhi Call Girls CP 9711199171 ☎✔👌✔ Whatsapp Hard And Sexy Vip Call
Delhi Call Girls CP 9711199171 ☎✔👌✔ Whatsapp Hard And Sexy Vip CallDelhi Call Girls CP 9711199171 ☎✔👌✔ Whatsapp Hard And Sexy Vip Call
Delhi Call Girls CP 9711199171 ☎✔👌✔ Whatsapp Hard And Sexy Vip Call
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
 
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
 
Data-Analysis for Chicago Crime Data 2023
Data-Analysis for Chicago Crime Data  2023Data-Analysis for Chicago Crime Data  2023
Data-Analysis for Chicago Crime Data 2023
 

Hive performance optimizations

  • 1. Nag Arvind Gudiseva 1 HIVE PERFORMANCE OPTIMIZATIONS SERDE ORIGINAL CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_nda_detail (RUN_ID BIGINT, GUID STRING, CIP_COLLECTION_ID BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY 't' LINES TERMINATED BY 'n' STORED AS TEXTFILE; OPTIMISED CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_nda_detail (RUN_ID BIGINT, GUID STRING, CIP_COLLECTION_ID BIGINT) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; OCTAL ORIGINAL CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_nda_detail (RUN_ID BIGINT, GUID STRING, CIP_COLLECTION_ID BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '<FSP>' LINES TERMINATED BY '<RSP>' ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
  • 2. Nag Arvind Gudiseva 2 OPTIMISED CREATE TABLE employees (name STRING, salary FLOAT, subordinates ARRAY<STRING>, deductions MAP<STRING, FLOAT>, address STRUCT<street: STRING, city: STRING, state: STRING, zip: INT>) ROW FORMAT DELIMITED FIELDS TERMINATED BY '001' COLLECTION ITEMS TERMINATED BY '002' MAP KEYS TERMINATED BY '003' LINES TERMINATED BY 'n' STORED AS TEXTFILE; INTERMEDIATE TABLES, BUCKETING AND COMPRESSION ORIGINAL INSERT OVERWRITE TABLE NDA_CNR_DEVICE_JOIN_MAP_FH PARTITION (CIP_COLLECTION_ID, ELEMENT_TYPE) SELECT T1.CIP_DEVICE_ID AS D2, T2.CIP_DEVICE_ID AS D1, T2.DV_CNT_BY_GRP, T2.FH_LVL_GRP, T2.FAMILY, T2.PLATFORM, T2.ROLE, T2.LEVEL_1, T2.LEVEL_2, T2.LEVEL_3, T2.IS_POLICY, T2.IS_GRP_POLICY, T2.ELEMENT, T1.CIP_COLLECTION_ID,
  • 3. Nag Arvind Gudiseva 3 T2.ELEMENT_TYPE FROM NDA_POLICY_DV_DETAILS_FH T1 JOIN NDA_CNR_DEVICE_ELEMENT_MAP_FH T2 ON (T1.FAMILY = T2.FAMILY AND T1.PLATFORM = T2.PLATFORM AND T1.ROLE = T2.ROLE AND T1.CIP_COLLECTION_ID=T2.CIP_COLLECTION_ID) WHERE ( T1.CIP_COLLECTION_ID=255 AND T2.ELEMENT_TYPE='FTS') OPTIMISED set hive.enforce.bucketing=true Set hive.enforce.sorting=true CREATE TABLE IF NOT EXISTS NDA_POLICY_DV_DETAILS_FH_TEMP ( FAMILY STRING, PLATFORM STRING, ROLE STRING, D2 BIGINT, CIP_COLLECTION_ID BIGINT) CLUSTERED BY (FAMILY, PLATFORM, ROLE) SORTED BY (FAMILY, PLATFORM, ROLE) INTO 50 BUCKETS STORED AS ORC tblproperties ("orc.compress"="SNAPPY"); INSERT OVERWRITE TABLE NDA_POLICY_DV_DETAILS_FH_TEMP SELECT T1.FAMILY, T1.PLATFORM, T1.ROLE, T1.CIP_DEVICE_ID AS D2,
  • 4. Nag Arvind Gudiseva 4 T1.CIP_COLLECTION_ID FROM NDA_POLICY_DV_DETAILS_FH T1 WHERE ( T1.CIP_COLLECTION_ID=‘3159’) CLUSTER BY T1.FAMILY, T1.PLATFORM, T1.ROLE; CREATE TABLE IF NOT EXISTS NDA_CNR_DEVICE_ELEMENT_MAP_FH_TEMP ( FAMILY STRING, PLATFORM STRING, ROLE STRING, D1 BIGINT, DV_CNT_BY_GRP BIGINT, FH_LVL_GRP STRING, LEVEL_1 STRING, LEVEL_2 STRING, LEVEL_3 STRING, IS_POLICY STRING, IS_GRP_POLICY STRING, ELEMENT STRING, ELEMENT_TYPE STRING) CLUSTERED BY (FAMILY, PLATFORM, ROLE) SORTED BY (FAMILY, PLATFORM, ROLE) INTO 50 BUCKETS STORED AS ORC tblproperties ("orc.compress"="SNAPPY"); INSERT OVERWRITE TABLE NDA_CNR_DEVICE_ELEMENT_MAP_FH_TEMP SELECT T2.FAMILY, T2.PLATFORM, T2.ROLE, T2.CIP_DEVICE_ID AS D1,
  • 5. Nag Arvind Gudiseva 5 T2.DV_CNT_BY_GRP, T2.FH_LVL_GRP, T2.LEVEL_1, T2.LEVEL_2, T2.LEVEL_3, T2.IS_POLICY, T2.IS_GRP_POLICY, T2.ELEMENT, T2.ELEMENT_TYPE FROM NDA_CNR_DEVICE_ELEMENT_MAP_FH T2 WHERE ( T2.CIP_COLLECTION_ID='3159' AND T2.ELEMENT_TYPE='FTS') CLUSTER BY T2.FAMILY, T2.PLATFORM, T2.ROLE; CREATE TABLE IF NOT EXISTS NDA_CNR_DEVICE_JOIN_MAP_FH ( D2 BIGINT, D1 BIGINT, DV_CNT_BY_GRP BIGINT, FH_LVL_GRP STRING, FAMILY STRING, PLATFORM STRING, ROLE STRING, LEVEL_1 STRING, LEVEL_2 STRING, LEVEL_3 STRING, IS_POLICY STRING, IS_GRP_POLICY STRING, ELEMENT STRING) PARTITIONED BY (CIP_COLLECTION_ID BIGINT, ELEMENT_TYPE STRING);
  • 6. Nag Arvind Gudiseva 6 INSERT OVERWRITE TABLE NDA_CNR_DEVICE_JOIN_MAP_FH PARTITION (CIP_COLLECTION_ID, ELEMENT_TYPE) SELECT T1.D2, T2.D1, T2.DV_CNT_BY_GRP, T2.FH_LVL_GRP, T2.FAMILY, T2.PLATFORM, T2.ROLE, T2.LEVEL_1, T2.LEVEL_2, T2.LEVEL_3, T2.IS_POLICY, T2.IS_GRP_POLICY, T2.ELEMENT, T1.CIP_COLLECTION_ID, T2.ELEMENT_TYPE FROM NDA_POLICY_DV_DETAILS_FH_TEMP T1 JOIN NDA_CNR_DEVICE_ELEMENT_MAP_FH_TEMP T2 ON (T1.FAMILY = T2.FAMILY AND T1.PLATFORM = T2.PLATFORM AND T1.ROLE = T2.ROLE); SET COMMANDS Use the below SET commands judiciously where required: Set mapred.max.split.size=10857600; Set mapred.min.split.size=10857600; Set mapred.map.tasks=50; Set mapred.reduce.tasks=100;
  • 7. Nag Arvind Gudiseva 7 Set mapred.job.map.memory.mb=5120; Set mapred.job.reduce.memory.mb=5120; Set mapred.map.child.java.opts=-Xmx4096m; Set mapred.reduce.child.java.opts=-Xmx4096m; Set io.sort.mb=1000; Set mapred.reduce.tasks=50; Set hive.mapred.reduce.tasks.speculative.execution=false; Set hive.auto.convert.join=false Set hive.rpc.query.plan=true OTHER OPTIMIZATIONS APACHE TEZ Supported by the Hortonworks distribution. Other vendors like Cloudera, MapR, etc. have placed more emphasis on Spark. Note: Spark 2.0 is undergoing a major transformation with syntactical changes. APACHE PIG Pig is a data flow scripting language. Its best use case is ETL operations. Use Pig for creating data pipelines and Hive for final long-running batch queries.