SlideShare uma empresa Scribd logo
1 de 49
Baixar para ler offline
(in 45 minutes)
Queue Table 
Textual Search 
%LIKE% 
Analytical Reports 
BLOBs 
Geospatial Queries 
Events Table
MBL310 
MBL311
Amazon SQS Amazon Kinesis 
Auto-scaling Shards provisioning 
=> Simple to set up and operate, easy to deploy 
new version to a new queue 
=> More cost effective in high scale, once you 
tuned the system 
“At least once delivery” Multiple “exactly once in order delivery” 
=> Easy to start with a single worker => A set of dedicated workers that are working in 
different intervals and different operations
from flask import Flask, request

application = Flask(__name__)


# main entry point for SQS, accepting only POST requests
@application.route("/sqs/", methods=['POST'])
def sqs():
    """Process one message POSTed to ``/sqs/``.

    The request is parsed into a document, which is then indexed in
    CloudSearch and stored in DynamoDB, in that order. An exception from
    either step propagates to Flask.

    Returns:
        An empty response body, which the original marks as the OK reply.
    """
    application.logger.debug('Message was received for processing!')
    doc = parse_request(request)
    load_to_cloudsearch(doc)
    put_into_dynamodb(doc)
    return ""  # OK
def load_to_cloudsearch(doc):
    """Index ``doc`` in the CloudSearch domain and commit the batch.

    Args:
        doc: Mapping of document fields; its ``'id'`` entry is used as the
            CloudSearch document id.

    Raises:
        KeyError: If ``doc`` has no ``'id'`` entry.
        CommitMismatchError: Re-raised after each reported error is logged.
    """
    # index document
    doc_serv = g.domain.get_document_service()
    # Look up the 'id' field; a bare ``id`` would be the builtin function.
    doc_id = doc['id']
    doc_serv.add(doc_id, doc)
    application.logger.debug('Inserting docId: %s', doc_id)

    # send index batch to CloudSearch
    try:
        doc_serv.commit()
    except CommitMismatchError as e:
        application.logger.error('CommitMismatchError raised')
        for msg in e.errors:
            application.logger.error('Error: %s', msg)
        raise
    finally:
        doc_serv.clear_sdf()  # clear SDF for next iteration
def put_into_dynamodb(doc):
    """Store a copy of ``doc`` in the DynamoDB events table with geo fields.

    The stored copy drops the ``day`` field and gains ``location`` (a GeoJSON
    point string), ``geohash`` (fetched from the geo server over HTTP) and
    ``geobox`` (the geohash with its 13 low-order decimal digits removed).

    Args:
        doc: Mapping with at least ``day``, ``latitude`` and ``longitude``
            entries. It is deep-copied, so the caller's object is unchanged.

    Raises:
        KeyError: If ``doc`` lacks ``day``, ``latitude`` or ``longitude``.
        ValueError: If the geo server response cannot be parsed as an integer.
    """
    item_data = copy.deepcopy(doc)  # I want different fields in DynamoDB
    del item_data['day']  # TS is enough, day is only for faceting

    # Using geohashing for the DynamoDB lookup index.
    # Single-quoted so the JSON double quotes need no escaping; the doubled
    # braces are literal braces for str.format.
    geojson_location = '{{"coordinates":[{0},{1}],"type":"Point"}}'.format(
        doc['latitude'], doc['longitude'])
    item_data['location'] = geojson_location

    geo_server_url = (
        "http://geo-server.elasticbeanstalk.com/wl-dynamodb-geo?point={0},{1}"
        .format(doc['latitude'], doc['longitude']))
    item_data['geohash'] = int(requests.get(geo_server_url).content)
    # Floor division keeps geobox an integer; "/" would give a float on
    # Python 3.
    item_data['geobox'] = item_data['geohash'] // 10000000000000

    # PUT into DynamoDB Table
    item = Item(g.eventsTable, data=item_data)
    item.save()
DynamoDB Geospatial
// Using Leaflet to show a map
/**
 * Centre a Leaflet map on the given position and request the events inside
 * the visible bounds from the geo server; results are handed to success().
 *
 * @param {Object} position - object exposing coords.latitude and
 *     coords.longitude.
 */
function show_map(position) {
    var latitude = position.coords.latitude;
    var longitude = position.coords.longitude;
    // Assigned without "var" on purpose: success() reads this same "map".
    map = L.map('map').setView([latitude, longitude], 15);
    L.tileLayer('http://{s}.tiles.mapbox.com/v3/guyernest.jngcdfig/{z}/{x}/{y}.png', {
        attribution: 'Map data &copy…', maxZoom: 18
    }).addTo(map);
    // Call DDB to show markers on the map
    var bounds = map.getBounds();
    var request = {
        minLng: bounds.getWest(),
        maxLng: bounds.getEast(),
        minLat: bounds.getSouth(),
        maxLat: bounds.getNorth()
    };
    $.ajax({
        type: "POST",
        url: 'http://geo-server.elasticbeanstalk.com/wl-dynamodb-geo',
        // NOTE(review): keys and the action value are unquoted, so this is
        // not strict JSON; confirm the server parser accepts it.
        data: '{ action: query-rectangle, request :' + JSON.stringify(request) + '}',
        success: success,
        dataType: "json"
    });
}
/**
 * Add one marker with a popup to the global "map" for every entry in
 * data.result.
 *
 * @param {Object} data - response whose "result" array holds entries with
 *     latitude, longitude, comment and img fields.
 */
function success(data) {
    data.result.forEach(function (entry) {
        var marker = L.marker([parseFloat(entry.latitude), parseFloat(entry.longitude)]);
        marker.addTo(map);

        // Build the popup from DOM nodes rather than concatenated HTML so the
        // comment and image URL from the server cannot inject markup.
        var popup = document.createElement('div');
        var title = document.createElement('b');
        title.textContent = entry.comment;
        popup.appendChild(title);
        popup.appendChild(document.createElement('br'));
        var image = document.createElement('img');
        image.src = entry.img;
        popup.appendChild(image);

        marker.bindPopup(popup)
            .openPopup();
    });
}
Amazon CloudSearch Native Geospatial support 
•Latitude and Longitude data types 
•Region search 
•Distance sort 
•Supports mobile
emrMyKeyPair-- bootstrap-actions Path=s3://support.elasticmapreduce/spark/install-spark 
{ 
"ClusterId": "j-38X214F58P62M" 
} 
>> aws emr ssh --cluster-id j-38X214F58P62M --key-pair-file MyKeyPair.pem
// Spark shell script: load events from S3, expose them as a SQL table and
// print the ids of the events in a time range. Assumes "sc" is the
// SparkContext provided by the shell.
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext._

// Define the schema using a case class.
case class Event(event_id: String, time: String, latitude: Float, longitude: Float)

// Create an RDD of Event objects from the S3 "folder" and register it as a table.
// Each line is split on commas into event_id, time, latitude, longitude.
val events = sc.textFile("s3://spark-bucket-demo/spark/events").map(_.split(",")).
  map(p => Event(p(0), p(1), p(2).trim.toFloat, p(3).trim.toFloat))
events.registerAsTable("events")

// SQL statements can be run by using the SQL methods provided by sqlContext.
// "time" is a String column, so the range below is a lexicographic comparison.
// NOTE(review): the upper bound '2014-11-31' also admits November although the
// result is named "oct" - confirm the intended range.
val oct = sql("SELECT event_id FROM events WHERE time >= '2014-10-01' AND time <= '2014-11-31'")

// The results of SQL queries are SchemaRDDs and support all the normal RDD operations.
// The columns of a row in the result can be accessed by ordinal.
oct.map(t => "event-id: " + t(0)).collect().foreach(println)
AWS CloudTrail 
permissions
############### Setup for initial small environment ###############
# Kinesis: create the stream with a single shard
aws kinesis create-stream --stream-name "StreamName" --shard-count 1
# CloudSearch: create the search domain
aws cloudsearch create-domain --domain-name "SearchDomain"
# DynamoDB 
awsdynamodbcreate-table 
--attribute-definitions --table-name "TableName"--key-schema AttributeName=Id,KeyType=HASH Attribute.. 
# Redshift 
awsredshiftcreate-cluster --cluster-identifier "ClusterID"--cluster-type single-node --node-type… 
# EMR 
awsemrrun-job-flow --name"JobFlow" 
--instances {"MasterInstanceType": "m1.medium", "SlaveInstanceType": "m1.medium", "InstanceCount”:} } 
--steps[ 
{ 
"Name": "Analyze Positions", 
"ActionOnFailure": "CONTINUE", 
"HadoopJarStep": {"Jar": "s3://emr-steps/AnalizePositions.jar", 
} 
} 
] 
# Kinesis: create the stream with a single shard
aws kinesis create-stream --stream-name "StreamName" --shard-count 1
# Redshift: create a single-node cluster
aws redshift create-cluster --cluster-identifier "ClusterID" --cluster-type single-node \
    --node-type dw2.large --master-username "master-username" --master-user-password "master-user-password"
#################### Scaling the infrastructure when needed ####################
# Expects SHARD_ID and MID_HASH to be set by the caller before running.
# Kinesis: split one shard at the given hash key
aws kinesis split-shard --stream-name "StreamName" --shard-to-split "$SHARD_ID" --new-starting-hash-key "$MID_HASH"
# CloudSearch: change instance type and replication count
aws cloudsearch update-scaling-parameters --domain-name "SearchDomain" --scaling-parameters DesiredInstanceType=search.m2.xlarge,DesiredReplicationCount=2
# DynamoDB: change provisioned read/write capacity
aws dynamodb update-table --table-name "TableName" --provisioned-throughput ReadCapacityUnits=100,WriteCapacityUnits=20
# Redshift: resize the cluster to two nodes
aws redshift modify-cluster --cluster-identifier "ClusterID" --number-of-nodes 2
# EMR: add a two-instance spot task group
# NOTE(review): option names are kept as on the slide; verify them against the
# installed AWS CLI version before running.
aws emr add-instance-groups --job-flow-id "JobFlow" \
    --instance-groups Name=insGroup,Market=SPOT,InstanceRole=TASK,BidPrice='0.3',InstanceType=m1.medium,InstanceCount=2
// update an event as close only is the reports is coming from the same geo-box 
Tabletable=dynamo.getTable(TABLE_NAME); 
table.updateItem("event-id","7982e605-dc7d-4199-bc3e-d449733932e2”, 
// update expression 
"set status = 'close'", 
// condition expression 
"geobox= :geobox", 
null, 
newValueMap() 
.withInt(":geobox",515811) 
);
SDKs 
Java 
Python (boto) 
PHP 
.NET 
Ruby 
Node.js 
iOS 
Android 
AWS Toolkit for Visual Studio 
AWS Toolkit for Eclipse 
AWS Tools for Windows PowerShell 
AWS CLI 
JavaScript 
new!
Learn from AWS big data experts 
start-to-finish post on analyzing and visualizing big data 
blogs.aws.amazon.com/bigdata
Please give us your feedback on this session. 
Complete session evaluations and earn re:Invent swag. 
http://bit.ly/awsevals

Mais conteúdo relacionado

Mais procurados

Uber_Trips_Visualizations
Uber_Trips_VisualizationsUber_Trips_Visualizations
Uber_Trips_Visualizations
Ayman Siraj
 
Full-Stack JavaScript with Node.js
Full-Stack JavaScript with Node.jsFull-Stack JavaScript with Node.js
Full-Stack JavaScript with Node.js
Michael Lehmann
 
Hadoop - MongoDB Webinar June 2014
Hadoop - MongoDB Webinar June 2014Hadoop - MongoDB Webinar June 2014
Hadoop - MongoDB Webinar June 2014
MongoDB
 

Mais procurados (20)

Absolute Beginners Guide to Puppet Through Types - PuppetConf 2014
Absolute Beginners Guide to Puppet Through Types - PuppetConf 2014Absolute Beginners Guide to Puppet Through Types - PuppetConf 2014
Absolute Beginners Guide to Puppet Through Types - PuppetConf 2014
 
Angular&node js upload file
Angular&node js upload fileAngular&node js upload file
Angular&node js upload file
 
Tracing and awk in ns2
Tracing and awk in ns2Tracing and awk in ns2
Tracing and awk in ns2
 
Webinar: Secrets of ClickHouse Query Performance, by Robert Hodges
Webinar: Secrets of ClickHouse Query Performance, by Robert HodgesWebinar: Secrets of ClickHouse Query Performance, by Robert Hodges
Webinar: Secrets of ClickHouse Query Performance, by Robert Hodges
 
Cервер на Go для мобильной стратегии
Cервер на Go для мобильной стратегииCервер на Go для мобильной стратегии
Cервер на Go для мобильной стратегии
 
Clojutre Real Life (2012 ClojuTRE Retro Edition)
Clojutre Real Life (2012 ClojuTRE Retro Edition)Clojutre Real Life (2012 ClojuTRE Retro Edition)
Clojutre Real Life (2012 ClojuTRE Retro Edition)
 
Clojure@Nuday
Clojure@NudayClojure@Nuday
Clojure@Nuday
 
Javascript Everywhere From Nose To Tail
Javascript Everywhere From Nose To TailJavascript Everywhere From Nose To Tail
Javascript Everywhere From Nose To Tail
 
Uber_Trips_Visualizations
Uber_Trips_VisualizationsUber_Trips_Visualizations
Uber_Trips_Visualizations
 
Hacking the Internet of Things for Fun & Profit
Hacking the Internet of Things for Fun & ProfitHacking the Internet of Things for Fun & Profit
Hacking the Internet of Things for Fun & Profit
 
Map kit light
Map kit lightMap kit light
Map kit light
 
Benchx: An XQuery benchmarking web application
Benchx: An XQuery benchmarking web application Benchx: An XQuery benchmarking web application
Benchx: An XQuery benchmarking web application
 
Full-Stack JavaScript with Node.js
Full-Stack JavaScript with Node.jsFull-Stack JavaScript with Node.js
Full-Stack JavaScript with Node.js
 
MongoDB .local Paris 2020: Adéo @MongoDB : MongoDB Atlas & Leroy Merlin : et ...
MongoDB .local Paris 2020: Adéo @MongoDB : MongoDB Atlas & Leroy Merlin : et ...MongoDB .local Paris 2020: Adéo @MongoDB : MongoDB Atlas & Leroy Merlin : et ...
MongoDB .local Paris 2020: Adéo @MongoDB : MongoDB Atlas & Leroy Merlin : et ...
 
Hadoop - MongoDB Webinar June 2014
Hadoop - MongoDB Webinar June 2014Hadoop - MongoDB Webinar June 2014
Hadoop - MongoDB Webinar June 2014
 
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...
 
Nginx cache api delete
Nginx cache api deleteNginx cache api delete
Nginx cache api delete
 
Exchange 2010/2013 Exchange Management Shell Command
Exchange 2010/2013 Exchange Management Shell CommandExchange 2010/2013 Exchange Management Shell Command
Exchange 2010/2013 Exchange Management Shell Command
 
Nodejs mongoose
Nodejs mongooseNodejs mongoose
Nodejs mongoose
 
MongoDB .local Toronto 2019: Using Change Streams to Keep Up with Your Data
MongoDB .local Toronto 2019: Using Change Streams to Keep Up with Your DataMongoDB .local Toronto 2019: Using Change Streams to Keep Up with Your Data
MongoDB .local Toronto 2019: Using Change Streams to Keep Up with Your Data
 

Semelhante a (BDT401) Big Data Orchestra - Harmony within Data Analysis Tools | AWS re:Invent 2014

Writing robust Node.js applications
Writing robust Node.js applicationsWriting robust Node.js applications
Writing robust Node.js applications
Tom Croucher
 
kissy-past-now-future
kissy-past-now-futurekissy-past-now-future
kissy-past-now-future
yiming he
 

Semelhante a (BDT401) Big Data Orchestra - Harmony within Data Analysis Tools | AWS re:Invent 2014 (20)

Building and Deploying Application to Apache Mesos
Building and Deploying Application to Apache MesosBuilding and Deploying Application to Apache Mesos
Building and Deploying Application to Apache Mesos
 
node.js and the AR.Drone: building a real-time dashboard using socket.io
node.js and the AR.Drone: building a real-time dashboard using socket.ionode.js and the AR.Drone: building a real-time dashboard using socket.io
node.js and the AR.Drone: building a real-time dashboard using socket.io
 
Converting a Rails application to Node.js
Converting a Rails application to Node.jsConverting a Rails application to Node.js
Converting a Rails application to Node.js
 
Apache Kafka, HDFS, Accumulo and more on Mesos
Apache Kafka, HDFS, Accumulo and more on MesosApache Kafka, HDFS, Accumulo and more on Mesos
Apache Kafka, HDFS, Accumulo and more on Mesos
 
Declarative & workflow based infrastructure with Terraform
Declarative & workflow based infrastructure with TerraformDeclarative & workflow based infrastructure with Terraform
Declarative & workflow based infrastructure with Terraform
 
Flux and InfluxDB 2.0 by Paul Dix
Flux and InfluxDB 2.0 by Paul DixFlux and InfluxDB 2.0 by Paul Dix
Flux and InfluxDB 2.0 by Paul Dix
 
Future Decoded - Node.js per sviluppatori .NET
Future Decoded - Node.js per sviluppatori .NETFuture Decoded - Node.js per sviluppatori .NET
Future Decoded - Node.js per sviluppatori .NET
 
Writing robust Node.js applications
Writing robust Node.js applicationsWriting robust Node.js applications
Writing robust Node.js applications
 
Hazelcast and MongoDB at Cloud CMS
Hazelcast and MongoDB at Cloud CMSHazelcast and MongoDB at Cloud CMS
Hazelcast and MongoDB at Cloud CMS
 
Bonnes pratiques de développement avec Node js
Bonnes pratiques de développement avec Node jsBonnes pratiques de développement avec Node js
Bonnes pratiques de développement avec Node js
 
KSQL - Stream Processing simplified!
KSQL - Stream Processing simplified!KSQL - Stream Processing simplified!
KSQL - Stream Processing simplified!
 
Burn down the silos! Helping dev and ops gel on high availability websites
Burn down the silos! Helping dev and ops gel on high availability websitesBurn down the silos! Helping dev and ops gel on high availability websites
Burn down the silos! Helping dev and ops gel on high availability websites
 
CouchDB Mobile - From Couch to 5K in 1 Hour
CouchDB Mobile - From Couch to 5K in 1 HourCouchDB Mobile - From Couch to 5K in 1 Hour
CouchDB Mobile - From Couch to 5K in 1 Hour
 
Developing web-apps like it's 2013
Developing web-apps like it's 2013Developing web-apps like it's 2013
Developing web-apps like it's 2013
 
Fun Teaching MongoDB New Tricks
Fun Teaching MongoDB New TricksFun Teaching MongoDB New Tricks
Fun Teaching MongoDB New Tricks
 
kissy-past-now-future
kissy-past-now-futurekissy-past-now-future
kissy-past-now-future
 
KISSY 的昨天、今天与明天
KISSY 的昨天、今天与明天KISSY 的昨天、今天与明天
KISSY 的昨天、今天与明天
 
Monitoring Your ISP Using InfluxDB Cloud and Raspberry Pi
Monitoring Your ISP Using InfluxDB Cloud and Raspberry PiMonitoring Your ISP Using InfluxDB Cloud and Raspberry Pi
Monitoring Your ISP Using InfluxDB Cloud and Raspberry Pi
 
Emerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the HorizonEmerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the Horizon
 
Writing Maintainable JavaScript
Writing Maintainable JavaScriptWriting Maintainable JavaScript
Writing Maintainable JavaScript
 

Mais de Amazon Web Services

Tools for building your MVP on AWS
Tools for building your MVP on AWSTools for building your MVP on AWS
Tools for building your MVP on AWS
Amazon Web Services
 
How to Build a Winning Pitch Deck
How to Build a Winning Pitch DeckHow to Build a Winning Pitch Deck
How to Build a Winning Pitch Deck
Amazon Web Services
 
Building a web application without servers
Building a web application without serversBuilding a web application without servers
Building a web application without servers
Amazon Web Services
 
AWS_HK_StartupDay_Building Interactive websites while automating for efficien...
AWS_HK_StartupDay_Building Interactive websites while automating for efficien...AWS_HK_StartupDay_Building Interactive websites while automating for efficien...
AWS_HK_StartupDay_Building Interactive websites while automating for efficien...
Amazon Web Services
 

Mais de Amazon Web Services (20)

Come costruire servizi di Forecasting sfruttando algoritmi di ML e deep learn...
Come costruire servizi di Forecasting sfruttando algoritmi di ML e deep learn...Come costruire servizi di Forecasting sfruttando algoritmi di ML e deep learn...
Come costruire servizi di Forecasting sfruttando algoritmi di ML e deep learn...
 
Big Data per le Startup: come creare applicazioni Big Data in modalità Server...
Big Data per le Startup: come creare applicazioni Big Data in modalità Server...Big Data per le Startup: come creare applicazioni Big Data in modalità Server...
Big Data per le Startup: come creare applicazioni Big Data in modalità Server...
 
Esegui pod serverless con Amazon EKS e AWS Fargate
Esegui pod serverless con Amazon EKS e AWS FargateEsegui pod serverless con Amazon EKS e AWS Fargate
Esegui pod serverless con Amazon EKS e AWS Fargate
 
Costruire Applicazioni Moderne con AWS
Costruire Applicazioni Moderne con AWSCostruire Applicazioni Moderne con AWS
Costruire Applicazioni Moderne con AWS
 
Come spendere fino al 90% in meno con i container e le istanze spot
Come spendere fino al 90% in meno con i container e le istanze spot Come spendere fino al 90% in meno con i container e le istanze spot
Come spendere fino al 90% in meno con i container e le istanze spot
 
Open banking as a service
Open banking as a serviceOpen banking as a service
Open banking as a service
 
Rendi unica l’offerta della tua startup sul mercato con i servizi Machine Lea...
Rendi unica l’offerta della tua startup sul mercato con i servizi Machine Lea...Rendi unica l’offerta della tua startup sul mercato con i servizi Machine Lea...
Rendi unica l’offerta della tua startup sul mercato con i servizi Machine Lea...
 
OpsWorks Configuration Management: automatizza la gestione e i deployment del...
OpsWorks Configuration Management: automatizza la gestione e i deployment del...OpsWorks Configuration Management: automatizza la gestione e i deployment del...
OpsWorks Configuration Management: automatizza la gestione e i deployment del...
 
Microsoft Active Directory su AWS per supportare i tuoi Windows Workloads
Microsoft Active Directory su AWS per supportare i tuoi Windows WorkloadsMicrosoft Active Directory su AWS per supportare i tuoi Windows Workloads
Microsoft Active Directory su AWS per supportare i tuoi Windows Workloads
 
Computer Vision con AWS
Computer Vision con AWSComputer Vision con AWS
Computer Vision con AWS
 
Database Oracle e VMware Cloud on AWS i miti da sfatare
Database Oracle e VMware Cloud on AWS i miti da sfatareDatabase Oracle e VMware Cloud on AWS i miti da sfatare
Database Oracle e VMware Cloud on AWS i miti da sfatare
 
Crea la tua prima serverless ledger-based app con QLDB e NodeJS
Crea la tua prima serverless ledger-based app con QLDB e NodeJSCrea la tua prima serverless ledger-based app con QLDB e NodeJS
Crea la tua prima serverless ledger-based app con QLDB e NodeJS
 
API moderne real-time per applicazioni mobili e web
API moderne real-time per applicazioni mobili e webAPI moderne real-time per applicazioni mobili e web
API moderne real-time per applicazioni mobili e web
 
Database Oracle e VMware Cloud™ on AWS: i miti da sfatare
Database Oracle e VMware Cloud™ on AWS: i miti da sfatareDatabase Oracle e VMware Cloud™ on AWS: i miti da sfatare
Database Oracle e VMware Cloud™ on AWS: i miti da sfatare
 
Tools for building your MVP on AWS
Tools for building your MVP on AWSTools for building your MVP on AWS
Tools for building your MVP on AWS
 
How to Build a Winning Pitch Deck
How to Build a Winning Pitch DeckHow to Build a Winning Pitch Deck
How to Build a Winning Pitch Deck
 
Building a web application without servers
Building a web application without serversBuilding a web application without servers
Building a web application without servers
 
Fundraising Essentials
Fundraising EssentialsFundraising Essentials
Fundraising Essentials
 
AWS_HK_StartupDay_Building Interactive websites while automating for efficien...
AWS_HK_StartupDay_Building Interactive websites while automating for efficien...AWS_HK_StartupDay_Building Interactive websites while automating for efficien...
AWS_HK_StartupDay_Building Interactive websites while automating for efficien...
 
Introduzione a Amazon Elastic Container Service
Introduzione a Amazon Elastic Container ServiceIntroduzione a Amazon Elastic Container Service
Introduzione a Amazon Elastic Container Service
 

Último

+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
?#DUbAI#??##{{(☎️+971_581248768%)**%*]'#abortion pills for sale in dubai@
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slide
vu2urc
 

Último (20)

Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...
 
From Event to Action: Accelerate Your Decision Making with Real-Time Automation
From Event to Action: Accelerate Your Decision Making with Real-Time AutomationFrom Event to Action: Accelerate Your Decision Making with Real-Time Automation
From Event to Action: Accelerate Your Decision Making with Real-Time Automation
 
HTML Injection Attacks: Impact and Mitigation Strategies
HTML Injection Attacks: Impact and Mitigation StrategiesHTML Injection Attacks: Impact and Mitigation Strategies
HTML Injection Attacks: Impact and Mitigation Strategies
 
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
 
GenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdfGenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdf
 
Advantages of Hiring UIUX Design Service Providers for Your Business
Advantages of Hiring UIUX Design Service Providers for Your BusinessAdvantages of Hiring UIUX Design Service Providers for Your Business
Advantages of Hiring UIUX Design Service Providers for Your Business
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day Presentation
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
 
Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt Robison
 
Real Time Object Detection Using Open CV
Real Time Object Detection Using Open CVReal Time Object Detection Using Open CV
Real Time Object Detection Using Open CV
 
Boost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdfBoost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdf
 
Tech Trends Report 2024 Future Today Institute.pdf
Tech Trends Report 2024 Future Today Institute.pdfTech Trends Report 2024 Future Today Institute.pdf
Tech Trends Report 2024 Future Today Institute.pdf
 
Finology Group – Insurtech Innovation Award 2024
Finology Group – Insurtech Innovation Award 2024Finology Group – Insurtech Innovation Award 2024
Finology Group – Insurtech Innovation Award 2024
 
Developing An App To Navigate The Roads of Brazil
Developing An App To Navigate The Roads of BrazilDeveloping An App To Navigate The Roads of Brazil
Developing An App To Navigate The Roads of Brazil
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slide
 
What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected Worker
 
Understanding Discord NSFW Servers A Guide for Responsible Users.pdf
Understanding Discord NSFW Servers A Guide for Responsible Users.pdfUnderstanding Discord NSFW Servers A Guide for Responsible Users.pdf
Understanding Discord NSFW Servers A Guide for Responsible Users.pdf
 

(BDT401) Big Data Orchestra - Harmony within Data Analysis Tools | AWS re:Invent 2014

  • 1.
  • 3.
  • 4. Queue Table Textual Search %LIKE% Analytical Reports BLOBs Geospatial Queries Events Table
  • 5.
  • 6.
  • 8.
  • 9. Amazon SQS Amazon Kinesis Auto-scaling Shards provisioning => Simple to set up and operate, easy to deploy new version to a new queue => More cost effective in high scale, once you tuned the system “At least once delivery” Multiple “exactly once in order delivery” => Easy to start with a single worker => A set of dedicated workers that are working in different intervals and different operations
  • 10.
  • 11. fromflaskimportFlask,request application=Flask(__name__) # main entry point for SQS, accepting only POST requests @application.route("/sqs/",methods=['POST']) defsqs(): application.logger.debug('Message was received for processing!') doc=parse_request(request) load_to_cloudsearch(doc) put_into_dynamodb(doc) return""# OK
  • 12. defload_to_cloudsearch(doc): # index document doc_serv=g.domain.get_document_service() doc_serv.add(doc[id],doc) application.logger.debug('Inserting docId: %s',doc[id]) # send index batch to CloudSearch try: doc_serv.commit() exceptCommitMismatchErrorase: application.logger.error('CommitMismatchErrorraised') formsgine.errors: application.logger.error('Error: %s',msg) raise finally: doc_serv.clear_sdf()# clear SDF for next iteration
  • 13. defput_into_dynamodb(doc): itemData=copy.deepcopy(doc) # I want different fields in DynamoDB delitemData['day']# TS is enough, day is only for faceting #Using GeoHasingfor DynamoDB lookup index geojson_location="{{"coordinates":[{0},{1}],"type":"Point"}}".format(doc['latitude'],doc['longitude']) itemData['location']=geojson_location geo_server_url="http://geo-server.elasticbeanstalk.com/wl-dynamodb-geo? point={0},{1}".format(doc['latitude'],doc['longitude'])) itemData['geohash']=int(requests.get(geo_server_url).content) itemData['geobox']=itemData['geohash']/10000000000000 # PUT into DynamoDB Table item=Item(g.eventsTable,data=itemData) item.save()
  • 14.
  • 16.
  • 17.
  • 18.
  • 19.
  • 20. // Using Leaflet to show a map functionshow_map(position){ varlatitude=position.coords.latitude; varlongitude=position.coords.longitude; map=L.map('map').setView([latitude,longitude],15); L.tileLayer('http://{s}.tiles.mapbox.com/v3/guyernest.jngcdfig/{z}/{x}/{y}.png',{ attribution:'Map data &copy…’,maxZoom:18 }).addTo(map); // Call DDB to show markers on the map varbounds=map.getBounds(); varrequest={minLng:bounds.getWest(),maxLng:bounds.getEast(),minLat: bounds.getSouth(),maxLat:bounds.getNorth()} $.ajax({ type:"POST", url:'http://geo-server.elasticbeanstalk.com/wl-dynamodb-geo', data:'{ action: query-rectangle, request :'+JSON.stringify(request) +'}', success:success, dataType:"json" });
  • 21. functionsuccess(data){ data.result.forEach(function(entry){ varmarker=L.marker([parseFloat(entry.latitude),parseFloat(entry.longitude)]) marker.addTo(map); marker.bindPopup("<b>"+entry.comment+"</b><br>"+"<imgsrc='"+entry.img”’>") .openPopup(); }); }
  • 22.
  • 23.
  • 24.
  • 25. Amazon CloudSearch Native Geospatial support •Latitude and Longitude data types •Region search •Distance sort •Supports mobile
  • 26.
  • 27.
  • 28.
  • 29. emrMyKeyPair-- bootstrap-actions Path=s3://support.elasticmapreduce/spark/install-spark { "ClusterId": "j-38X214F58P62M" } >>awsemrssh--cluster-id j-38X214F58P62M --key-pair-fileMyKeyPair.pem
  • 30. valsqlContext=neworg.apache.spark.sql.SQLContext(sc) importsqlContext._ // Define the schema using a case class. caseclassEvent(event_id:String,time:String, latitude: Float, longitude: Float) // Create an RDD of Event objects from S3 “folder” and register it as a table. valevents=sc.textFile("s3://spark-bucket-demo/spark/events").map(_.split(",")). map(p=>Event(p(0),p(1), p(2).trim.toFloat), p(3).trim.toFloat)) events.registerAsTable(”events”) // SQL statements can be run by using the SQL methods provided by sqlContext. Valoct= sql("SELECTevent_idFROMeventsWHEREtime>=‘2014-10-01’ANDtime<=‘2014-11-31’”) // The results of SQL queries are SchemaRDDsand support all the normal RDD operations. // The columns of a row in the result can be accessed by ordinal. oct.map(t=>”event-id: "+t(0)).collect().foreach(println)
  • 31.
  • 33.
  • 34.
  • 35. ###############Setup for initial small environment ############### # Kinesis awskinesiscreate-stream --stream-name "StreamName"--shard-count 1 # CloudSearch awscloudsearchcreate-domain --domain-name "SearchDomain" # DynamoDB awsdynamodbcreate-table --attribute-definitions --table-name "TableName"--key-schema AttributeName=Id,KeyType=HASH Attribute.. # Redshift awsredshiftcreate-cluster --cluster-identifier "ClusterID"--cluster-type single-node --node-type… # EMR awsemrrun-job-flow --name"JobFlow" --instances {"MasterInstanceType": "m1.medium", "SlaveInstanceType": "m1.medium", "InstanceCount”:} } --steps[ { "Name": "Analyze Positions", "ActionOnFailure": "CONTINUE", "HadoopJarStep": {"Jar": "s3://emr-steps/AnalizePositions.jar", } } ] # Kinesis awskinesiscreate-stream --stream-name "StreamName"- -shard-count 1 # Redshift awsredshiftcreate-cluster --cluster-identifier "ClusterID"--cluster-type single-node --node-type dw2.large --master-username "master- username"--master-user-password "master-user- password"
  • 36. #################### Scaling the infrastructure when needed #################### # Kinesis awskinesissplit-shard --stream-name"StreamName"--shard-to-split $SHARD_ID--new- starting-hash-key$MID_HASH # CloudSearch awscloudsearchupdate-scaling-parameters --domain-name"SearchDomain"--scaling- parameters DesiredInstanceType=search.m2.xlarge,DesiredReplicationCount=2 # DynamoDB awsdynamodbupdate-table --table-name"TableName"--provisioned-throughput ReadCapacityUnits=100,WriteCapacityUnits=20 # Redshift awsredshiftmodify-cluster --cluster-identifier"ClusterID"--number-of-nodes 2 # EMR awsemradd-instance-groups --job-flow-id"JobFlow" --instance-groups Name=insGroup,Market=SPOT,InstanceRole=TASK,BidPrice='0.3',InstanceType=m1.medium,InstanceCount=2
  • 37.
  • 38.
  • 39. // update an event as close only is the reports is coming from the same geo-box Tabletable=dynamo.getTable(TABLE_NAME); table.updateItem("event-id","7982e605-dc7d-4199-bc3e-d449733932e2”, // update expression "set status = 'close'", // condition expression "geobox= :geobox", null, newValueMap() .withInt(":geobox",515811) );
  • 40. SDKs Java Python (boto) PHP .NET Ruby Node.js iOS Android AWS Toolkit for Visual Studio AWS Toolkit for Eclipse AWS Tools for Windows PowerShell AWS CLI JavaScript new!
  • 41.
  • 42.
  • 43.
  • 44.
  • 45.
  • 46.
  • 47.
  • 48. Learn from AWS big data experts start-to-finish post on analyzing and visualizing big data blogs.aws.amazon.com/bigdata
  • 49. Please give us your feedback on this session. Complete session evaluations and earn re:Invent swag. http://bit.ly/awsevals