Mais conteúdo relacionado Semelhante a ストリーミングデータのアドホック分析エンジンの比較 (20) ストリーミングデータのアドホック分析エンジンの比較3. • 2011/04
• 2015/09
•
• Druid (KDP, 2015)
• RDB NoSQL ( , 2016; : HBase )
• ESP8266 Wi-Fi IoT (KDP, 2016)
•
• (WebDB Forum 2014)
• Spark Streaming (Spark Meetup December; 2015)
• Kafka AWS Kinesis (Apache Kafka Meetup Japan #1; 2016)
• (FutureOfData; 2016)
• Queryable State for Kafka Streams (Apache Kafka Meetup Japan #2; 2016)
• Apache Spark ( Geek Night #11; 2016)
3
12. • SQL
• SELECT GROUPBY JOIN
•
•
• AWS Kinesis Apache Kafka
• &
•
•
•
12
23. JSON
-- Slide examples: six dialect-specific ways to extract the same nested JSON
-- value (features[0].geometry.coordinates) from a JSON column named info.

-- PostgreSQL/PipelineDB: #> takes the whole path as a text array in one step.
SELECT
info#>'{features, 0, geometry, coordinates}' as coord
FROM geo_view;
-- PostgreSQL/PipelineDB: equivalent chain of -> operators, one level per call.
SELECT
info->'features'->0->'geometry'->'coordinates' as coord
FROM geo_view;
-- JSON_EXTRACT_JSON is presumably the MemSQL form — TODO confirm.
-- NOTE(review): info::features reads as a cast, unlike the call right below
-- that passes 'features' as an argument; possibly a typo on the original slide.
SELECT
JSON_EXTRACT_JSON(info::features, 0, 'geometry', 'coordinates')
FROM geo;
SELECT
JSON_EXTRACT_JSON(info, 'features', 0, 'geometry', 'coordinates')
FROM geo;
-- FIELD()/ARRAY_ELEMENT() are presumably the VoltDB forms — TODO confirm.
-- Single FIELD() call with a dotted/indexed path string:
SELECT
FIELD(info, 'features[0].geometry.coordinates')
FROM geo;
-- Same result built from nested per-level calls:
SELECT
FIELD(ARRAY_ELEMENT(FIELD(info, 'features'), 0), 'geometry.coordinates')
FROM geo;
31. Kinesis Analytics: SQL
-- Kinesis Analytics: aggregate bid/ask/last prices in 120-second buckets.
-- Declare the in-application output stream (schema of the aggregated rows).
CREATE OR REPLACE STREAM "DESTINATION_SQL_STREAM" (
min_ask INTEGER,
max_bid INTEGER,
avg_last INTEGER
);
-- A PUMP continuously runs the SELECT and inserts its results into the
-- destination stream.
CREATE OR REPLACE PUMP "TEST_STREAM_PUMP"
AS INSERT INTO "DESTINATION_SQL_STREAM"
SELECT STREAM
MIN("ask") as min_ask,
MAX("bid") as max_bid,
AVG("last") as avg_last
FROM "SOURCE_SQL_STREAM_001"
-- Tumbling window: floor ROWTIME's seconds-since-epoch to 120-second steps
-- so each group covers one two-minute interval per partition key.
GROUP BY PARTITION_KEY,
FLOOR(("SOURCE_SQL_STREAM_001".ROWTIME
- TIMESTAMP '1970-01-01 00:00:00') SECOND / 120 TO SECOND);
CREATE STREAM
CREATE PUMP
31
34. PipelineDB: Kinesis
-- PipelineDB: consume an AWS Kinesis stream into a PipelineDB stream and
-- materialize it through a continuous view.
CREATE STREAM bitcoins (info JSON);
-- Register the Kinesis endpoint (region + path to an AWS credentials file).
SELECT pipeline_kinesis.add_endpoint('input_stream',
'ap-northeast-1',
'/path_to_credential_file');
-- Start pulling records from the named Kinesis stream into the 'bitcoins'
-- stream, parsing each record as JSON.
SELECT pipeline_kinesis.consume_begin('input_stream',
'kinesis-stream-name',
'bitcoins',
format := 'json');
-- Continuous view: incrementally-maintained query over the stream; plain
-- SELECTs against it read the materialized results.
CREATE CONTINUOUS VIEW bitcoins_view AS SELECT info FROM bitcoins;
SELECT * FROM bitcoins_view LIMIT 10;
34
CREATE STREAM / SELECT pipeline_*.consume_begin
CREATE CONTINUOUS VIEW
35. PipelineDB: Kafka
-- PipelineDB: consume a Kafka topic into a PipelineDB stream and materialize
-- it through a continuous view.
CREATE STREAM bitcoins (info JSON);
-- Register the Kafka broker with the pipeline_kafka extension.
SELECT pipeline_kafka.add_broker('172.17.0.3:9092');
-- Start consuming topic 'test-bitcoin-j' into the 'bitcoins' stream as JSON.
-- FIX: the opening quote of 'json' was a smart quote (‘), a syntax error;
-- replaced with a plain single quote (matches the Kinesis example's usage).
SELECT pipeline_kafka.consume_begin('test-bitcoin-j',
                                    'bitcoins',
                                    format := 'json');
-- Continuous view: incrementally-maintained query over the stream.
CREATE CONTINUOUS VIEW bitcoins_view AS SELECT info FROM bitcoins;
SELECT * FROM bitcoins_view LIMIT 10;
CREATE STREAM / SELECT pipeline_*.consume_begin
CREATE CONTINUOUS VIEW
35
37. MemSQL: Kafka
-- MemSQL: load a Kafka topic into a table with a PIPELINE, then query it.
CREATE TABLE bitcoins (info JSON);
-- Pipeline reads topic test-bitcoin-j from broker 172.17.0.3:9092 and
-- inserts each record into the bitcoins table.
CREATE PIPELINE `test_kafka_bitcoin` AS LOAD DATA
KAFKA '172.17.0.3:9092/test-bitcoin-j' INTO TABLE `bitcoins`;
-- Dry-run a single record without inserting, then start the pipeline.
TEST PIPELINE test_kafka_bitcoin LIMIT 1;
START PIPELINE test_kafka_bitcoin;
-- Ordinary (non-continuous) view; both the table and the view are queryable.
CREATE VIEW bitcoins_view AS SELECT info FROM bitcoins;
SELECT * FROM bitcoins LIMIT 10;
SELECT * FROM bitcoins_view LIMIT 10;
CREATE TABLE / CREATE PIPELINE
CREATE VIEW
37
39. VoltDB: Kinesis
-- VoltDB: target table for the Kinesis importer (JSON arrives as raw text),
-- plus a view over it; both are queryable directly.
CREATE TABLE bitcoins (
    info VARCHAR(5000)
);
CREATE VIEW bitcoins_view AS
SELECT info
FROM bitcoins;
SELECT * FROM bitcoins LIMIT 10;
SELECT * FROM bitcoins_view LIMIT 10;
<!-- VoltDB deployment.xml importer: pull records from a Kinesis stream and
     hand each record to the bitcoins.insert procedure.
     FIX: the stream.name and region attribute values used typographic smart
     quotes (placed there by slide-export mangling), which is malformed XML;
     replaced with plain double quotes. -->
<deployment>
  <import>
    <configuration type="kinesis" format="csv" enabled="true">
      <property name="stream.name"> kinesis-stream-name </property>
      <property name="region"> ap-northeast-1 </property>
      <property name="access.key"> ... </property>
      <property name="secret.key"> ... </property>
      <property name="procedure"> bitcoins.insert </property>
    </configuration>
  </import>
</deployment>
39
CREATE TABLE /
CREATE VIEW
40. VoltDB: Kafka
-- VoltDB: target table for the Kafka importer (JSON arrives as raw text),
-- plus a view over it; both are queryable directly.
CREATE TABLE bitcoins (
    info VARCHAR(5000)
);
CREATE VIEW bitcoins_view AS
SELECT info
FROM bitcoins;
SELECT * FROM bitcoins LIMIT 10;
SELECT * FROM bitcoins_view LIMIT 10;
<!-- VoltDB deployment.xml importer: consume a Kafka topic and hand each
     record to the bitcoins.insert procedure.
     FIX: the topics and brokers attribute values used typographic smart
     quotes (slide-export mangling; brokers even mixed “ with "), which is
     malformed XML; replaced with plain double quotes. -->
<deployment>
  <import>
    <configuration type="kafka" format="csv" enabled="true">
      <property name="topics"> test-bitcoin-j </property>
      <property name="brokers"> 172.17.0.3:9092 </property>
      <property name="procedure"> bitcoins.insert </property>
    </configuration>
  </import>
</deployment>
40
CREATE TABLE /
CREATE VIEW
48. SQL Sliding Window
-- Slide examples: sliding/windowed aggregation in three dialects.
-- NOTE(review): the "..." placeholders come from the slide; these snippets
-- are illustrative and not runnable as-is.

-- Kinesis Analytics: named WINDOW clause with a RANGE ... PRECEDING frame
-- (one-hour sliding window over the stream).
SELECT STREAM
count(*) OVER lastHour
FROM APP_STREAM
WINDOW lastHour AS (PARTITION BY ... RANGE INTERVAL '1' HOUR PRECEDING);
-- PipelineDB: sliding window declared via storage parameters on the
-- continuous view (sw = window size, step_factor = bucketing granularity).
CREATE VIEW app_stream_view
WITH (sw = '1 hour', step_factor = 50)
AS SELECT count(*) FROM app_stream;
-- Plain SQL window function over an ordinary table — windowing is expressed
-- per-query rather than on the stream/view definition.
SELECT
count(*) OVER (PARTITION BY ... ORDER BY ...)
FROM app_stream_table;
timediff()
50. • SQL
• SELECT GROUPBY JOIN
•
•
• AWS Kinesis Apache Kafka
• &
•
•
•
50
51. Kinesis Analytics
• Kinesis Stream KPL
• Aggregated Record
• Web UI
• API aws
• AddApplicationReferenceDataSource
• S3 / UpdateApplication
• S3 1GB