AWS Athena: Difference between revisions
From Federal Burro of Information
Jump to navigationJump to search
No edit summary |
|||
(5 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
https://docs.aws.amazon.com/athena/latest/ug/functions-operators-reference-section.html | |||
== light sensor == | |||
from the light sensor data project: | from the light sensor data project: | ||
Line 14: | Line 18: | ||
'mapkey.delim' = 'undefined' | 'mapkey.delim' = 'undefined' | ||
) LOCATION 's3://mymainsqueeze/sensor/' | ) LOCATION 's3://mymainsqueeze/sensor/' | ||
TBLPROPERTIES ('has_encrypted_data'='false'); | TBLPROPERTIES ( | ||
'has_encrypted_data'='false'); | |||
</pre> | </pre> | ||
SELECT timestamp , reading1, reading2 from lightsensordb.sensordatatable LIMIT 100 | SELECT timestamp , reading1, reading2 from lightsensordb.sensordatatable LIMIT 100 | ||
string to date: | |||
date_parse(b.APIDT, '%Y-%m-%d') | |||
== transactions == | |||
If you are destroying and creating tables ofeten, tweaking and tuning, make a saved query for the create. | |||
<pre> | |||
CREATE EXTERNAL TABLE IF NOT EXISTS billing.XXX ( | |||
`posted` string, | |||
`payee` string, | |||
`address` string, | |||
`amount` float | |||
) | |||
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' | |||
WITH SERDEPROPERTIES ( | |||
'serialization.format' = ',', | |||
'field.delim' = ',' | |||
) LOCATION 's3://XXX/XXX/' | |||
TBLPROPERTIES ( | |||
'has_encrypted_data'='false', | |||
'skip.header.line.count'='1' | |||
); | |||
</pre> | |||
SELECT date_parse(posted, '%m/%d/%Y') , payee FROM "billing"."XXX" limit 100; | |||
transaction by day: | |||
SELECT date_parse(posted, '%m/%d/%Y') as date , count(payee) FROM "billing"."XXX" GROUP by date_parse(posted, '%m/%d/%Y') limit 100; | |||
spending_by_payee_overmonth | |||
SELECT date_trunc('month', date_parse(posted, '%m/%d/%Y')) AS month, payee, sum(amount) AS total | |||
FROM "billing"."XXX" | |||
GROUP BY date_trunc('month', date_parse(posted, '%m/%d/%Y')), payee | |||
ORDER BY date_trunc('month', date_parse(posted, '%m/%d/%Y')), sum(amount) limit 100; | |||
if your data has a header remote it! | |||
TBLPROPERTIES ( 'skip.header.line.count'='1'); |
Latest revision as of 15:26, 28 March 2018
https://docs.aws.amazon.com/athena/latest/ug/functions-operators-reference-section.html
light sensor
from the light sensor data project:
CREATE EXTERNAL TABLE IF NOT EXISTS lightsensordb.sensordatatable ( `timestamp` int, `reading1` float, `reading2` float ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = ' ', 'field.delim' = ' ', 'collection.delim' = 'undefined', 'mapkey.delim' = 'undefined' ) LOCATION 's3://mymainsqueeze/sensor/' TBLPROPERTIES ( 'has_encrypted_data'='false');
SELECT timestamp , reading1, reading2 from lightsensordb.sensordatatable LIMIT 100
string to date:
date_parse(b.APIDT, '%Y-%m-%d')
transactions
If you are destroying and creating tables ofeten, tweaking and tuning, make a saved query for the create.
CREATE EXTERNAL TABLE IF NOT EXISTS billing.XXX ( `posted` string, `payee` string, `address` string, `amount` float ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = ',', 'field.delim' = ',' ) LOCATION 's3://XXX/XXX/' TBLPROPERTIES ( 'has_encrypted_data'='false', 'skip.header.line.count'='1' );
SELECT date_parse(posted, '%m/%d/%Y') , payee FROM "billing"."XXX" limit 100;
transaction by day:
SELECT date_parse(posted, '%m/%d/%Y') as date , count(payee) FROM "billing"."XXX" GROUP by date_parse(posted, '%m/%d/%Y') limit 100;
spending_by_payee_overmonth
SELECT date_trunc('month', date_parse(posted, '%m/%d/%Y')) AS month, payee, sum(amount) AS total FROM "billing"."XXX" GROUP BY date_trunc('month', date_parse(posted, '%m/%d/%Y')), payee ORDER BY date_trunc('month', date_parse(posted, '%m/%d/%Y')), sum(amount) limit 100;
if your data has a header remote it!
TBLPROPERTIES ( 'skip.header.line.count'='1');