AWS Athena: Difference between revisions
From Federal Burro of Information
Jump to navigationJump to search
No edit summary |
|||
(2 intermediate revisions by the same user not shown) | |||
Line 50: | Line 50: | ||
); | ); | ||
</pre> | </pre> | ||
SELECT date_parse(posted, '%m/%d/%Y') , payee FROM "billing"."XXX" limit 100; | |||
transaction by day: | |||
SELECT date_parse(posted, '%m/%d/%Y') as date , count(payee) FROM "billing"."XXX" GROUP by date_parse(posted, '%m/%d/%Y') limit 100; | |||
spending_by_payee_overmonth | |||
SELECT date_trunc('month', date_parse(posted, '%m/%d/%Y')) AS month, payee, sum(amount) AS total | |||
FROM "billing"."XXX" | |||
GROUP BY date_trunc('month', date_parse(posted, '%m/%d/%Y')), payee | |||
ORDER BY date_trunc('month', date_parse(posted, '%m/%d/%Y')), sum(amount) limit 100; | |||
Latest revision as of 15:26, 28 March 2018
https://docs.aws.amazon.com/athena/latest/ug/functions-operators-reference-section.html
light sensor
from the light sensor data project:
CREATE EXTERNAL TABLE IF NOT EXISTS lightsensordb.sensordatatable ( `timestamp` int, `reading1` float, `reading2` float ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = ' ', 'field.delim' = ' ', 'collection.delim' = 'undefined', 'mapkey.delim' = 'undefined' ) LOCATION 's3://mymainsqueeze/sensor/' TBLPROPERTIES ( 'has_encrypted_data'='false');
SELECT timestamp , reading1, reading2 from lightsensordb.sensordatatable LIMIT 100
string to date:
date_parse(b.APIDT, '%Y-%m-%d')
transactions
If you are destroying and creating tables ofeten, tweaking and tuning, make a saved query for the create.
CREATE EXTERNAL TABLE IF NOT EXISTS billing.XXX ( `posted` string, `payee` string, `address` string, `amount` float ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = ',', 'field.delim' = ',' ) LOCATION 's3://XXX/XXX/' TBLPROPERTIES ( 'has_encrypted_data'='false', 'skip.header.line.count'='1' );
SELECT date_parse(posted, '%m/%d/%Y') , payee FROM "billing"."XXX" limit 100;
transaction by day:
SELECT date_parse(posted, '%m/%d/%Y') as date , count(payee) FROM "billing"."XXX" GROUP by date_parse(posted, '%m/%d/%Y') limit 100;
spending_by_payee_overmonth
SELECT date_trunc('month', date_parse(posted, '%m/%d/%Y')) AS month, payee, sum(amount) AS total FROM "billing"."XXX" GROUP BY date_trunc('month', date_parse(posted, '%m/%d/%Y')), payee ORDER BY date_trunc('month', date_parse(posted, '%m/%d/%Y')), sum(amount) limit 100;
if your data has a header remote it!
TBLPROPERTIES ( 'skip.header.line.count'='1');