AWS Notes/SQL on VPC Flow Logs
From Federal Burro of Information
Jump to navigationJump to search
select vpc_id as VPC, sum( cast(bytes as DECIMAL ) / 1073741824 as GB, dt from vpc_flow_logs group by vpc_id , dt limit 10;
select dt, vpc_id as VPC, bytes from vpc_flow_logs limit 10;
ALTER TABLE ADD PARTITION
ALTER TABLE orders ADD PARTITION (dt = '2016-05-14') LOCATION 's3://mystorage/path/to/INDIA_14_May_2016/' PARTITION (dt = '2016-05-15') LOCATION 's3://mystorage/path/to/INDIA_15_May_2016/';
If you choose "load partitions" it will execute: "MSCK REPAIR TABLE vpc_flow_logs;"
MSCK REPAIR TABLE vpc_flow_logs;
which gives the following out put:
Partitions not in metastore: vpc_flow_logs:2019/09/21 vpc_flow_logs:2019/09/22 vpc_flow_logs:2019/09/23
ALTER TABLE vpc_flow_logs ADD PARTITION ( dt = '2019-09-21' ) location 's3://vpc-flow-logs/AWSLogs/XXX/vpcflowlogs/us-east-1/2019/09/21'; ALTER TABLE vpc_flow_logs ADD PARTITION ( dt = '2019-09-22' ) location 's3://vpc-flow-logs/AWSLogs/XXX/vpcflowlogs/us-east-1/2019/09/22'; ALTER TABLE vpc_flow_logs ADD PARTITION ( dt = '2019-09-23' ) location 's3://vpc-flow-logs/AWSLogs/XXX/vpcflowlogs/us-east-1/2019/09/23';
show partitions vpc_flow_logs;
select dt, vpc_id as VPC, sum(bytes) as bytes, cast( sum(bytes) as decimal )/1024 as Kilobytes, cast( sum(bytes) as decimal )/1048576 as Megabytes, cast( sum(bytes) as decimal(38,2) )/1073741824 as Gigabytes from vpc_flow_logs group by dt, vpc_id order by dt,vpc_id limit 10;
select interfaceid, cast( sum(bytes) as decimal(38,2) )/1073741824 as Gigabytes from vpc_flow_logs WHERE vpc_id in ( 'vpc-XXX' ) group by interfaceid order by Gigabytes limit 10;
select dt, vpc_id, destinationport, cast( sum(bytes) as decimal(38,2) )/1073741824 as Gigabytes, sum(bytes) FROM vpc_flow_logs GROUP BY dt,vpc_id,destinationport ORDER BY dt