AWS Notes/SQL on VPC Flow Logs

From Federal Burro of Information
Jump to navigationJump to search
select
 vpc_id as VPC,
 sum( cast(bytes as DECIMAL )  / 1073741824  as GB,
 dt
from vpc_flow_logs 
group by vpc_id , dt
limit 10;
select
 dt,
 vpc_id as VPC,
 bytes
from vpc_flow_logs
limit 10;

ALTER TABLE ADD PARTITION

ALTER TABLE orders ADD
  PARTITION (dt = '2016-05-14') LOCATION 's3://mystorage/path/to/INDIA_14_May_2016/'
  PARTITION (dt = '2016-05-15') LOCATION 's3://mystorage/path/to/INDIA_15_May_2016/';

If you choose "load partitions" it will execute: "MSCK REPAIR TABLE vpc_flow_logs;"

MSCK REPAIR TABLE vpc_flow_logs;

which gives the following out put:

Partitions not in metastore:	vpc_flow_logs:2019/09/21	vpc_flow_logs:2019/09/22	vpc_flow_logs:2019/09/23
ALTER TABLE  vpc_flow_logs ADD PARTITION ( dt = '2019-09-21' ) location 's3://vpc-flow-logs/AWSLogs/XXX/vpcflowlogs/us-east-1/2019/09/21';
ALTER TABLE  vpc_flow_logs ADD PARTITION ( dt = '2019-09-22' ) location 's3://vpc-flow-logs/AWSLogs/XXX/vpcflowlogs/us-east-1/2019/09/22';
ALTER TABLE  vpc_flow_logs ADD PARTITION ( dt = '2019-09-23' ) location 's3://vpc-flow-logs/AWSLogs/XXX/vpcflowlogs/us-east-1/2019/09/23';

show partitions vpc_flow_logs;

select
 dt,
 vpc_id as VPC,
 sum(bytes) as bytes,
 cast( sum(bytes) as decimal )/1024 as Kilobytes,
 cast( sum(bytes) as decimal )/1048576 as Megabytes,
 cast( sum(bytes) as decimal(38,2) )/1073741824 as Gigabytes
from vpc_flow_logs
group by dt, vpc_id
order by dt,vpc_id
limit 10;
select
interfaceid, cast( sum(bytes) as decimal(38,2) )/1073741824 as Gigabytes
from vpc_flow_logs
WHERE vpc_id in ( 'vpc-XXX' )
group by interfaceid
order by Gigabytes
limit 10;
select
 dt,
 vpc_id,
 destinationport,
 cast( sum(bytes) as decimal(38,2) )/1073741824 as Gigabytes,
 sum(bytes)
FROM vpc_flow_logs
GROUP BY dt,vpc_id,destinationport
ORDER BY dt