
Export CloudWatch Log Group to S3

create bucket and policy

# envsubst substitutes exported variables; quote the heredoc delimiter so the
# shell does not expand ${bucket_name}/${aws_region} before envsubst sees them
export bucket_name=my-exported-logs-2358-$RANDOM
export aws_region=us-east-2
envsubst >policy.json <<-'EOF'
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": "s3:GetBucketAcl",
            "Effect": "Allow",
            "Resource": "arn:aws:s3:::${bucket_name}",
            "Principal": { "Service": "logs.${aws_region}.amazonaws.com" }
        },
        {
            "Action": "s3:PutObject" ,
            "Effect": "Allow",
            "Resource": "arn:aws:s3:::${bucket_name}/*",
            "Condition": { "StringEquals": { "s3:x-amz-acl": "bucket-owner-full-control" } },
            "Principal": { "Service": "logs.${aws_region}.amazonaws.com" }
        }
    ]
}
EOF
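
Before attaching it, a quick optional sanity check that the rendered policy.json is valid JSON and that both variables were actually substituted:

# fail loudly if the rendered policy is not valid JSON
jq . policy.json
# warn if any ${...} placeholder was left unsubstituted
grep -q '\${' policy.json && echo "WARNING: unsubstituted variables remain in policy.json"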

aws s3api create-bucket --bucket ${bucket_name} \
--create-bucket-configuration LocationConstraint=${aws_region} \
--region ${aws_region}

# NOTE: this overwrites any existing policy attached to the S3 bucket!
aws s3api put-bucket-policy --bucket ${bucket_name} --policy file://policy.json
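
Optionally, confirm that the bucket is reachable and that the policy really got attached before starting the export:

# returns a non-zero exit code if the bucket does not exist or is not accessible
aws s3api head-bucket --bucket ${bucket_name}
# print the attached bucket policy
aws s3api get-bucket-policy --bucket ${bucket_name} --query Policy --output text | jq .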

export task

loggroup_name="/aws/eks/ekscluster1/cluster"
# create-export-task expects millisecond timestamps, hence the trailing 000 appended below
begin_time=$(date --date "2 days ago" +%s)
end_time=$(date --date "1 day ago" +%s)

suffix=$(date +%Y%m%d-%H%M%S)
# switch to the commented-out list to export all EKS control-plane log stream prefixes
#for i in kube-apiserver-audit kube-apiserver kube-scheduler authenticator kube-controller-manager cloud-controller-manager ; do
for i in kube-apiserver-audit ; do
  task_name=${i}-${suffix}
  # create export task; only one export task can be active (PENDING or RUNNING)
  # per account at a time, hence the wait-for-completion loop below
  aws logs create-export-task --task-name ${task_name} \
    --log-group-name ${loggroup_name} \
    --log-stream-name-prefix ${i} \
    --from ${begin_time}000 --to ${end_time}000 \
    --destination ${bucket_name} \
    --destination-prefix ${i} |tee /tmp/$$
  task_id=$(cat /tmp/$$ |jq -r '.taskId')
  while true; do
    # describe task
    aws logs describe-export-tasks \
      --task-id ${task_id} |tee /tmp/$$.task
    task_status=$(cat /tmp/$$.task |jq -r '.exportTasks[0].status.code')
    if [[ ${task_status} == "COMPLETED" ]]; then
      break
    fi
    sleep 10
  done
done
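
Once a task completes, the exported objects land under <destination-prefix>/<task-id>/<log-stream-name>/ as gzip files. A quick way to inspect the result (assuming the variables from the loop above are still set; the object key in the commented command is a placeholder):

# list everything exported under the kube-apiserver-audit prefix
aws s3 ls s3://${bucket_name}/kube-apiserver-audit/ --recursive
# stream one exported object to stdout and peek at the first lines
# (replace <task-id>/<stream-name>/000000.gz with a real key from the listing above)
#aws s3 cp s3://${bucket_name}/kube-apiserver-audit/<task-id>/<stream-name>/000000.gz - | zcat | head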

remaining issues

  • Need to persist the time point of each export so the next run can resume from it (a sketch follows this list)
  • After export, a folder named after the task id is created under the specified prefix, with one sub-folder per log stream name; the log files are gzip-compressed
  • The log-stream-name-prefix parameter can be omitted to export all logs; consider how the logs should then be post-processed
  • Kubernetes control-plane log streams have overlapping prefixes, e.g. kube-apiserver-audit-xxx and kube-apiserver-xxxx; the former are JSON logs while the latter are plain line logs. How to export them to separate prefix paths, or post-process them after export? See: stream-k8s-control-panel-logs-to-s3
  • The exported message format still needs to be worked out; see athena-sample-query
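
For the first item above, a minimal sketch of checkpointing the export window: persist end_time once every task has completed, and read it back as the next run's begin_time. The checkpoint file path below is made up for illustration; an SSM parameter or an S3 object would serve the same purpose.

# hypothetical checkpoint location; pick something durable in practice
checkpoint_file=/var/tmp/cloudwatch-export.checkpoint

# resume from the last exported time point if a checkpoint exists,
# otherwise fall back to "2 days ago"
if [[ -f ${checkpoint_file} ]]; then
  begin_time=$(cat ${checkpoint_file})
else
  begin_time=$(date --date "2 days ago" +%s)
fi
end_time=$(date +%s)

# ... run the create-export-task / wait loop from above with this window ...

# record the end of the window only after every task reached COMPLETED
echo ${end_time} >${checkpoint_file}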