From: Justin Wind
Date: Wed, 12 Oct 2016 18:42:27 +0000 (-0700)
Subject: initial import
X-Git-Url: https://git.squeep.com/?a=commitdiff_plain;h=164fb4ac7aebf84ca89433aeee8d16771fb8b7da;p=awsible

initial import
---

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..80c04f5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# AWSible - Autonomous Ansible on AWS
+
+This is a simple framework that runs on a small dedicated instance in an AWS account and responds to newly spawned instances by running the appropriate Ansible playbook against them.
+
diff --git a/ansible.cfg b/ansible.cfg
new file mode 100644
index 0000000..a48806d
--- /dev/null
+++ b/ansible.cfg
@@ -0,0 +1,5 @@
+[defaults]
+host_key_checking = False
+inventory = inventory
+remote_user = ec2-user
+private_key_file = keys/awskey.pem
diff --git a/group_vars/all b/group_vars/all
new file mode 100644
index 0000000..257b7ad
--- /dev/null
+++ b/group_vars/all
@@ -0,0 +1,3 @@
+---
+ACCT_ID: 123456789012
+VPC_SUBNETS: "subnet-1234abcd,subnet-5678ef01"
diff --git a/group_vars/management b/group_vars/management
new file mode 100644
index 0000000..98a2263
--- /dev/null
+++ b/group_vars/management
@@ -0,0 +1,6 @@
+MANAGEMENT_DATA_ROOT: /data/management
+MANAGEMENT_EVENT_QUEUE: management-events
+MANAGEMENT_EVENT_FAILURE_QUEUE: management-events-failed
+MANAGEMENT_NOTICE_ARN: arn:aws:sns:us-east-1:123456789012:management-alerts
+MANAGEMENT_BACKUP_S3_BUCKET: backups
+MANAGEMENT_SUBNET: "subnet-1234abcd"
diff --git a/inventory/asg-inventory.py b/inventory/asg-inventory.py
new file mode 100755
index 0000000..74e21b0
--- /dev/null
+++ b/inventory/asg-inventory.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+'''\
+Generate a JSON object containing the names of all the AWS Autoscaling
+Groups in an account and the IPs of the Instances within them, suitable
+for use as an Ansible inventory.
+'''
+
+import argparse
+import boto3
+import json
+import sys
+from multiprocessing.dummy import Pool as ThreadPool
+from functools import partial, reduce
+
+
+DEFAULT_REGIONS = ['us-east-1', 'us-west-2']
+HOSTVARS = {}
+
+
+def allASGInstances(asgc):
+    'Return a tuple of a dict of each ASG name listing the instance IDs within it, and a list of all instance IDs.'
+    asgs = {}
+    instanceIds = []
+    args = {}
+    while True:
+        response = asgc.describe_auto_scaling_groups(**args)
+        for asg in response['AutoScalingGroups']:
+            asgs[asg['AutoScalingGroupName']] = [i['InstanceId'] for i in asg['Instances']]
+            instanceIds += asgs[asg['AutoScalingGroupName']]
+        if 'NextToken' not in response:
+            break
+        args = {'NextToken': response['NextToken']}
+    return (asgs, instanceIds)
+
+
+def allInstanceIPs(ec2c, InstanceIds=None, publicIPs=False):
+    'Return a dict of each Instance ID with its addresses.'
+    instances = {}
+    args = {}
+    IPType = 'PublicIpAddress' if publicIPs else 'PrivateIpAddress'
+    if InstanceIds is not None:
+        args['InstanceIds'] = InstanceIds
+    while True:
+        response = ec2c.describe_instances(**args)
+        for resv in response['Reservations']:
+            for inst in resv['Instances']:
+                if IPType in inst:
+                    instances[inst['InstanceId']] = inst[IPType]
+        if 'NextToken' not in response:
+            break
+        args = {'NextToken': response['NextToken']}
+    return instances
+
+
+def regionInventory(sessionArgs, publicIPs=False):
+    'Return dict results for one region.'
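+    # Runs once per region in its own worker thread: builds a region-specific
+    # boto3 session, maps each ASG name to the IPs of its member instances,
+    # and also exposes every instance ID as its own single-host group.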
+    session = boto3.session.Session(**sessionArgs)
+    asgc = session.client('autoscaling')
+    ec2c = session.client('ec2')
+
+    # get dict of ASG names and associated Instance Ids, plus list of all Instance Ids referenced by ASGs
+    (ASGs, AllInstanceIds) = allASGInstances(asgc)
+
+    # get list of instance IPs for all instance Ids used by ASGs
+    AllInstanceIPs = allInstanceIPs(ec2c, InstanceIds=AllInstanceIds, publicIPs=publicIPs)
+
+    # a group for every Instance Id
+    inventory = {iid: [AllInstanceIPs[iid]] for iid in AllInstanceIPs}
+
+    # add ASG dict, replacing ASG Instance Id with instance IP
+    inventory.update({asg: [AllInstanceIPs[iid] for iid in ASGs[asg]] for asg in ASGs})
+
+    return inventory
+
+
+def mergeDictOfLists(a, b):
+    'There is likely a better way of doing this, but right now I have a headache.'
+    for key in b:
+        if key in a:
+            a[key] += b[key]
+        else:
+            a[key] = b[key]
+    return a
+
+
+parser = argparse.ArgumentParser(description='dynamic Ansible inventory from AWS Autoscaling Groups')
+parser.add_argument('--public', action='store_true', help='inventory public IPs (default: private IPs)')
+parser.add_argument('--profile', metavar='PROFILE', dest='profile_name', help='AWS Profile (default: current IAM Role)')
+parser.add_argument('--regions', metavar='REGION', nargs='+', default=DEFAULT_REGIONS, help='AWS Regions (default: %(default)s)')
+parser.add_argument('--list', action='store_true')
+parser.add_argument('--host', nargs=1)
+args = parser.parse_args()
+
+if args.host:
+    print(json.dumps(HOSTVARS))
+    sys.exit()
+
+# create sessionArgs for each region
+regionArgs = [{'region_name': region} for region in args.regions]
+if args.profile_name:
+    for arg in regionArgs:
+        arg.update({'profile_name': args.profile_name})
+
+# pin the non-variant option
+invf = partial(regionInventory, publicIPs=args.public)
+
+# query regions concurrently
+pool = ThreadPool(len(regionArgs))
+regionInventories = pool.map(invf, regionArgs)
+pool.close()
+pool.join()
+
+# combine regions
+inventory = reduce(mergeDictOfLists, regionInventories, {})
+inventory['_meta'] = {'hostvars': HOSTVARS}
+
+print(json.dumps(inventory))
diff --git a/management.yml b/management.yml
new file mode 100644
index 0000000..b308c43
--- /dev/null
+++ b/management.yml
@@ -0,0 +1,11 @@
+---
+- hosts: localhost
+  connection: local
+  roles:
+    - aws-infrastructure
+
+- hosts: management
+  become: true
+  roles:
+    - common
+    - management
diff --git a/roles/aws-infrastructure/files/base-policy.json b/roles/aws-infrastructure/files/base-policy.json
new file mode 100644
index 0000000..4b193ad
--- /dev/null
+++ b/roles/aws-infrastructure/files/base-policy.json
@@ -0,0 +1,24 @@
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Sid": "Stmt1405357963000",
+            "Effect": "Allow",
+            "Action": [
+                "autoscaling:Describe*",
+                "cloudwatch:ListMetrics",
+                "cloudwatch:GetMetricStatistics",
+                "cloudwatch:Describe*",
+                "ec2:CreateTags",
+                "ec2:DeleteTags",
+                "ec2:Describe*",
+                "elasticloadbalancing:Describe*",
+                "logs:DescribeLogStreams",
+                "logs:PutLogEvents"
+            ],
+            "Resource": [
+                "*"
+            ]
+        }
+    ]
+}
diff --git a/roles/aws-infrastructure/files/management-policy.json b/roles/aws-infrastructure/files/management-policy.json
new file mode 100644
index 0000000..783ef44
--- /dev/null
+++ b/roles/aws-infrastructure/files/management-policy.json
@@ -0,0 +1,21 @@
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Sid": "Stmt1474056153000",
+            "Effect": "Allow",
+            "Action": [
+                "autoscaling:*",
+                "ec2:*",
+                "elasticloadbalancing:*",
+                "iam:PassRole",
+                "iam:GetServerCertificate",
+                "sns:*",
+                "sqs:*"
+            ],
+            "Resource": [
+                "*"
+            ]
+        }
+    ]
+}
diff --git a/roles/aws-infrastructure/tasks/main.yml b/roles/aws-infrastructure/tasks/main.yml
new file mode 100644
index 0000000..49bc640
--- /dev/null
+++ b/roles/aws-infrastructure/tasks/main.yml
@@ -0,0 +1,100 @@
+- action: ec2_facts
+
+- name: Management queue.
+  sqs_queue:
+    state: present
+    name: "{{ MANAGEMENT_EVENT_QUEUE }}"
+    default_visibility_timeout: 30
+    message_retention_period: 345600
+    maximum_message_size: 262144
+    delivery_delay: 0
+    receive_message_wait_time: 20
+
+- name: Management failure queue.
+  sqs_queue:
+    state: present
+    name: "{{ MANAGEMENT_EVENT_FAILURE_QUEUE }}"
+    default_visibility_timeout: 30
+    message_retention_period: 1209600
+    maximum_message_size: 262144
+    delivery_delay: 0
+    receive_message_wait_time: 0
+
+- name: Management topic and subscription.
+  sns_topic:
+    state: present
+    name: "{{ MANAGEMENT_EVENT_QUEUE }}"
+    display_name: "management"
+    purge_subscriptions: False
+    subscriptions:
+      - endpoint: "arn:aws:sqs:{{ ansible_ec2_placement_region }}:{{ ACCT_ID }}:{{ MANAGEMENT_EVENT_QUEUE }}"
+        protocol: "sqs"
+
+- name: Ensure management backup bucket exists.
+  when: MANAGEMENT_BACKUP_S3_BUCKET is defined
+  s3_bucket:
+    state: present
+    name: "{{ MANAGEMENT_BACKUP_S3_BUCKET }}"
+
+- name: sg ssh
+  ec2_group:
+    state: present
+    name: sshOnly
+    description: "allow ssh from anywhere"
+    purge_rules: false
+    rules:
+      - proto: tcp
+        from_port: 22
+        to_port: 22
+        cidr_ip: 0.0.0.0/0
+    rules_egress:
+      - proto: all
+        cidr_ip: 0.0.0.0/0
+
+- name: sg management-elb
+  ec2_group:
+    state: present
+    name: management-elb
+    description: "sg for internal elb for monitoring management"
+    purge_rules: false
+    rules:
+      - proto: tcp
+        from_port: 22
+        to_port: 22
+        cidr_ip: 0.0.0.0/0
+    rules_egress:
+      - proto: all
+        cidr_ip: 0.0.0.0/0
+
+- name: sg management
+  ec2_group:
+    state: present
+    name: management
+    description: "sg for management"
+    purge_rules: false
+    rules:
+      - proto: all
+        group_name: management
+      - proto: all
+        group_name: management-elb
+
+- name: elb management-int-elb
+  ec2_elb_lb:
+    state: present
+    name: management-int-elb
+    cross_az_load_balancing: yes
+    scheme: internal
+    subnets: "{{ MANAGEMENT_SUBNET }}"
+    security_group_names:
+      - management-elb
+    listeners:
+      - protocol: tcp
+        load_balancer_port: 22
+        instance_port: 22
+    health_check:
+      ping_protocol: tcp
+      ping_port: 22
+      response_timeout: 5
+      interval: 30
+      unhealthy_threshold: 2
+      healthy_threshold: 2
diff --git a/roles/common/files/31-branding b/roles/common/files/31-branding
new file mode 100644
index 0000000..cc18720
--- /dev/null
+++ b/roles/common/files/31-branding
@@ -0,0 +1,5 @@
+#!/bin/sh
+source /etc/profile
+figlet -f small ' AWSible'
+echo "This is a ${CLOUD_MODULE} system in the ${CLOUD_ACCOUNT} environment."
+echo
diff --git a/roles/common/files/aws.config.j2 b/roles/common/files/aws.config.j2
new file mode 100644
index 0000000..c74858a
--- /dev/null
+++ b/roles/common/files/aws.config.j2
@@ -0,0 +1,2 @@
+[default]
+region = {{ ansible_ec2_placement_region }}
diff --git a/roles/common/files/awsible.sh b/roles/common/files/awsible.sh
new file mode 100644
index 0000000..00a9ec1
--- /dev/null
+++ b/roles/common/files/awsible.sh
@@ -0,0 +1,16 @@
+# configure environment with AWS info
+export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
+export PUBLIC_HOSTNAME=`curl -s http://169.254.169.254/latest/meta-data/public-hostname`
+export PUBLIC_IP=`curl -s http://169.254.169.254/latest/meta-data/public-ipv4`
+export PRIVATE_IP=`curl -s http://169.254.169.254/latest/meta-data/local-ipv4`
+export EC2_AZ=`curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone/`
+TMP="${#EC2_AZ}"
+export EC2_REGION="${EC2_AZ:0:$TMP-1}"
+unset TMP
+# export CLOUD_ACCOUNT=awsAcctName...
+# export CLOUD_MODULE=mytags['tags']['module']|default('(no module)')
+# export CLOUD_STACK=stack|None
+# export CLOUD_PHASE=mytags['tags']['phase']|default('None')
+
+# export CLOUD_AUTO_SCALE_GROUP=
+# export CLOUD_LAUNCH_CONFIG=module-stack-country-phase-version
diff --git a/roles/common/files/bash_prompt.sh b/roles/common/files/bash_prompt.sh
new file mode 100644
index 0000000..a0a3d8c
--- /dev/null
+++ b/roles/common/files/bash_prompt.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+prompt_string(){
+    local _normal="\[\033[00m\]"
+    local _yellow="\[\033[33m\]"
+    local _green="\[\033[32m\]"
+    local _cyan="\[\033[36m\]"
+
+    local _resultcode="${_yellow}\$?${_normal}"
+    local _addr="${_green}${PUBLIC_IP}${_normal}"
+    local _id="${_yellow}${INSTANCE_ID}${_normal}"
+    local _app="${_yellow}${CLOUD_MODULE}-${CLOUD_PHASE}${_normal}"
+    local _path="${_cyan}\w${_normal}"
+
+    echo ":${_resultcode}:${_addr}:${_app}:${_id}:${_path}\n\$ "
+}
+export PS1=$(prompt_string)
+unset -f prompt_string
diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
new file mode 100644
index 0000000..02f3b0a
--- /dev/null
+++ b/roles/common/tasks/main.yml
@@ -0,0 +1,128 @@
+---
+- include: volumes.yml
+
+- name: Create local asset directory
+  when: ASSET_PATH is defined
+  file:
+    state: directory
+    path: "{{ ASSET_PATH }}"
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: PIP prerequisites
+  with_items:
+    - boto
+    - boto3
+    - httplib2
+    - requests
+  pip:
+    name: "{{ item }}"
+    state: latest
+
+- name: Common packages
+  with_items:
+    - aws-cli
+    - cowsay
+    - figlet
+    - ipsec-tools
+    - jq
+    - krb5-workstation
+    - pax
+    - rpcbind
+    - symlinks
+    - tcpdump
+    - strace
+  yum:
+    name: "{{ item }}"
+    state: latest
+
+- name: Facts need a home.
+  file:
+    state: directory
+    path: /etc/ansible/facts.d
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Install any facts.
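+  # fact_scripts is an optional list (set in group/host vars) naming custom
+  # fact scripts, without the .fact suffix; e.g. "fact_scripts: [cloud]" would
+  # install files/cloud.fact into /etc/ansible/facts.d (example name only).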
+  with_items: "{{ fact_scripts|default([]) }}"
+  copy:
+    dest: /etc/ansible/facts.d/{{ item }}.fact
+    src: "{{ item }}.fact"
+    mode: "0755"
+    owner: root
+    group: root
+  register: new_facts
+
+- name: Refresh facts
+  when: new_facts|changed
+  setup:
+
+- name: Gather EC2 info
+  ec2_facts:
+
+- name: Gather own tags
+  ec2_tag:
+    state: list
+    region: "{{ ansible_ec2_placement_region }}"
+    resource: "{{ ansible_ec2_instance_id }}"
+  register: my_tags
+
+- name: Name instance from ID and ASG module
+  when: my_tags['tags']['aws:autoscaling:groupName'] is defined
+  ec2_tag:
+    state: present
+    region: "{{ ansible_ec2_placement_region }}"
+    resource: "{{ ansible_ec2_instance_id }}"
+    tags:
+      Name: "{{ my_tags['tags']['module']|default('(no module)') }}_{{ ansible_ec2_instance_id }}"
+
+- name: MOTD Branding
+  copy:
+    dest: /etc/update-motd.d/31-branding
+    src: 31-branding
+    mode: "0755"
+    owner: root
+    group: root
+  register: motd
+
+- name: update MOTD
+  when: motd|changed
+  command: /usr/sbin/update-motd
+
+- name: profile stuff
+  copy:
+    dest: /etc/profile.d/awsible.sh
+    src: awsible.sh
+    mode: "0644"
+    owner: root
+    group: root
+
+- name: fancy prompt
+  copy:
+    dest: /home/ec2-user/.bash_prompt
+    src: bash_prompt.sh
+    mode: "0644"
+    owner: ec2-user
+    group: ec2-user
+
+- name: use fancy prompt
+  lineinfile:
+    dest: /home/ec2-user/.bash_profile
+    insertafter: EOF
+    line: "[[ -f ~/.bash_prompt ]] && . ~/.bash_prompt"
+
+- name: Create AWS config
+  file:
+    state: directory
+    path: /home/ec2-user/.aws
+    mode: "0775"
+    owner: ec2-user
+    group: ec2-user
+
+- name: awscli config
+  template:
+    dest: /home/ec2-user/.aws/config
+    src: aws.config.j2
+    mode: "0664"
+    owner: ec2-user
+    group: ec2-user
diff --git a/roles/common/tasks/volumes.yml b/roles/common/tasks/volumes.yml
new file mode 100644
index 0000000..8d09506
--- /dev/null
+++ b/roles/common/tasks/volumes.yml
@@ -0,0 +1,27 @@
+---
+# set up a common /data directory for ephemeral or EBS volume
+- stat: path=/media/data
+  register: dpstat
+- set_fact: data_mount_path={{ dpstat.stat.path }}
+  when: dpstat.stat.exists and data_mount_path is undefined
+
+- stat: path=/media/ephemeral0
+  register: dpstat
+- set_fact: data_mount_path={{ dpstat.stat.path }}
+  when: dpstat.stat.exists and data_mount_path is undefined
+
+- stat: path=/media/ephemeral1
+  register: dpstat
+- set_fact: data_mount_path={{ dpstat.stat.path }}
+  when: dpstat.stat.exists and data_mount_path is undefined
+
+- stat: path=/mnt
+  register: dpstat
+- set_fact: data_mount_path={{ dpstat.stat.path }}
+  when: dpstat.stat.exists and data_mount_path is undefined
+
+- name: Establish /data link to volume.
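+  # data_mount_path was set by whichever stat above matched first; if no
+  # candidate mount point exists, this task is skipped and /data is not created.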
+  when: data_mount_path is defined
+  file:
+    state: link
+    src: "{{ data_mount_path }}"
+    path: /data
diff --git a/roles/management/tasks/main.yml b/roles/management/tasks/main.yml
new file mode 100644
index 0000000..0a99c75
--- /dev/null
+++ b/roles/management/tasks/main.yml
@@ -0,0 +1,29 @@
+---
+- name: Install build bits
+  with_items:
+    - git
+    - gcc
+    - libffi-devel
+    - openssl-devel
+  yum:
+    name: "{{ item }}"
+    state: present
+
+- name: Install PIP things
+  with_items:
+    - ansible
+    - boto3
+  pip:
+    name: "{{ item }}"
+    state: present
+
+- name: Gather AWS info
+  action: ec2_facts
+
+- name: Install queue-watching cron
+  template:
+    src: sqs-poll.cron.j2
+    dest: /etc/cron.d/sqs-poll.cron
+    mode: "0644"
+    owner: root
+    group: root
diff --git a/roles/management/templates/sqs-poll.cron.j2 b/roles/management/templates/sqs-poll.cron.j2
new file mode 100644
index 0000000..98a4b1f
--- /dev/null
+++ b/roles/management/templates/sqs-poll.cron.j2
@@ -0,0 +1 @@
+* * * * * ec2-user {{ MANAGEMENT_DATA_ROOT }}/sqs-action.py {{ MANAGEMENT_DATA_ROOT }} {{ MANAGEMENT_EVENT_QUEUE }} {{ MANAGEMENT_NOTICE_ARN }}
diff --git a/sqs-action.py b/sqs-action.py
new file mode 100755
index 0000000..54f2eb7
--- /dev/null
+++ b/sqs-action.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+'''\
+Check an SQS queue for ASG lifecycle notifications of new instances,
+and run the appropriate Ansible playbook against the host.
+'''
+
+import argparse
+import logging
+import boto3
+import json
+import sys
+import os
+import errno
+from subprocess import Popen, PIPE
+from tempfile import gettempdir
+from hashlib import sha256
+
+
+ANSIBLE_PLAYBOOK_CMD = '/usr/local/bin/ansible-playbook'
+
+
+def notify(subj, msg):
+    if topic:
+        u8msg = unicode(msg).encode('utf-8')
+        topic.publish(Subject=subj, Message=u8msg[:262144])
+    else:
+        print(msg)
+
+
+def handleEvent(message, event, ASGName, InstanceId):
+    notice = [' '.join([ASGName, InstanceId, event])]
+    if os.path.isfile(os.path.join(args.playbooks, ASGName + '.yml')):
+        message.change_visibility(VisibilityTimeout=(60 * 15))  # hope config doesn't take more than 15m
+        cmd = [ANSIBLE_PLAYBOOK_CMD, '-i', 'inventory', '--limit', InstanceId, ASGName + '.yml']
+        p = Popen(cmd, cwd=args.playbooks, stdout=PIPE, stderr=PIPE)
+        (stdoutdata, stderrdata) = p.communicate()
+        retval = p.returncode
+        message.change_visibility(VisibilityTimeout=60)
+        if retval:
+            notice += ['FAILURE CODE {}'.format(retval), stderrdata, stdoutdata]
+        else:
+            notice += ['SUCCESS']
+            message.delete()
+    else:
+        notice += ['no action taken: no playbook for this ASG']
+    notify(notice[0], '\n'.join(notice))
+
+
+def processMessage(message):
+    '''Unpack the data we want from an SQS message.'''
+    try:
+        data = json.loads(json.loads(message.body)['Message'])
+        event = data['Event']
+        ASGName = data['AutoScalingGroupName']
+        InstanceId = data['EC2InstanceId']
+    except:
+        logging.debug('unparsable message %r', message.body)
+        message.delete()
+    else:
+        if event == 'autoscaling:EC2_INSTANCE_LAUNCH':
+            try:
+                instanceState = ec2r.Instance(InstanceId).state['Name']
+            except:
+                logging.debug('instance %s does not exist', InstanceId)
+                message.delete()
+            else:
+                if instanceState == 'running':
+                    handleEvent(message, event, ASGName, InstanceId)
+                else:
+                    logging.debug('instance %s is in state %s, will try again', InstanceId, instanceState)
+        else:
+            logging.debug('nothing to do for event %r', data)
+            message.delete()
+
+
+class PidFileSingleton:
+    '''Ensure that only one instance of this specific script runs at once.'''
+    def __init__(self):
+        self.pidfile = os.path.join(gettempdir(), sha256(os.path.abspath(sys.argv[0])).hexdigest() + '.pid')
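+        # The lock file name is a hash of this script's absolute path, so
+        # overlapping cron runs of the same deployment share a single lock
+        # without colliding with other scripts in the temp directory.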
+        try:
+            fd = os.open(self.pidfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
+        except OSError as e:
+            self.pidfile = None
+            if e.errno == errno.EEXIST:
+                logging.debug('An instance of this is already running.')
+                sys.exit(0)
+            raise e
+        with os.fdopen(fd, 'w') as f:
+            f.write(str(os.getpid()))
+
+    def __del__(self):
+        if self.pidfile:
+            os.unlink(self.pidfile)
+
+
+parser = argparse.ArgumentParser(description='act on SQS Notifications')
+parser.add_argument('--profile', metavar='PROFILE', dest='profile_name', help='AWS Profile (default: current IAM Role)')
+parser.add_argument('--region', metavar='REGION', dest='region_name', help='AWS Region')
+parser.add_argument('playbooks', metavar='directory', help='path containing playbooks et al')
+parser.add_argument('queue', help='SQS Queue')
+parser.add_argument('arn', nargs='?', default=None, help='ARN of SNS topic')
+args = parser.parse_args()
+
+pidfile = PidFileSingleton()
+
+session = boto3.session.Session(**{k: v for k, v in vars(args).items() if k in ('profile_name', 'region_name')})
+queue = session.resource('sqs').get_queue_by_name(QueueName=args.queue)
+topic = session.resource('sns').Topic(args.arn) if args.arn else None
+ec2r = session.resource('ec2')

+while True:
+    # long poll until there are no more messages
+    messages = queue.receive_messages(MaxNumberOfMessages=10, WaitTimeSeconds=20)
+    if not len(messages):
+        break
+    for message in messages:
+        processMessage(message)
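
Taken together, the pieces above are driven roughly like this (values are
illustrative, using the group_vars defaults in this commit; substitute real
subnet, account, and ARN values):

    # one-time run from the management instance: create queues, topic,
    # security groups, and the internal ELB, then configure the host itself
    ansible-playbook management.yml

    # ad-hoc check of the dynamic inventory (private IPs by default)
    ./inventory/asg-inventory.py --list

    # what the installed cron entry runs each minute on the management host
    /data/management/sqs-action.py /data/management management-events \
        arn:aws:sns:us-east-1:123456789012:management-alerts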