updates to sqs-action
author Justin Wind <j.wind@partner.samsung.com>
Wed, 20 Sep 2017 19:16:04 +0000 (12:16 -0700)
committer Justin Wind <j.wind@partner.samsung.com>
Wed, 20 Sep 2017 19:16:04 +0000 (12:16 -0700)
sqs-action.py

index e2b8c6431eb7a574e79bb342ef04d17cb70aa7d9..3a5d8bed4d5d2476084fa129c04e74eee2f5410f 100755 (executable)
@@ -7,9 +7,11 @@ and run the appropriate Ansible playbook against the host.
 import argparse
 import logging
 import boto3
+import botocore.exceptions
 import json
 import sys
 import os
+import time
 import errno
 from subprocess import Popen, PIPE
 from tempfile import gettempdir
@@ -29,9 +31,10 @@ def notify(subj, msg):
 
 def handleEvent(message, event, ASGName, InstanceId):
     notice = [' '.join([ASGName, InstanceId, event])]
+    postnotice = []
     if os.path.isfile(os.path.join(args.playbooks, ASGName + '.yml')):
         message.change_visibility(VisibilityTimeout=(60 * 15)) # hope config doesn't take more than 15m
-        cmd = [ ANSIBLE_PLAYBOOK_CMD, '-i', 'inventory', '--limit', InstanceId, ASGName + '.yml']
+        cmd = [ANSIBLE_PLAYBOOK_CMD, '--limit', InstanceId, ASGName + '.yml']
         p = Popen(cmd, cwd=args.playbooks, stdout=PIPE, stderr=PIPE)
         (stdoutdata, stderrdata) = p.communicate()
         retval = p.returncode
@@ -41,10 +44,23 @@ def handleEvent(message, event, ASGName, InstanceId):
         else:
             notice += ['SUCCESS']
             message.delete()
+            if os.path.isfile(os.path.join(args.playbooks, ASGName + '-post.yml')):  # optional follow-up playbook, run without --limit
+                postnotice = [' '.join([ASGName, 'post', event])]  # str.join takes a single iterable, not separate arguments
+                cmd = [ANSIBLE_PLAYBOOK_CMD, ASGName + '-post.yml']
+                p = Popen(cmd, cwd=args.playbooks, stdout=PIPE, stderr=PIPE)
+                (stdoutdata, stderrdata) = p.communicate()
+                retval = p.returncode
+                if retval:
+                    postnotice += ['FAILURE CODE {}'.format(retval), stderrdata, stdoutdata]
+                else:
+                    postnotice += ['SUCCESS']
+
     else:
         notice += ['no action taken: no playbook for this ASG']
         message.delete()
     notify(notice[0], '\n'.join(notice))
+    if len(postnotice):
+        notify(postnotice[0], '\n'.join(postnotice))
 
 
 def processMessage(message):
@@ -104,10 +120,23 @@ args = parser.parse_args()
 
 pidfile = PidFileSingleton()
 
-session = boto3.session.Session(**{k:v for k,v in vars(args).items() if k in ('profile_name', 'region_name')})
-queue = session.resource('sqs').get_queue_by_name(QueueName=args.queue)
-topic = session.resource('sns').Topic(args.arn) if args.arn else None
-ec2r = session.resource('ec2')
+# occasionally, small instances seem to briefly lose their iam credentials
+sessionTriesRemaining = 3
+while sessionTriesRemaining:
+    try:
+        session = boto3.session.Session(**{k:v for k,v in vars(args).items() if k in ('profile_name', 'region_name')})
+        queue = session.resource('sqs').get_queue_by_name(QueueName=args.queue)
+        topic = session.resource('sns').Topic(args.arn) if args.arn else None
+        ec2r = session.resource('ec2')
+    except botocore.exceptions.NoCredentialsError as e:
+        sessionTriesRemaining -= 1  # decrement before logging so the reported count is the number of attempts actually left
+        logging.debug('Trouble with credentials, will retry %s more times.', sessionTriesRemaining)
+        time.sleep(5)
+        continue
+    break
+if sessionTriesRemaining == 0:
+    logging.error('Failed trying to use IAM credentials.')
+    sys.exit(1)
 
 while True:
     # long poll until there are no more messages