service: serverless-scientist

plugins:
  - serverless-offline-python
  - serverless-python-requirements
  - serverless-s3-sync
  - serverless-offline
  - serverless-apigw-binary
  - serverless-pseudo-parameters
  - serverless-vpc-plugin

custom:
  pythonRequirements:
    dockerizePip: non-linux
    slim: true
    # Hack to make sure that the boto3 lib is included in the package
    noDeploy:
      - xx
  logRetentionInDays: 30
  gateway: "test"
  aws_account_id: "#{AWS::AccountId}"
  aws_region: "#{AWS::Region}"
  # Unfortunately, the s3sync 1.8.0 plugin version cannot refer to a resource or
  # use the custom.experimentsBucket variable as bucketName if it includes
  # #{AWS::AccountId}
  experimentsBucket: scientist-experiments-${opt:experiment_bucketpostfix, 'default'}
  metricsProvisioningFiles: scientist-metricsinstance-provisioning-${opt:experiment_bucketpostfix, 'default'}
  s3Sync:
    - bucketName: ${self:custom.experimentsBucket}
      localDir: experiment_definitions
    - bucketName: ${self:custom.metricsProvisioningFiles}
      localDir: metrics_filesystem
  apigwBinary:
    # Necessary to support binary media types (like images).
    types:
      - '*/*' # Strangely, adding 'image/png' and other binary formats doesn't work.
  vpcConfig:
    cidrBlock: '10.0.0.0/16'
    # If createNatGateway is the boolean true, a NAT Gateway and EIP will be
    # provisioned in each zone; if it is a number, that number of NAT Gateways
    # will be provisioned.
    createNatGateway: true
    # When enabled, the DB subnet will only be accessible from the Application
    # subnet; both the Public and Application subnets will be accessible from
    # 0.0.0.0/0.
    createNetworkAcl: false
    # Whether to create the DB subnet
    createDbSubnet: false
    # Whether to enable VPC flow logging to an S3 bucket
    createFlowLogs: false
    # Whether to create a bastion host
    createBastionHost: false
    bastionHostKeyName: MyKey # required if creating a bastion host
    # Whether to create a NAT instance
    createNatInstance: false
    # Optionally specify AZs (defaults to auto-discovering all available AZs)
    zones:
      - eu-west-1a
      - eu-west-1b
      - eu-west-1c
    # By default, S3 and DynamoDB endpoints will be available within the VPC; see
    # https://docs.aws.amazon.com/vpc/latest/userguide/vpc-endpoints.html
    # for a list of available service endpoints to provision within the VPC
    # (varies per region).
    services:
      - kms
      - secretsmanager
    # Optionally specify subnet groups to create. If not provided, subnet groups
    # for RDS, Redshift, ElastiCache and DAX will be provisioned.
    subnetGroups:
      # - rds
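# A minimal sketch of how the ${opt:...} variables above resolve at deploy time
# (the stage, profile and postfix values here are purely illustrative):
#
#   sls deploy --stage v1 --profile default --experiment_bucketpostfix demo
#
# creates/syncs the buckets scientist-experiments-demo and
# scientist-metricsinstance-provisioning-demo; omitting the option falls back
# to the '-default' postfix.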
provider:
  name: aws
  runtime: python3.7
  memorySize: 256 # Low memory, because the functions are more I/O bound than CPU bound.
  region: eu-west-1
  profile: ${opt:profile, 'default'}
  stage: ${opt:stage, 'v1'}
  stackTags:
    STACK: "${self:service}"
    REGION: "${self:provider.region}"
  environment:
    RESULTS_TABLE: ${self:service}-results
    COUNTERS_TABLE: ${self:service}-counter
    CANDIDATE_COMPARE_TRIGGERED_TABLE: ${self:service}-candidatecomparetriggered
    EXPERIMENTS_REFRESH_MINUTES: 1
  iamRoleStatements:
    - Effect: "Allow"
      Sid: "InvokePermission"
      Action:
        - lambda:InvokeFunction
      Resource: "*"
    - Effect: "Allow"
      Action:
        - s3:GetObject
        - s3:ListBucket
      Resource:
        - "arn:aws:s3:::${self:custom.experimentsBucket}"
        - "arn:aws:s3:::${self:custom.experimentsBucket}/*"
    - Effect: "Allow"
      Action:
        - cloudwatch:PutMetricData
      Resource: '*'
    - Effect: Allow
      Action:
        - dynamodb:Query
        - dynamodb:Scan
        - dynamodb:GetItem
        - dynamodb:PutItem
        - dynamodb:UpdateItem
        - dynamodb:DeleteItem
      Resource: "arn:aws:dynamodb:${opt:region, self:provider.region}:*:table/${self:provider.environment.RESULTS_TABLE}"
    - Effect: Allow
      Action:
        - dynamodb:Query
      Resource: "arn:aws:dynamodb:${opt:region, self:provider.region}:*:table/${self:provider.environment.RESULTS_TABLE}/index/run_id_gsi"
    - Effect: Allow
      Action:
        - dynamodb:Query
        - dynamodb:Scan
        - dynamodb:GetItem
        - dynamodb:PutItem
        - dynamodb:UpdateItem
        - dynamodb:DeleteItem
      Resource: "arn:aws:dynamodb:${opt:region, self:provider.region}:*:table/${self:provider.environment.COUNTERS_TABLE}"
    - Effect: Allow
      Action:
        - dynamodb:Query
        - dynamodb:Scan
        - dynamodb:GetItem
        - dynamodb:PutItem
        - dynamodb:UpdateItem
        - dynamodb:DeleteItem
      Resource: "arn:aws:dynamodb:${opt:region, self:provider.region}:*:table/${self:provider.environment.CANDIDATE_COMPARE_TRIGGERED_TABLE}"
  vpc:
    # securityGroupIds and subnetIds will be populated by the serverless-vpc-plugin plugin
    securityGroupIds:
    subnetIds:

package:
  exclude:
    - node_modules/** # Since none of the Lambdas uses Node.
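# Note: the scientist function below runs on `runtime: provided`, so the
# custom_runtime_layer packaged here is expected to ship an executable
# `bootstrap` file at its root; that is what Lambda invokes for custom runtimes.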
layers:
  scientistRuntime:
    path: custom_runtime_layer
    name: ${self:provider.stage}-scientistRuntime
    description: Custom runtime for Scientist
    compatibleRuntimes:
      - python3.6

functions:
  experimentor:
    name: serverless-scientist-experimentor
    runtime: python3.7
    handler: experimentor.lambda_handler
    timeout: 30
    environment:
      METRICS_HOST: { "Fn::GetAtt": ["MetricsInstance", "PrivateDnsName"] }
    events:
      - schedule:
          name: serverless-scientist-keep-experimentor-hot
          rate: rate(5 minutes)
          input:
            keephot: true
  scientist:
    name: serverless-scientist
    handler: scientist.lambda_handler
    runtime: provided
    layers:
      - arn:aws:lambda:eu-west-1:399891621064:layer:AWSLambda-Python36-SciPy1x:2
      - { Ref: ScientistRuntimeLambdaLayer }
    timeout: 30
    events:
      - schedule:
          name: serverless-scientist-keep-scientist-hot
          rate: rate(5 minutes)
          input:
            keephot: true
      - http:
          path: scientist
          method: any
          cors: true
      - http:
          path: scientist/{experiment}
          method: any
          cors: true
    environment:
      EXPERIMENTS_BUCKET: ${self:custom.experimentsBucket}
      EXPERIMENTOR_ARN: { "Fn::GetAtt": ["ExperimentorLambdaFunction", "Arn"] }
      AWSREGION: ${self:custom.aws_region}
      METRICS_HOST: { "Fn::GetAtt": ["MetricsInstance", "PrivateDnsName"] }
  showdiff:
    name: showdiff
    runtime: python3.7
    handler: showdiff.lambda_handler
    timeout: 30
    events:
      - http:
          path: showdiff
          method: get
          cors: true

resources:
  Resources:
    ExperimentsS3Bucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: ${self:custom.experimentsBucket}
        AccessControl: Private
    MetricsProvisioningFilesS3Bucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: ${self:custom.metricsProvisioningFiles}
        AccessControl: Private
    ResultsDynamoDbTable:
      Type: 'AWS::DynamoDB::Table'
      # Enable this once we really want to retain the data.
      # DeletionPolicy: Retain
      Properties:
        AttributeDefinitions:
          - AttributeName: id
            AttributeType: S
          - AttributeName: run_id
            AttributeType: N
        KeySchema:
          - AttributeName: id
            KeyType: HASH
        BillingMode: PAY_PER_REQUEST
        TableName: ${self:provider.environment.RESULTS_TABLE}
        GlobalSecondaryIndexes:
          - IndexName: run_id_gsi
            KeySchema:
              - AttributeName: run_id
                KeyType: HASH
            Projection:
              NonKeyAttributes:
                - id
                - experiment_name
                - run_type
                - implementation_name
                - arn
                - received_response
                - run_metrics
                - comparators
                - request_payload
              ProjectionType: INCLUDE
    CounterDbTable:
      Type: 'AWS::DynamoDB::Table'
      # Enable this once we really want to retain the data.
      # DeletionPolicy: Retain
      Properties:
        AttributeDefinitions:
          - AttributeName: counter_id
            AttributeType: S
        KeySchema:
          - AttributeName: counter_id
            KeyType: HASH
        BillingMode: PAY_PER_REQUEST
        TableName: ${self:provider.environment.COUNTERS_TABLE}
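    # The table below relies on DynamoDB TTL: expire_at is assumed to hold an
    # epoch timestamp in seconds, and DynamoDB deletes expired items
    # asynchronously, so they can remain readable for some time after expiry.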
    CandidateCompareTriggeredDbTable:
      Type: 'AWS::DynamoDB::Table'
      # Enable this once we really want to retain the data.
      # DeletionPolicy: Retain
      Properties:
        AttributeDefinitions:
          - AttributeName: candidate_id
            AttributeType: S
        KeySchema:
          - AttributeName: candidate_id
            KeyType: HASH
        TimeToLiveSpecification:
          AttributeName: expire_at
          Enabled: true
        BillingMode: PAY_PER_REQUEST
        TableName: ${self:provider.environment.CANDIDATE_COMPARE_TRIGGERED_TABLE}
    MetricsInstance:
      Type: AWS::EC2::Instance
      Metadata:
        AWS::CloudFormation::Init:
          config:
            commands:
              01_start_docker_compose:
                command: !Sub |
                  aws s3 sync s3://scientist-metricsinstance-provisioning-${opt:experiment_bucketpostfix, 'default'} /home/ec2-user/
                  su -c 'cd /home/ec2-user; docker-compose up -d' - ec2-user
      Properties:
        ImageId: ami-07683a44e80cd32c5
        IamInstanceProfile: !Ref 'GetProvisioningFilesProfile'
        InstanceType: t2.small
        NetworkInterfaces:
          - AssociatePublicIpAddress: "true"
            DeviceIndex: "0"
            SubnetId: !Ref PublicSubnet1
            GroupSet:
              - !Ref SSHSecurityGroup
              - !Ref ElasticSearchSecurityGroup
              - !Ref HTTPSecurityGroup
              - !Ref HTTPSSecurityGroup
              # Temporarily added for the Instruqt tutorial
              - !Ref GrafanaSecurityGroup
        UserData:
          Fn::Base64: !Sub |
            #!/bin/bash -ex
            sudo yum update -y
            sudo yum install -y docker
            sudo usermod -a -G docker ec2-user
            sudo curl -L https://github.com/docker/compose/releases/download/1.24.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose
            sudo chmod +x /usr/local/bin/docker-compose
            sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose
            sudo service docker start
            sudo chkconfig docker on
            for i in {1..40}; do
              FILE_CNT=`sudo aws s3 ls s3://scientist-metricsinstance-provisioning-${opt:experiment_bucketpostfix, 'default'} | wc -l`
              if [ $FILE_CNT -eq 0 ]; then
                echo "Files not yet available in S3 bucket, will sleep and retry"
                sleep 5
              else
                break
              fi
            done
            sudo aws s3 sync s3://scientist-metricsinstance-provisioning-${opt:experiment_bucketpostfix, 'default'} /home/ec2-user/
            export AWS_DEFAULT_REGION=eu-west-1
            export SERVICE_ENDPOINT=`aws cloudformation describe-stacks --stack-name serverless-scientist-v1 --query "Stacks[0].Outputs[?OutputKey=='ServiceEndpoint'].OutputValue" --output text`
            sudo sed -i 's|{{SERVICE_ENDPOINT}}|'$SERVICE_ENDPOINT'|g' "/home/ec2-user/grafana_provisioning/dashboard_definitions/experiments_dashboard.json"
            cd /home/ec2-user
            sudo su - ec2-user -c 'cd /home/ec2-user; docker-compose up -d'
        BlockDeviceMappings:
          - DeviceName: /dev/xvda
            Ebs:
              VolumeSize: 10
    GetProvisioningFilesProfile:
      Type: AWS::IAM::InstanceProfile
      Properties:
        Path: /
        Roles:
          - !Ref 'GetProvisioningFilesRole'
    GetProvisioningFilesPolicy:
      Type: AWS::IAM::Policy
      Properties:
        PolicyName: GetProvisioningFilesPolicy
        PolicyDocument:
          Statement:
            - Effect: Allow
              Action:
                - s3:List*
              Resource: { Fn::GetAtt: ['MetricsProvisioningFilesS3Bucket', 'Arn'] }
            - Effect: Allow
              Action:
                - s3:GetObject
              Resource: '*'
            - Effect: Allow
              Action:
                - cloudformation:DescribeStacks
              Resource: '*'
        Roles:
          - !Ref 'GetProvisioningFilesRole'
    GetProvisioningFilesRole:
      Type: AWS::IAM::Role
      Properties:
        AssumeRolePolicyDocument:
          Version: '2012-10-17'
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - ec2.amazonaws.com
              Action:
                - sts:AssumeRole
        Path: /
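    # The security groups below attach to the VPC created by serverless-vpc-plugin.
    # VPC, PublicSubnet1 (used above) and LambdaExecutionSecurityGroup are not
    # defined in this template; they are assumed to be logical IDs provisioned by
    # that plugin, which also populates provider.vpc (see the comment there).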
    SSHSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: Enable SSH access via port 22
        SecurityGroupIngress:
          - CidrIp: 0.0.0.0/0
            FromPort: 22
            IpProtocol: tcp
            ToPort: 22
    ElasticSearchSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: ElasticSearch
        SecurityGroupIngress:
          - SourceSecurityGroupId: !Ref LambdaExecutionSecurityGroup
            IpProtocol: tcp
            FromPort: 9200
            ToPort: 9200
    HTTPSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: HTTP
        SecurityGroupIngress:
          - CidrIp: 0.0.0.0/0
            FromPort: 80
            IpProtocol: tcp
            ToPort: 80
    HTTPSSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: HTTPS
        SecurityGroupIngress:
          - CidrIp: 0.0.0.0/0
            FromPort: 443
            IpProtocol: tcp
            ToPort: 443
    # Temporarily added for the Instruqt tutorial so that we do not have to
    # generate SSL keys for the EC2 instances etc.
    GrafanaSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: Grafana
        SecurityGroupIngress:
          - CidrIp: 0.0.0.0/0
            FromPort: 3000
            IpProtocol: tcp
            ToPort: 3000
  Outputs:
    GrafanaHost:
      Description: The address where Grafana is hosted
      Value:
        'Fn::GetAtt': [MetricsInstance, PublicDnsName]
      Export:
        Name: GrafanaHost
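# A minimal sketch of consuming the exported GrafanaHost value from another
# CloudFormation stack (the GrafanaUrl output name is illustrative):
#
#   Outputs:
#     GrafanaUrl:
#       Value: !Sub
#         - 'http://${Host}:3000'
#         - Host:
#             Fn::ImportValue: GrafanaHost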