service: serverless-scientist

plugins:
  - serverless-offline-python
  - serverless-python-requirements
  - serverless-s3-sync
  - serverless-offline
  - serverless-apigw-binary
  - serverless-pseudo-parameters
  - serverless-vpc-plugin

custom:
  pythonRequirements:
    dockerizePip: non-linux
    slim: true
    # Hack to make sure that boto3 lib is included in the package
    noDeploy:
      - xx
  logRetentionInDays: 30
  gateway: "test"
  aws_account_id: "#{AWS::AccountId}"
  aws_region: "#{AWS::Region}"
  # Unfortunately it is impossible in the s3sync 1.8.0 plugin version to refer to
  # a resource or use the custom.experimentsBucket variable as bucketName if it
  # includes #{AWS::AccountId}
  experimentsBucket: scientist-experiments-${opt:experiment_bucketpostfix, 'default'}
  metricsProvisioningFiles: scientist-metricsinstance-provisioning-${opt:experiment_bucketpostfix, 'default'}
  s3Sync:
    - bucketName: ${self:custom.experimentsBucket}
      localDir: experiment_definitions
    - bucketName: ${self:custom.metricsProvisioningFiles}
      localDir: metrics_filesystem
  apigwBinary:  # Necessary to support binary media types (like images).
    types:
      - '*/*'  # Strangely, adding 'image/png' and other binary formats doesn't work.

  vpcConfig:
    cidrBlock: '10.0.0.0/16'

    # if createNatGateway is a boolean "true", a NAT Gateway and EIP will be provisioned in each zone
    # if createNatGateway is a number, that number of NAT Gateways will be provisioned
    createNatGateway: true

    # When enabled, the DB subnet will only be accessible from the Application subnet
    # Both the Public and Application subnets will be accessible from 0.0.0.0/0
    createNetworkAcl: false

    # Whether to create the DB subnet
    createDbSubnet: false

    # Whether to enable VPC flow logging to an S3 bucket
    createFlowLogs: false

    # Whether to create a bastion host
    createBastionHost: false
    bastionHostKeyName: MyKey  # required if creating a bastion host

    # Whether to create a NAT instance
    createNatInstance: false

    # Optionally specify AZs (defaults to auto-discover all available AZs)
    zones:
      - eu-west-1a
      - eu-west-1b
      - eu-west-1c

    # By default, S3 and DynamoDB endpoints will be available within the VPC
    # see https://docs.aws.amazon.com/vpc/latest/userguide/vpc-endpoints.html
    # for a list of available service endpoints to provision within the VPC
    # (varies per region)
    services:
      - kms
      - secretsmanager

    # Optionally specify subnet groups to create. If not provided, subnet groups
    # for RDS, Redshift, ElastiCache and DAX will be provisioned.
    subnetGroups:
      # - rds

provider:
  name: aws
  runtime: python3.7
  memorySize: 256  # Low memory, because of more I/O bound than CPU bound.
  region: eu-west-1
  profile: ${opt:profile, 'default'}
  stage: ${opt:stage, 'v1'}
  stackTags:
    STACK: "${self:service}"
    REGION: "${self:provider.region}"
  environment:
    RESULTS_TABLE: ${self:service}-results
    COUNTERS_TABLE: ${self:service}-counter
    CANDIDATE_COMPARE_TRIGGERED_TABLE: ${self:service}-candidatecomparetriggered
    EXPERIMENTS_REFRESH_MINUTES: 1

  iamRoleStatements:
    # Allow the scientist function to invoke the experimentor (and candidate) lambdas.
    - Effect: "Allow"
      Sid: "InvokePermission"
      Action:
        - lambda:InvokeFunction
      Resource: "*"
    # Read access to the experiment definitions bucket.
    - Effect: "Allow"
      Action:
        - s3:GetObject
        - s3:ListBucket
      Resource:
        - "arn:aws:s3:::${self:custom.experimentsBucket}"
        - "arn:aws:s3:::${self:custom.experimentsBucket}/*"
    - Effect: "Allow"
      Action:
        - cloudwatch:PutMetricData
      Resource: '*'
    # Full CRUD on the results table...
    - Effect: Allow
      Action:
        - dynamodb:Query
        - dynamodb:Scan
        - dynamodb:GetItem
        - dynamodb:PutItem
        - dynamodb:UpdateItem
        - dynamodb:DeleteItem
      Resource: "arn:aws:dynamodb:${opt:region, self:provider.region}:*:table/${self:provider.environment.RESULTS_TABLE}"
    # ...and query access to its GSI (index ARNs are separate resources in IAM).
    - Effect: Allow
      Action:
        - dynamodb:Query
      Resource: "arn:aws:dynamodb:${opt:region, self:provider.region}:*:table/${self:provider.environment.RESULTS_TABLE}/index/run_id_gsi"
    - Effect: Allow
      Action:
        - dynamodb:Query
        - dynamodb:Scan
        - dynamodb:GetItem
        - dynamodb:PutItem
        - dynamodb:UpdateItem
        - dynamodb:DeleteItem
      Resource: "arn:aws:dynamodb:${opt:region, self:provider.region}:*:table/${self:provider.environment.COUNTERS_TABLE}"
    - Effect: Allow
      Action:
        - dynamodb:Query
        - dynamodb:Scan
        - dynamodb:GetItem
        - dynamodb:PutItem
        - dynamodb:UpdateItem
        - dynamodb:DeleteItem
      Resource: "arn:aws:dynamodb:${opt:region, self:provider.region}:*:table/${self:provider.environment.CANDIDATE_COMPARE_TRIGGERED_TABLE}"
  vpc:
    # securityGroupIds and subnetIds will be populated by serverless-vpc-plugin plugin
    securityGroupIds:
    subnetIds:

package:
  exclude:
    - node_modules/**  # Since none of the Lambdas uses Node.

layers:
  scientistRuntime:
    path: custom_runtime_layer
    name: ${self:provider.stage}-scientistRuntime
    description: Custom runtime for Scientist
    compatibleRuntimes:
      - python3.6

functions:
  experimentor:
    name: serverless-scientist-experimentor
    runtime: python3.7
    handler: experimentor.lambda_handler
    timeout: 30
    environment:
      METRICS_HOST: { "Fn::GetAtt": ["MetricsInstance", "PrivateDnsName"] }
    events:
      # Periodic no-op invocation to keep the lambda warm.
      - schedule:
          name: serverless-scientist-keep-experimentor-hot
          rate: rate(5 minutes)
          input:
            keephot: true
  scientist:
    name: serverless-scientist
    handler: scientist.lambda_handler
    runtime: provided
    layers:
      - arn:aws:lambda:eu-west-1:399891621064:layer:AWSLambda-Python36-SciPy1x:2
      - { Ref: ScientistRuntimeLambdaLayer }
    timeout: 30
    events:
      # Periodic no-op invocation to keep the lambda warm.
      - schedule:
          name: serverless-scientist-keep-scientist-hot
          rate: rate(5 minutes)
          input:
            keephot: true
      - http:
          path: scientist
          method: any
          cors: true
      - http:
          path: scientist/{experiment}
          method: any
          cors: true
    environment:
      EXPERIMENTS_BUCKET: ${self:custom.experimentsBucket}
      EXPERIMENTOR_ARN: { "Fn::GetAtt": ["ExperimentorLambdaFunction", "Arn"] }
      AWSREGION: ${self:custom.aws_region}
      METRICS_HOST: { "Fn::GetAtt": ["MetricsInstance", "PrivateDnsName"] }
  showdiff:
    name: showdiff
    runtime: python3.7
    handler: showdiff.lambda_handler
    timeout: 30
    events:
      - http:
          path: showdiff
          method: get
          cors: true

resources:
  Resources:
    ExperimentsS3Bucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: ${self:custom.experimentsBucket}
        AccessControl: Private
    MetricsProvisioningFilesS3Bucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: ${self:custom.metricsProvisioningFiles}
        AccessControl: Private
    ResultsDynamoDbTable:
      Type: 'AWS::DynamoDB::Table'
      # Enable this once we really want to retain the data.
      # DeletionPolicy: Retain
      Properties:
        AttributeDefinitions:
          - AttributeName: id
            AttributeType: S
          - AttributeName: run_id
            AttributeType: N
        KeySchema:
          - AttributeName: id
            KeyType: HASH
        BillingMode: PAY_PER_REQUEST
        TableName: ${self:provider.environment.RESULTS_TABLE}
        GlobalSecondaryIndexes:
          - IndexName: run_id_gsi
            KeySchema:
              - AttributeName: run_id
                KeyType: HASH
            Projection:
              NonKeyAttributes:
                - id
                - experiment_name
                - run_type
                - implementation_name
                - arn
                - received_response
                - run_metrics
                - comparators
                - request_payload
              ProjectionType: INCLUDE
    CounterDbTable:
      Type: 'AWS::DynamoDB::Table'
      # Enable this once we really want to retain the data.
      # DeletionPolicy: Retain
      Properties:
        AttributeDefinitions:
          - AttributeName: counter_id
            AttributeType: S
        KeySchema:
          - AttributeName: counter_id
            KeyType: HASH
        BillingMode: PAY_PER_REQUEST
        TableName: ${self:provider.environment.COUNTERS_TABLE}
    CandidateCompareTriggeredDbTable:
      Type: 'AWS::DynamoDB::Table'
      # Enable this once we really want to retain the data.
      # DeletionPolicy: Retain
      Properties:
        AttributeDefinitions:
          - AttributeName: candidate_id
            AttributeType: S
        KeySchema:
          - AttributeName: candidate_id
            KeyType: HASH
        # Rows expire automatically via the expire_at attribute.
        TimeToLiveSpecification:
          AttributeName: expire_at
          Enabled: true
        BillingMode: PAY_PER_REQUEST
        TableName: ${self:provider.environment.CANDIDATE_COMPARE_TRIGGERED_TABLE}
    # EC2 instance running the metrics stack (Grafana etc.) via docker-compose.
    MetricsInstance:
      Type: AWS::EC2::Instance
      Metadata:
        AWS::CloudFormation::Init:
          config:
            commands:
              01_start_docker_compose:
                command:
                  !Sub |
                    aws s3 sync s3://scientist-metricsinstance-provisioning-${opt:experiment_bucketpostfix, 'default'} /home/ec2-user/
                    su -c 'cd /home/ec2-user; docker-compose up -d' - ec2-user
      Properties:
        ImageId: ami-07683a44e80cd32c5
        IamInstanceProfile: !Ref 'GetProvisioningFilesProfile'
        InstanceType: t2.small
        NetworkInterfaces:
          - AssociatePublicIpAddress: "true"
            DeviceIndex: "0"
            SubnetId: !Ref PublicSubnet1
            GroupSet:
              - !Ref SSHSecurityGroup
              - !Ref ElasticSearchecurityGroup
              - !Ref HTTPSecurityGroup
              - !Ref HTTPSSecurityGroup
              # Temp added for Instruqt tutorial
              - !Ref GrafanaSecurityGroup
        UserData:
          Fn::Base64:
            !Sub |
                #!/bin/bash -ex
                sudo yum update -y
                sudo yum install -y docker
                sudo usermod -a -G docker ec2-user
                sudo curl -L https://github.com/docker/compose/releases/download/1.24.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose
                sudo chmod +x /usr/local/bin/docker-compose
                sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose
                sudo service docker start
                sudo chkconfig docker on
                for i in {1..40}; do
                  FILE_CNT=`sudo aws s3 ls s3://scientist-metricsinstance-provisioning-${opt:experiment_bucketpostfix, 'default'} | wc -l`
                  if [ $FILE_CNT -eq 0 ]; then
                    echo "Files not yet available in S3 bucket, will sleep and retry"
                    sleep 5
                  else
                    break
                  fi
                done
                sudo aws s3 sync s3://scientist-metricsinstance-provisioning-${opt:experiment_bucketpostfix, 'default'} /home/ec2-user/
                export AWS_DEFAULT_REGION=eu-west-1
                export SERVICE_ENDPOINT=`aws cloudformation describe-stacks --stack-name serverless-scientist-v1 --query "Stacks[0].Outputs[?OutputKey=='ServiceEndpoint'].OutputValue" --output text`
                sudo sed -i 's|{{SERVICE_ENDPOINT}}|'$SERVICE_ENDPOINT'|g' "/home/ec2-user/grafana_provisioning/dashboard_definitions/experiments_dashboard.json"
                cd /home/ec2-user
                sudo su - ec2-user -c 'cd /home/ec2-user; docker-compose up -d'
        BlockDeviceMappings:
          - DeviceName: /dev/xvda
            Ebs:
              VolumeSize: 10
    GetProvisioningFilesProfile:
      Type: AWS::IAM::InstanceProfile
      Properties:
        Path: /
        Roles:
          - !Ref 'GetProvisioningFilesRole'
    GetProvisioningFilesPolicy:
      Type: AWS::IAM::Policy
      Properties:
        PolicyName: GetProvisioningFilesPolicy
        PolicyDocument:
          Statement:
            - Effect: Allow
              Action:
                - s3:List*
              Resource: { Fn::GetAtt: ['MetricsProvisioningFilesS3Bucket', 'Arn'] }
            - Effect: Allow
              Action:
                - s3:GetObject
              Resource: '*'
            - Effect: Allow
              Action:
                - cloudformation:DescribeStacks
              Resource: '*'
        Roles:
          - !Ref 'GetProvisioningFilesRole'
    GetProvisioningFilesRole:
      Type: AWS::IAM::Role
      Properties:
        AssumeRolePolicyDocument:
          Version: '2012-10-17'
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - ec2.amazonaws.com
              Action:
                - sts:AssumeRole
        Path: /
    SSHSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: Enable SSH access via port 22
        SecurityGroupIngress:
          - CidrIp: 0.0.0.0/0
            FromPort: 22
            IpProtocol: tcp
            ToPort: 22
    # NOTE(review): logical ID has a typo ("Searchecurity"); kept as-is because
    # renaming a logical ID would make CloudFormation replace the resource.
    ElasticSearchecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: ElasticSearch
        SecurityGroupIngress:
          - SourceSecurityGroupId: !Ref LambdaExecutionSecurityGroup
            IpProtocol: tcp
            FromPort: 9200
            ToPort: 9200
    HTTPSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: HTTP
        SecurityGroupIngress:
          - CidrIp: 0.0.0.0/0
            FromPort: 80
            IpProtocol: tcp
            ToPort: 80
    HTTPSSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: HTTPS
        SecurityGroupIngress:
          - CidrIp: 0.0.0.0/0
            FromPort: 443
            IpProtocol: tcp
            ToPort: 443
    # Temporarily added for Instruqt tutorial such that we do not have to generate
    # SSL keys for the EC2 instances etc.
    GrafanaSecurityGroup:
      Type: AWS::EC2::SecurityGroup
      Properties:
        VpcId: !Ref VPC
        GroupDescription: HTTPS
        SecurityGroupIngress:
          - CidrIp: 0.0.0.0/0
            FromPort: 3000
            IpProtocol: tcp
            ToPort: 3000

  Outputs:
    GrafanaHost:
      Description: The address where grafana is hosted
      Value:
        'Fn::GetAtt': [MetricsInstance, PublicDnsName]
      Export:
        Name: GrafanaHost