Verified commit 1816fc45, authored by Tomasz Maczukin and committed by Steve Azzopardi

Add backoff mechanism to google driver operations checks

Signed-off-by: Tomasz Maczukin <[email protected]>
parent 5050ac2b
......@@ -9,6 +9,7 @@ import (
"strings"
"time"
"github.com/cenkalti/backoff"
"github.com/docker/machine/drivers/driverutil"
"github.com/docker/machine/libmachine/log"
raw "google.golang.org/api/compute/v1"
......@@ -39,6 +40,8 @@ type ComputeUtil struct {
SwarmMaster bool
SwarmHost string
openPorts []string
operationBackoffFactory *backoffFactory
}
const (
......@@ -61,23 +64,24 @@ func newComputeUtil(driver *Driver) (*ComputeUtil, error) {
}
return &ComputeUtil{
zone: driver.Zone,
instanceName: driver.MachineName,
userName: driver.SSHUser,
project: driver.Project,
diskTypeURL: driver.DiskType,
address: driver.Address,
network: driver.Network,
subnetwork: driver.Subnetwork,
preemptible: driver.Preemptible,
useInternalIP: driver.UseInternalIP,
useInternalIPOnly: driver.UseInternalIPOnly,
service: service,
zoneURL: apiURL + driver.Project + "/zones/" + driver.Zone,
globalURL: apiURL + driver.Project + "/global",
SwarmMaster: driver.SwarmMaster,
SwarmHost: driver.SwarmHost,
openPorts: driver.OpenPorts,
zone: driver.Zone,
instanceName: driver.MachineName,
userName: driver.SSHUser,
project: driver.Project,
diskTypeURL: driver.DiskType,
address: driver.Address,
network: driver.Network,
subnetwork: driver.Subnetwork,
preemptible: driver.Preemptible,
useInternalIP: driver.UseInternalIP,
useInternalIPOnly: driver.UseInternalIPOnly,
service: service,
zoneURL: apiURL + driver.Project + "/zones/" + driver.Zone,
globalURL: apiURL + driver.Project + "/global",
SwarmMaster: driver.SwarmMaster,
SwarmHost: driver.SwarmHost,
openPorts: driver.OpenPorts,
operationBackoffFactory: driver.OperationBackoffFactory,
}, nil
}
......@@ -435,6 +439,15 @@ func (c *ComputeUtil) startInstance() error {
// waitForOp waits for the operation to finish.
func (c *ComputeUtil) waitForOp(opGetter func() (*raw.Operation, error)) error {
var next time.Duration
if c.operationBackoffFactory == nil {
return errors.New("operationBackoffFactory is not defined")
}
b := c.operationBackoffFactory.create()
b.Reset()
for {
op, err := opGetter()
if err != nil {
......@@ -444,12 +457,18 @@ func (c *ComputeUtil) waitForOp(opGetter func() (*raw.Operation, error)) error {
log.Debugf("Operation %q status: %s", op.Name, op.Status)
if op.Status == "DONE" {
if op.Error != nil {
return fmt.Errorf("Operation error: %v", *op.Error.Errors[0])
return fmt.Errorf("operation error: %v", *op.Error.Errors[0])
}
break
}
time.Sleep(1 * time.Second)
if next = b.NextBackOff(); next == backoff.Stop {
return errors.New("maximum backoff elapsed time exceeded")
}
time.Sleep(next)
}
return nil
}
......
package google
import (
"errors"
"testing"
"time"
"github.com/stretchr/testify/assert"
raw "google.golang.org/api/compute/v1"
......@@ -70,3 +72,115 @@ func TestMissingOpenedPorts(t *testing.T) {
assert.Equal(t, test.expectedMissing, missingPorts, test.description)
}
}
// testOperationCaller is a test double whose Get method can be passed to
// ComputeUtil.waitForOp as the operation getter. It fabricates *raw.Operation
// values based on elapsed wall-clock time and records how often it was polled.
type testOperationCaller struct {
// operationDuration is how long after startedAt Get keeps reporting
// "PENDING"; once that much time has passed Get reports "DONE".
operationDuration time.Duration
// getError, when non-nil, is returned by Get instead of an operation.
getError error
// operationError, when non-nil, is attached to every returned operation
// as the single entry of its Error.Errors list.
operationError *raw.OperationErrorErrors
// calls counts how many times Get has been invoked.
calls int
// startedAt is the reference instant from which operationDuration is measured.
startedAt time.Time
}
// Get fakes a single poll of a compute operation.
//
// Every invocation increments oc.calls. If oc.getError is set, it is returned
// with a nil operation. Otherwise an operation named "test operation" is
// returned whose status is "DONE" once at least oc.operationDuration has
// elapsed since oc.startedAt and "PENDING" before that. When
// oc.operationError is set it is attached to the operation regardless of the
// reported status.
func (oc *testOperationCaller) Get() (*raw.Operation, error) {
	oc.calls++

	if err := oc.getError; err != nil {
		return nil, err
	}

	// Decide the status up front so the operation can be built in one literal.
	status := "PENDING"
	if time.Since(oc.startedAt) >= oc.operationDuration {
		status = "DONE"
	}

	result := &raw.Operation{
		Name:   "test operation",
		Status: status,
	}

	if opErr := oc.operationError; opErr != nil {
		result.Error = &raw.OperationError{
			Errors: []*raw.OperationErrorErrors{opErr},
		}
	}

	return result, nil
}
// TestWaitForOpBackOff exercises ComputeUtil.waitForOp against a fake
// operation getter (testOperationCaller) and checks, per scenario, the error
// it returns and that the getter was polled fewer than 8 times.
//
// Scenarios cover: the getter itself failing, the operation outlasting the
// backoff's maximum elapsed time, the finished operation carrying an error,
// a ComputeUtil without a backoff factory, and a successful run.
func TestWaitForOpBackOff(t *testing.T) {
	type testCase struct {
		backoffFactoryNotDefined bool
		operationDuration        time.Duration
		maxOperationDuration     time.Duration
		getError                 error
		operationError           *raw.OperationErrorErrors
		expectedError            error
	}

	cases := map[string]testCase{
		"error on call": {
			getError:      errors.New("test error"),
			expectedError: errors.New("test error"),
		},
		"operation too long": {
			operationDuration:    5 * time.Second,
			maxOperationDuration: 1 * time.Second,
			expectedError:        errors.New("maximum backoff elapsed time exceeded"),
		},
		"operation error": {
			operationDuration:    1 * time.Second,
			maxOperationDuration: 5 * time.Second,
			operationError: &raw.OperationErrorErrors{
				Code:     "code",
				Location: "location",
				Message:  "message",
			},
			expectedError: errors.New("operation error: {code location message [] []}"),
		},
		"backoff factory not defined": {
			backoffFactoryNotDefined: true,
			operationDuration:        5 * time.Second,
			maxOperationDuration:     5 * time.Second,
			expectedError:            errors.New("operationBackoffFactory is not defined"),
		},
		"proper operation call": {
			operationDuration:    5 * time.Second,
			maxOperationDuration: 5 * time.Second,
		},
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			caller := &testOperationCaller{
				operationDuration: tc.operationDuration,
				getError:          tc.getError,
				operationError:    tc.operationError,
				startedAt:         time.Now(),
			}

			// A nil factory is left in place for the scenario that checks
			// waitForOp's guard against a missing factory.
			var factory *backoffFactory
			if !tc.backoffFactoryNotDefined {
				factory = &backoffFactory{
					InitialInterval:     125 * time.Millisecond,
					RandomizationFactor: 0,
					Multiplier:          2,
					MaxInterval:         4 * time.Second,
					MaxElapsedTime:      tc.maxOperationDuration,
				}
			}
			util := &ComputeUtil{operationBackoffFactory: factory}

			err := util.waitForOp(caller.Get)

			if tc.expectedError == nil {
				assert.NoError(t, err)
			} else {
				assert.EqualError(t, err, tc.expectedError.Error())
			}

			assert.True(t, caller.calls < 8, "Too many *OperationServices.Get() calls")
		})
	}
}
......@@ -4,8 +4,11 @@ import (
"errors"
"fmt"
"net"
"strconv"
"strings"
"time"
"github.com/cenkalti/backoff"
"github.com/docker/machine/libmachine/drivers"
"github.com/docker/machine/libmachine/log"
"github.com/docker/machine/libmachine/mcnflag"
......@@ -13,6 +16,25 @@ import (
"github.com/docker/machine/libmachine/state"
)
// backoffFactory holds the settings used to build exponential backoff
// instances. Each field is copied onto the identically named field of a
// backoff.ExponentialBackOff by create.
type backoffFactory struct {
// InitialInterval is assigned to ExponentialBackOff.InitialInterval.
InitialInterval time.Duration
// RandomizationFactor is assigned to ExponentialBackOff.RandomizationFactor.
RandomizationFactor float64
// Multiplier is assigned to ExponentialBackOff.Multiplier.
Multiplier float64
// MaxInterval is assigned to ExponentialBackOff.MaxInterval.
MaxInterval time.Duration
// MaxElapsedTime is assigned to ExponentialBackOff.MaxElapsedTime.
MaxElapsedTime time.Duration
}
// create returns a new backoff.ExponentialBackOff obtained from
// backoff.NewExponentialBackOff and then overridden with the factory's
// interval, randomization, multiplier and elapsed-time settings.
//
// The returned value is not reset here; waitForOp calls Reset on it before
// first use.
func (bf *backoffFactory) create() *backoff.ExponentialBackOff {
	expBackoff := backoff.NewExponentialBackOff()

	// Interval bounds and overall time budget.
	expBackoff.InitialInterval, expBackoff.MaxInterval = bf.InitialInterval, bf.MaxInterval
	expBackoff.MaxElapsedTime = bf.MaxElapsedTime

	// Growth and jitter of successive intervals.
	expBackoff.Multiplier, expBackoff.RandomizationFactor = bf.Multiplier, bf.RandomizationFactor

	return expBackoff
}
// Driver is a struct compatible with the docker.hosts.drivers.Driver interface.
type Driver struct {
*drivers.BaseDriver
......@@ -33,6 +55,8 @@ type Driver struct {
Tags string
UseExisting bool
OpenPorts []string
OperationBackoffFactory *backoffFactory
}
const (
......@@ -46,6 +70,12 @@ const (
defaultDiskSize = 10
defaultNetwork = "default"
defaultSubnetwork = ""
defaultGoogleOperationBackoffInitialInterval = 1
defaultGoogleOperationBackoffRandomizationFactor = "0.5"
defaultGoogleOperationBackoffMultipler = "2"
defaultGoogleOperationBackoffMaxInterval = 30
defaultGoogleOperationBackoffMaxElapsedTime = 300
)
// GetCreateFlags registers the flags this driver adds to
......@@ -152,6 +182,31 @@ func (d *Driver) GetCreateFlags() []mcnflag.Flag {
Name: "google-open-port",
Usage: "Make the specified port number accessible from the Internet, e.g, 8080/tcp",
},
mcnflag.IntFlag{
Name: "google-operation-backoff-initial-interval",
Usage: "Initial interval for GCP Operation check exponential backoff",
Value: defaultGoogleOperationBackoffInitialInterval,
},
mcnflag.StringFlag{
Name: "google-operation-backoff-randomization-factor",
Usage: "Randomization factor for GCP Operation check exponential backoff",
Value: defaultGoogleOperationBackoffRandomizationFactor,
},
mcnflag.StringFlag{
Name: "google-operation-backoff-multipler",
Usage: "Multipler factor for GCP Operation check exponential backoff",
Value: defaultGoogleOperationBackoffMultipler,
},
mcnflag.IntFlag{
Name: "google-operation-backoff-max-interval",
Usage: "Maximum interval for GCP Operation check exponential backoff",
Value: defaultGoogleOperationBackoffMaxInterval,
},
mcnflag.IntFlag{
Name: "google-operation-backoff-max-elapsed-time",
Usage: "Maximum elapsed time for GCP Operation check exponential backoff",
Value: defaultGoogleOperationBackoffMaxElapsedTime,
},
}
}
......@@ -223,6 +278,24 @@ func (d *Driver) SetConfigFromFlags(flags drivers.DriverOptions) error {
d.SSHPort = 22
d.SetSwarmConfigFromFlags(flags)
backoffRandomizationFactor, err := strconv.ParseFloat(flags.String("google-operation-backoff-randomization-factor"), 64)
if err != nil {
return fmt.Errorf("error while parsing google-operation-backoff-randomization-factor value: %v", err)
}
backoffMultipler, err := strconv.ParseFloat(flags.String("google-operation-backoff-multipler"), 64)
if err != nil {
return fmt.Errorf("error while parsing google-operation-backoff-multipler value: %v", err)
}
d.OperationBackoffFactory = &backoffFactory{
InitialInterval: time.Duration(flags.Int("google-operation-backoff-initial-interval")) * time.Second,
RandomizationFactor: backoffRandomizationFactor,
Multiplier: backoffMultipler,
MaxInterval: time.Duration(flags.Int("google-operation-backoff-max-interval")) * time.Second,
MaxElapsedTime: time.Duration(flags.Int("google-operation-backoff-max-elapsed-time")) * time.Second,
}
return nil
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment