Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • edbaunton/buildgrid
  • BuildGrid/buildgrid
  • bloomberg/buildgrid
  • devcurmudgeon/buildgrid
  • mhadjimichael/buildgrid
  • jmacarthur/buildgrid
  • rkothur/buildgrid
  • valentindavid/buildgrid
  • jjardon/buildgrid
  • RichKen/buildgrid
  • jbonney/buildgrid
  • onsha_alexander/buildgrid
  • santigl/buildgrid
  • mostynb/buildgrid
  • hoffbrinkle/buildgrid
  • Malinskiy/buildgrid
  • coldtom/buildgrid
  • azeemb_a/buildgrid
  • pointswaves/buildgrid
  • BenjaminSchubert/buildgrid
  • michaellee8/buildgrid
  • anil-anil/buildgrid
  • seanborg/buildgrid
  • jdelong12/buildgrid
  • jclay/buildgrid
  • bweston92/buildgrid
  • zchen723/buildgrid
  • cpratt34/buildgrid
  • armbiant/apache-buildgrid
  • armbiant/android-buildgrid
  • itsme300/buildgrid
  • sbairoliya/buildgrid
32 results
Show changes
Commits on Source (7)
......@@ -184,7 +184,8 @@ ignore-on-opaque-inference=yes
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=google.protobuf.any_pb2.Any
ignored-classes=google.protobuf.any_pb2.Any,
google.protobuf.timestamp_pb2.Timestamp
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
......
......@@ -17,16 +17,32 @@ import random
import time
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid.utils import get_hostname
def work_dummy(context, lease):
""" Just returns lease after some random time
"""
action_result = remote_execution_pb2.ActionResult()
lease.result.Clear()
time.sleep(random.randint(1, 5))
action_result.execution_metadata.worker = get_hostname()
action_result = remote_execution_pb2.ActionResult()
# Simulation input-downloading phase:
action_result.execution_metadata.input_fetch_start_timestamp.GetCurrentTime()
time.sleep(random.random())
action_result.execution_metadata.input_fetch_completed_timestamp.GetCurrentTime()
# Simulation execution phase:
action_result.execution_metadata.execution_start_timestamp.GetCurrentTime()
time.sleep(random.random())
action_result.execution_metadata.execution_completed_timestamp.GetCurrentTime()
# Simulation output-uploading phase:
action_result.execution_metadata.output_upload_start_timestamp.GetCurrentTime()
time.sleep(random.random())
action_result.execution_metadata.output_upload_completed_timestamp.GetCurrentTime()
lease.result.Pack(action_result)
......
......@@ -19,7 +19,7 @@ import tempfile
from buildgrid.client.cas import download, upload
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid.utils import output_file_maker, output_directory_maker
from buildgrid.utils import get_hostname, output_file_maker, output_directory_maker
def work_host_tools(context, lease):
......@@ -29,10 +29,13 @@ def work_host_tools(context, lease):
logger = context.logger
action_digest = remote_execution_pb2.Digest()
action_result = remote_execution_pb2.ActionResult()
lease.payload.Unpack(action_digest)
lease.result.Clear()
action_result.execution_metadata.worker = get_hostname()
with tempfile.TemporaryDirectory() as temp_directory:
with download(context.cas_channel, instance=instance_name) as downloader:
action = downloader.get_message(action_digest,
......@@ -43,8 +46,12 @@ def work_host_tools(context, lease):
command = downloader.get_message(action.command_digest,
remote_execution_pb2.Command())
action_result.execution_metadata.input_fetch_start_timestamp.GetCurrentTime()
downloader.download_directory(action.input_root_digest, temp_directory)
action_result.execution_metadata.input_fetch_completed_timestamp.GetCurrentTime()
environment = os.environ.copy()
for variable in command.environment_variables:
if variable.name not in ['PATH', 'PWD']:
......@@ -70,6 +77,8 @@ def work_host_tools(context, lease):
logger.debug(' '.join(command_line))
action_result.execution_metadata.execution_start_timestamp.GetCurrentTime()
process = subprocess.Popen(command_line,
cwd=working_directory,
env=environment,
......@@ -80,7 +89,8 @@ def work_host_tools(context, lease):
stdout, stderr = process.communicate()
returncode = process.returncode
action_result = remote_execution_pb2.ActionResult()
action_result.execution_metadata.execution_completed_timestamp.GetCurrentTime()
# TODO: Upload to CAS or output RAW
# For now, just pass raw
# https://gitlab.com/BuildGrid/buildgrid/issues/90
......@@ -92,6 +102,8 @@ def work_host_tools(context, lease):
logger.debug("Command stdout: [{}]".format(stdout))
logger.debug("Command exit code: [{}]".format(returncode))
action_result.execution_metadata.output_upload_start_timestamp.GetCurrentTime()
with upload(context.cas_channel, instance=instance_name) as uploader:
output_files, output_directories = [], []
......@@ -121,6 +133,8 @@ def work_host_tools(context, lease):
action_result.output_directories.extend(output_directories)
action_result.execution_metadata.output_upload_completed_timestamp.GetCurrentTime()
lease.result.Pack(action_result)
return lease
......@@ -20,15 +20,21 @@ Operations command
Check the status of operations
"""
from collections import OrderedDict
import logging
from operator import attrgetter
from urllib.parse import urlparse
import sys
from textwrap import indent
import click
from google.protobuf import json_format
import grpc
from buildgrid._enums import OperationStage
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
from buildgrid._protos.google.longrunning import operations_pb2, operations_pb2_grpc
from buildgrid._protos.google.rpc import code_pb2
from ..cli import pass_context
......@@ -65,45 +71,145 @@ def cli(context, remote, instance_name, client_key, client_cert, server_cert):
context.logger.debug("Starting for remote {}".format(context.remote))
def _print_operation_status(operation, print_details=False):
metadata = remote_execution_pb2.ExecuteOperationMetadata()
# The metadata is expected to be an ExecuteOperationMetadata message:
assert operation.metadata.Is(metadata.DESCRIPTOR)
operation.metadata.Unpack(metadata)
stage = OperationStage(metadata.stage)
if not operation.done:
if stage == OperationStage.CACHE_CHECK:
click.echo('CacheCheck: {}: Querying action-cache (stage={})'
.format(operation.name, metadata.stage))
elif stage == OperationStage.QUEUED:
click.echo('Queued: {}: Waiting for execution (stage={})'
.format(operation.name, metadata.stage))
elif stage == OperationStage.EXECUTING:
click.echo('Executing: {}: Currently running (stage={})'
.format(operation.name, metadata.stage))
else:
click.echo('Error: {}: In an invalid state (stage={})'
.format(operation.name, metadata.stage), err=True)
return
assert stage == OperationStage.COMPLETED
response = remote_execution_pb2.ExecuteResponse()
# The response is expected to be an ExecutionResponse message:
assert operation.response.Is(response.DESCRIPTOR)
operation.response.Unpack(response)
if response.status.code != code_pb2.OK:
click.echo('Failure: {}: {} (code={})'
.format(operation.name, response.status.message, response.status.code))
else:
if response.result.exit_code != 0:
click.echo('Success: {}: Completed with failure (stage={}, exit_code={})'
.format(operation.name, metadata.stage, response.result.exit_code))
else:
click.echo('Success: {}: Completed succesfully (stage={}, exit_code={})'
.format(operation.name, metadata.stage, response.result.exit_code))
if print_details:
metadata = response.result.execution_metadata
click.echo(indent('worker={}'.format(metadata.worker), ' '))
queued = metadata.queued_timestamp.ToDatetime()
click.echo(indent('queued_at={}'.format(queued), ' '))
worker_start = metadata.worker_start_timestamp.ToDatetime()
worker_completed = metadata.worker_completed_timestamp.ToDatetime()
click.echo(indent('work_duration={}'.format(worker_completed - worker_start), ' '))
fetch_start = metadata.input_fetch_start_timestamp.ToDatetime()
fetch_completed = metadata.input_fetch_completed_timestamp.ToDatetime()
click.echo(indent('fetch_duration={}'.format(fetch_completed - fetch_start), ' '))
execution_start = metadata.execution_start_timestamp.ToDatetime()
execution_completed = metadata.execution_completed_timestamp.ToDatetime()
click.echo(indent('exection_duration={}'.format(execution_completed - execution_start), ' '))
upload_start = metadata.output_upload_start_timestamp.ToDatetime()
upload_completed = metadata.output_upload_completed_timestamp.ToDatetime()
click.echo(indent('upload_duration={}'.format(upload_completed - upload_start), ' '))
click.echo(indent('total_duration={}'.format(worker_completed - queued), ' '))
@cli.command('status', short_help="Get the status of an operation.")
@click.argument('operation-name', nargs=1, type=click.STRING, required=True)
@click.option('--json', is_flag=True, show_default=True,
help="Print operations status in JSON format.")
@pass_context
def status(context, operation_name):
context.logger.info("Getting operation status...")
def status(context, operation_name, json):
stub = operations_pb2_grpc.OperationsStub(context.channel)
request = operations_pb2.GetOperationRequest(name=operation_name)
response = stub.GetOperation(request)
context.logger.info(response)
operation = stub.GetOperation(request)
if not json:
_print_operation_status(operation, print_details=True)
else:
click.echo(json_format.MessageToJson(operation))
@cli.command('list', short_help="List operations.")
@click.option('--json', is_flag=True, show_default=True,
help="Print operations list in JSON format.")
@pass_context
def lists(context):
context.logger.info("Getting list of operations")
def lists(context, json):
stub = operations_pb2_grpc.OperationsStub(context.channel)
request = operations_pb2.ListOperationsRequest(name=context.instance_name)
response = stub.ListOperations(request)
if not response.operations:
context.logger.warning("No operations to list")
click.echo('Error: No operations to list.', err=True)
return
for op in response.operations:
context.logger.info(op)
operations_map = OrderedDict([
(OperationStage.CACHE_CHECK, []),
(OperationStage.QUEUED, []),
(OperationStage.EXECUTING, []),
(OperationStage.COMPLETED, [])
])
for operation in response.operations:
metadata = remote_execution_pb2.ExecuteOperationMetadata()
# The metadata is expected to be an ExecuteOperationMetadata message:
assert operation.metadata.Is(metadata.DESCRIPTOR)
operation.metadata.Unpack(metadata)
stage = OperationStage(metadata.stage)
operations_map[stage].append(operation)
for operations in operations_map.values():
operations.sort(key=attrgetter('name'))
for operation in operations:
if not json:
_print_operation_status(operation)
else:
click.echo(json_format.MessageToJson(operation))
@cli.command('wait', short_help="Streams an operation until it is complete.")
@click.argument('operation-name', nargs=1, type=click.STRING, required=True)
@click.option('--json', is_flag=True, show_default=True,
help="Print operations statuses in JSON format.")
@pass_context
def wait(context, operation_name):
def wait(context, operation_name, json):
stub = remote_execution_pb2_grpc.ExecutionStub(context.channel)
request = remote_execution_pb2.WaitExecutionRequest(name=operation_name)
response = stub.WaitExecution(request)
operation_iterator = stub.WaitExecution(request)
for stream in response:
context.logger.info(stream)
for operation in operation_iterator:
if not json and operation.done:
_print_operation_status(operation, print_details=True)
elif not json:
_print_operation_status(operation)
else:
click.echo(json_format.MessageToJson(operation))
# Copyright (C) 2018 Bloomberg LP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# <http://www.apache.org/licenses/LICENSE-2.0>
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from enum import Enum
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
class BotStatus(Enum):
# Initially unknown state.
BOT_STATUS_UNSPECIFIED = bots_pb2.BotStatus.Value('BOT_STATUS_UNSPECIFIED')
# The bot is healthy, and will accept leases as normal.
OK = bots_pb2.BotStatus.Value('OK')
# The bot is unhealthy and will not accept new leases.
UNHEALTHY = bots_pb2.BotStatus.Value('UNHEALTHY')
# The bot has been asked to reboot the host.
HOST_REBOOTING = bots_pb2.BotStatus.Value('HOST_REBOOTING')
# The bot has been asked to shut down.
BOT_TERMINATING = bots_pb2.BotStatus.Value('BOT_TERMINATING')
class LeaseState(Enum):
# Initially unknown state.
LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
# The server expects the bot to accept this lease.
PENDING = bots_pb2.LeaseState.Value('PENDING')
# The bot has accepted this lease.
ACTIVE = bots_pb2.LeaseState.Value('ACTIVE')
# The bot is no longer leased.
COMPLETED = bots_pb2.LeaseState.Value('COMPLETED')
# The bot should immediately release all resources associated with the lease.
CANCELLED = bots_pb2.LeaseState.Value('CANCELLED')
class OperationStage(Enum):
# Initially unknown stage.
UNKNOWN = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('UNKNOWN')
# Checking the result against the cache.
CACHE_CHECK = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('CACHE_CHECK')
# Currently idle, awaiting a free machine to execute.
QUEUED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('QUEUED')
# Currently being executed by a worker.
EXECUTING = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('EXECUTING')
# Finished execution.
COMPLETED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('COMPLETED')
......@@ -26,49 +26,15 @@ import asyncio
import logging
import platform
import uuid
from enum import Enum
import grpc
from buildgrid._enums import BotStatus, LeaseState
from buildgrid._protos.google.rpc import code_pb2
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
from buildgrid._exceptions import BotError
class BotStatus(Enum):
# Default value.
BOT_STATUS_UNSPECIFIED = bots_pb2.BotStatus.Value('BOT_STATUS_UNSPECIFIED')
# The bot is healthy, and will accept leases as normal.
OK = bots_pb2.BotStatus.Value('OK')
# The bot is unhealthy and will not accept new leases.
UNHEALTHY = bots_pb2.BotStatus.Value('UNHEALTHY')
# The bot has been asked to reboot the host.
HOST_REBOOTING = bots_pb2.BotStatus.Value('HOST_REBOOTING')
# The bot has been asked to shut down.
BOT_TERMINATING = bots_pb2.BotStatus.Value('BOT_TERMINATING')
class LeaseState(Enum):
# Default value.
LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
# The server expects the bot to accept this lease.
PENDING = bots_pb2.LeaseState.Value('PENDING')
# The bot has accepted this lease.
ACTIVE = bots_pb2.LeaseState.Value('ACTIVE')
# The bot is no longer leased.
COMPLETED = bots_pb2.LeaseState.Value('COMPLETED')
# The bot should immediately release all resources associated with the lease.
CANCELLED = bots_pb2.LeaseState.Value('CANCELLED')
class BotSession:
def __init__(self, parent, interface):
""" Unique bot ID within the farm used to identify this bot
......
......@@ -15,39 +15,15 @@
import logging
import uuid
from enum import Enum
from google.protobuf import timestamp_pb2
from buildgrid._enums import LeaseState, OperationStage
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
from buildgrid._protos.google.longrunning import operations_pb2
class OperationStage(Enum):
# Initially unknown stage.
UNKNOWN = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('UNKNOWN')
# Checking the result against the cache.
CACHE_CHECK = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('CACHE_CHECK')
# Currently idle, awaiting a free machine to execute.
QUEUED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('QUEUED')
# Currently being executed by a worker.
EXECUTING = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('EXECUTING')
# Finished execution.
COMPLETED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('COMPLETED')
class LeaseState(Enum):
# Initially unknown state.
LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
# The server expects the bot to accept this lease.
PENDING = bots_pb2.LeaseState.Value('PENDING')
# The bot has accepted this lease.
ACTIVE = bots_pb2.LeaseState.Value('ACTIVE')
# The bot is no longer leased.
COMPLETED = bots_pb2.LeaseState.Value('COMPLETED')
# The bot should immediately release all resources associated with the lease.
CANCELLED = bots_pb2.LeaseState.Value('CANCELLED')
class Job:
def __init__(self, action, action_digest):
......@@ -60,6 +36,9 @@ class Job:
self.__execute_response = None
self.__operation_metadata = remote_execution_pb2.ExecuteOperationMetadata()
self.__queued_timestamp = timestamp_pb2.Timestamp()
self.__worker_start_timestamp = timestamp_pb2.Timestamp()
self.__worker_completed_timestamp = timestamp_pb2.Timestamp()
self.__operation_metadata.action_digest.CopyFrom(action_digest)
self.__operation_metadata.stage = OperationStage.UNKNOWN.value
......@@ -177,10 +156,18 @@ class Job:
self._lease.state = state.value
if self._lease.state == LeaseState.PENDING.value:
self.__worker_start_timestamp.Clear()
self.__worker_completed_timestamp.Clear()
self._lease.status.Clear()
self._lease.result.Clear()
elif self._lease.state == LeaseState.ACTIVE.value:
self.__worker_start_timestamp.GetCurrentTime()
elif self._lease.state == LeaseState.COMPLETED.value:
self.__worker_completed_timestamp.GetCurrentTime()
action_result = remote_execution_pb2.ActionResult()
# TODO: Make a distinction between build and bot failures!
......@@ -191,6 +178,11 @@ class Job:
assert result.Is(action_result.DESCRIPTOR)
result.Unpack(action_result)
action_metadata = action_result.execution_metadata
action_metadata.queued_timestamp.CopyFrom(self.__worker_start_timestamp)
action_metadata.worker_start_timestamp.CopyFrom(self.__worker_start_timestamp)
action_metadata.worker_completed_timestamp.CopyFrom(self.__worker_completed_timestamp)
self.__execute_response = remote_execution_pb2.ExecuteResponse()
self.__execute_response.result.CopyFrom(action_result)
self.__execute_response.cached_result = False
......@@ -208,6 +200,8 @@ class Job:
self.__operation_metadata.stage = stage.value
if self.__operation_metadata.stage == OperationStage.QUEUED.value:
if self.__queued_timestamp.ByteSize() == 0:
self.__queued_timestamp.GetCurrentTime()
self._n_tries += 1
elif self.__operation_metadata.stage == OperationStage.COMPLETED.value:
......
......@@ -109,11 +109,16 @@ class Scheduler:
"""
job = self.jobs[job_name]
if lease_state != LeaseState.COMPLETED:
job.update_lease_state(lease_state)
if lease_state == LeaseState.PENDING:
job.update_lease_state(LeaseState.PENDING)
job.update_operation_stage(OperationStage.QUEUED)
else:
job.update_lease_state(lease_state,
elif lease_state == LeaseState.ACTIVE:
job.update_lease_state(LeaseState.ACTIVE)
job.update_operation_stage(OperationStage.EXECUTING)
elif lease_state == LeaseState.COMPLETED:
job.update_lease_state(LeaseState.COMPLETED,
status=lease_status, result=lease_result)
if self._action_cache is not None and not job.do_not_cache:
......
......@@ -15,11 +15,21 @@
from operator import attrgetter
import os
import socket
from buildgrid.settings import HASH
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
def get_hostname():
"""Returns the hostname of the machine executing that function.
Returns:
str: Hostname for the current machine.
"""
return socket.gethostname()
def create_digest(bytes_to_digest):
"""Computes the :obj:`Digest` of a piece of data.
......
......@@ -144,7 +144,8 @@ def test_list_operations_with_result(instance, controller, execute_request, cont
execute_response = remote_execution_pb2.ExecuteResponse()
response.operations[0].response.Unpack(execute_response)
assert execute_response.result == action_result
assert execute_response.result.output_files == action_result.output_files
def test_list_operations_empty(instance, context):
......