Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • edbaunton/buildgrid
  • BuildGrid/buildgrid
  • bloomberg/buildgrid
  • devcurmudgeon/buildgrid
  • mhadjimichael/buildgrid
  • jmacarthur/buildgrid
  • rkothur/buildgrid
  • valentindavid/buildgrid
  • jjardon/buildgrid
  • RichKen/buildgrid
  • jbonney/buildgrid
  • onsha_alexander/buildgrid
  • santigl/buildgrid
  • mostynb/buildgrid
  • hoffbrinkle/buildgrid
  • Malinskiy/buildgrid
  • coldtom/buildgrid
  • azeemb_a/buildgrid
  • pointswaves/buildgrid
  • BenjaminSchubert/buildgrid
  • michaellee8/buildgrid
  • anil-anil/buildgrid
  • seanborg/buildgrid
  • jdelong12/buildgrid
  • jclay/buildgrid
  • bweston92/buildgrid
  • zchen723/buildgrid
  • cpratt34/buildgrid
  • armbiant/apache-buildgrid
  • armbiant/android-buildgrid
  • itsme300/buildgrid
  • sbairoliya/buildgrid
32 results
Show changes
Commits on Source (4)
......@@ -17,8 +17,6 @@ import os
import subprocess
import tempfile
from google.protobuf import any_pb2
from buildgrid.client.cas import download, upload
from buildgrid._exceptions import BotError
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
......@@ -29,13 +27,14 @@ from buildgrid.utils import read_file, write_file
def work_buildbox(context, lease):
"""Executes a lease for a build action, using buildbox.
"""
local_cas_directory = context.local_cas
# instance_name = context.parent
logger = context.logger
action_digest = remote_execution_pb2.Digest()
lease.payload.Unpack(action_digest)
lease.result.Clear()
with download(context.cas_channel) as downloader:
action = downloader.get_message(action_digest,
......@@ -131,10 +130,7 @@ def work_buildbox(context, lease):
action_result.output_directories.extend([output_directory])
action_result_any = any_pb2.Any()
action_result_any.Pack(action_result)
lease.result.CopyFrom(action_result_any)
lease.result.Pack(action_result)
return lease
......
......@@ -17,8 +17,6 @@ import os
import subprocess
import tempfile
from google.protobuf import any_pb2
from buildgrid.client.cas import download, upload
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid.utils import output_file_maker, output_directory_maker
......@@ -27,12 +25,13 @@ from buildgrid.utils import output_file_maker, output_directory_maker
def work_host_tools(context, lease):
"""Executes a lease for a build action, using host tools.
"""
instance_name = context.parent
logger = context.logger
action_digest = remote_execution_pb2.Digest()
lease.payload.Unpack(action_digest)
lease.result.Clear()
with tempfile.TemporaryDirectory() as temp_directory:
with download(context.cas_channel, instance=instance_name) as downloader:
......@@ -122,9 +121,6 @@ def work_host_tools(context, lease):
action_result.output_directories.extend(output_directories)
action_result_any = any_pb2.Any()
action_result_any.Pack(action_result)
lease.result.CopyFrom(action_result_any)
lease.result.Pack(action_result)
return lease
......@@ -169,6 +169,7 @@ def run_command(context, input_root, commands, output_file, output_directory):
downloader.download_file(output_file_response.digest, path)
if output_file_response.path in output_executeables:
st = os.stat(path)
os.chmod(path, st.st_mode | stat.S_IXUSR)
for output_file_response in execute_response.result.output_files:
if output_file_response.path in output_executeables:
st = os.stat(path)
os.chmod(path, st.st_mode | stat.S_IXUSR)
......@@ -109,7 +109,7 @@ class BotsInterface:
if server_state == LeaseState.PENDING:
if client_state == LeaseState.ACTIVE:
self._scheduler.update_job_lease_state(client_lease.id, client_lease.state)
self._scheduler.update_job_lease_state(client_lease.id, client_state)
elif client_state == LeaseState.COMPLETED:
# TODO: Lease was rejected
raise NotImplementedError("'Not Accepted' is unsupported")
......@@ -122,8 +122,7 @@ class BotsInterface:
pass
elif client_state == LeaseState.COMPLETED:
self._scheduler.update_job_lease_state(client_lease.id, client_lease.state)
self._scheduler.job_complete(client_lease.id, client_lease.result, client_lease.status)
self._scheduler.update_job_lease_state(client_lease.id, client_state, lease_status=client_lease.status, lease_result=client_lease.result)
return None
else:
......
......@@ -53,7 +53,7 @@ class ExecutionInstance:
self._scheduler.append_job(job, skip_cache_lookup)
return job.get_operation()
return job.operation
def register_message_client(self, name, queue):
try:
......
......@@ -19,57 +19,33 @@ import logging
import uuid
from enum import Enum
from google.protobuf import any_pb2
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
from buildgrid._protos.google.longrunning import operations_pb2
class ExecuteStage(Enum):
class OperationStage(Enum):
# Initially unknown stage.
UNKNOWN = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('UNKNOWN')
# Checking the result against the cache.
CACHE_CHECK = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('CACHE_CHECK')
# Currently idle, awaiting a free machine to execute.
QUEUED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('QUEUED')
# Currently being executed by a worker.
EXECUTING = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('EXECUTING')
# Finished execution.
COMPLETED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('COMPLETED')
class BotStatus(Enum):
BOT_STATUS_UNSPECIFIED = bots_pb2.BotStatus.Value('BOT_STATUS_UNSPECIFIED')
# The bot is healthy, and will accept leases as normal.
OK = bots_pb2.BotStatus.Value('OK')
# The bot is unhealthy and will not accept new leases.
UNHEALTHY = bots_pb2.BotStatus.Value('UNHEALTHY')
# The bot has been asked to reboot the host.
HOST_REBOOTING = bots_pb2.BotStatus.Value('HOST_REBOOTING')
# The bot has been asked to shut down.
BOT_TERMINATING = bots_pb2.BotStatus.Value('BOT_TERMINATING')
class LeaseState(Enum):
# Initially unknown state.
LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
# The server expects the bot to accept this lease.
PENDING = bots_pb2.LeaseState.Value('PENDING')
# The bot has accepted this lease.
ACTIVE = bots_pb2.LeaseState.Value('ACTIVE')
# The bot is no longer leased.
COMPLETED = bots_pb2.LeaseState.Value('COMPLETED')
# The bot should immediately release all resources associated with the lease.
CANCELLED = bots_pb2.LeaseState.Value('CANCELLED')
......@@ -77,18 +53,23 @@ class LeaseState(Enum):
class Job:
def __init__(self, action_digest, do_not_cache=False, message_queue=None):
self.lease = None
self.logger = logging.getLogger(__name__)
self.n_tries = 0
self.result = None
self.result_cached = False
self._name = str(uuid.uuid4())
self._action_digest = action_digest
self._do_not_cache = do_not_cache
self._execute_stage = ExecuteStage.UNKNOWN
self._name = str(uuid.uuid4())
self._operation = operations_pb2.Operation(name=self._name)
self._operation = operations_pb2.Operation()
self._lease = None
self._n_tries = 0
self.__operation_metadata = remote_execution_pb2.ExecuteOperationMetadata()
self.__operation_metadata.action_digest.CopyFrom(self._action_digest)
self.__operation_metadata.stage = OperationStage.UNKNOWN.value
self._operation_update_queues = []
self._operation.name = self._name
self._operation.done = False
if message_queue is not None:
self.register_client(message_queue)
......@@ -105,57 +86,72 @@ class Job:
def do_not_cache(self):
return self._do_not_cache
def check_job_finished(self):
if not self._operation_update_queues:
return self._operation.done
return False
@property
def operation(self):
return self._operation
@property
def operation_stage(self):
return OperationStage(self.__operation_metadata.state)
@property
def lease(self):
return self._lease
@property
def lease_state(self):
return LeaseState(self._lease.state)
@property
def n_tries(self):
return self._n_tries
def register_client(self, queue):
self._operation_update_queues.append(queue)
queue.put(self.get_operation())
queue.put(self._operation)
def unregister_client(self, queue):
self._operation_update_queues.remove(queue)
def get_operation(self):
self._operation.metadata.CopyFrom(self._pack_any(self.get_operation_meta()))
if self.result is not None:
self._operation.done = True
response = remote_execution_pb2.ExecuteResponse(result=self.result,
cached_result=self.result_cached)
def create_lease(self):
self._lease = bots_pb2.Lease()
self._lease.id = self._name
self._lease.payload.Pack(self._action_digest)
self._lease.state = LeaseState.PENDING.value
if not self.result_cached:
response.status.CopyFrom(self.lease.status)
return self._lease
self._operation.response.CopyFrom(self._pack_any(response))
def update_lease_state(self, state, status=None, result=None):
if state.value == self._lease.state:
return
return self._operation
self._lease.state = state.value
def get_operation_meta(self):
meta = remote_execution_pb2.ExecuteOperationMetadata()
meta.stage = self._execute_stage.value
meta.action_digest.CopyFrom(self._action_digest)
if self._lease.state == LeaseState.COMPLETED.value:
response = remote_execution_pb2.ExecuteResponse()
response.result.CopyFrom(result)
response.cached_result = False
response.status.CopyFrom(status)
return meta
self._operation.response.Pack(response)
self._operation.done = True
def create_lease(self):
action_digest = self._pack_any(self._action_digest)
def cancel_lease(self):
pass
def update_operation_stage(self, stage):
if stage.value == self.__operation_metadata.stage:
return
self.__operation_metadata.stage = stage.value
lease = bots_pb2.Lease(id=self.name,
payload=action_digest,
state=LeaseState.PENDING.value)
self.lease = lease
return lease
if self.__operation_metadata.stage == OperationStage.QUEUED.value:
self._n_tries += 1
def get_operations(self):
return operations_pb2.ListOperationsResponse(operations=[self.get_operation()])
self._operation.metadata.Pack(self.__operation_metadata)
def update_execute_stage(self, stage):
self._execute_stage = stage
for queue in self._operation_update_queues:
queue.put(self.get_operation())
queue.put(self._operation)
def _pack_any(self, pack):
some_any = any_pb2.Any()
some_any.Pack(pack)
return some_any
def cancel_operation(self):
pass
......@@ -27,7 +27,7 @@ from buildgrid._exceptions import NotFoundError
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid._protos.google.longrunning import operations_pb2
from .job import ExecuteStage, LeaseState
from .job import OperationStage, LeaseState
class Scheduler:
......@@ -45,7 +45,7 @@ class Scheduler:
def unregister_client(self, name, queue):
job = self.jobs[name]
job.unregister_client(queue)
if job.check_job_finished():
if job.operation.done:
del self.jobs[name]
def append_job(self, job, skip_cache_lookup=False):
......@@ -55,64 +55,55 @@ class Scheduler:
cached_result = self._action_cache.get_action_result(job.action_digest)
except NotFoundError:
self.queue.append(job)
job.update_execute_stage(ExecuteStage.QUEUED)
job.update_operation_stage(OperationStage.QUEUED)
else:
job.result = cached_result
job.result_cached = True
job.update_execute_stage(ExecuteStage.COMPLETED)
job.update_operation_stage(OperationStage.COMPLETED)
else:
self.queue.append(job)
job.update_execute_stage(ExecuteStage.QUEUED)
job.update_operation_stage(OperationStage.QUEUED)
def retry_job(self, name):
if name in self.jobs:
job = self.jobs[name]
if job.n_tries >= self.MAX_N_TRIES:
# TODO: Decide what to do with these jobs
job.update_execute_stage(ExecuteStage.COMPLETED)
job.update_operation_stage(OperationStage.COMPLETED)
# TODO: Mark these jobs as done
else:
job.update_execute_stage(ExecuteStage.QUEUED)
job.n_tries += 1
job.update_operation_stage(OperationStage.QUEUED)
self.queue.appendleft(job)
def job_complete(self, name, result, status):
job = self.jobs[name]
job.lease.status.CopyFrom(status)
action_result = remote_execution_pb2.ActionResult()
result.Unpack(action_result)
job.result = action_result
if not job.do_not_cache and self._action_cache is not None:
if not job.lease.status.code:
self._action_cache.update_action_result(job.action_digest, action_result)
job.update_execute_stage(ExecuteStage.COMPLETED)
def get_operations(self):
response = operations_pb2.ListOperationsResponse()
for v in self.jobs.values():
response.operations.extend([v.get_operation()])
response.operations.extend([v.operation])
return response
def update_job_lease_state(self, name, state):
job = self.jobs[name]
job.lease.state = state
def update_job_lease_state(self, job_name, lease_state, lease_status=None, lease_result=None):
job = self.jobs[job_name]
if lease_state != LeaseState.COMPLETED:
job.update_lease_state(lease_state)
else:
action_result = remote_execution_pb2.ActionResult()
lease_result.Unpack(action_result)
job.update_lease_state(lease_state, status=lease_status, result=action_result)
if not job.do_not_cache and self._action_cache is not None:
if not job.lease.status.code:
self._action_cache.update_action_result(job.action_digest, action_result)
job.update_operation_stage(OperationStage.COMPLETED)
def get_job_lease(self, name):
return self.jobs[name].lease
def cancel_session(self, name):
job = self.jobs[name]
state = job.lease.state
if state in (LeaseState.PENDING.value, LeaseState.ACTIVE.value):
self.retry_job(name)
def create_lease(self):
if self.queue:
job = self.queue.popleft()
job.update_execute_stage(ExecuteStage.EXECUTING)
job.update_operation_stage(OperationStage.EXECUTING)
job.create_lease()
job.lease.state = LeaseState.PENDING.value
return job.lease
return None
......@@ -82,7 +82,7 @@ def test_execute(skip_cache_lookup, instance, context):
assert isinstance(result, operations_pb2.Operation)
metadata = remote_execution_pb2.ExecuteOperationMetadata()
result.metadata.Unpack(metadata)
assert metadata.stage == job.ExecuteStage.QUEUED.value
assert metadata.stage == job.OperationStage.QUEUED.value
assert uuid.UUID(result.name, version=4)
assert result.done is False
......@@ -116,7 +116,7 @@ def test_wait_execution(instance, controller, context):
action_result = remote_execution_pb2.ActionResult()
action_result_any.Pack(action_result)
j.update_execute_stage(job.ExecuteStage.COMPLETED)
j.update_operation_stage(job.OperationStage.COMPLETED)
response = instance.WaitExecution(request, context)
......@@ -125,7 +125,7 @@ def test_wait_execution(instance, controller, context):
assert isinstance(result, operations_pb2.Operation)
metadata = remote_execution_pb2.ExecuteOperationMetadata()
result.metadata.Unpack(metadata)
assert metadata.stage == job.ExecuteStage.COMPLETED.value
assert metadata.stage == job.OperationStage.COMPLETED.value
assert uuid.UUID(result.name, version=4)
assert result.done is True
......
......@@ -30,6 +30,7 @@ from buildgrid._protos.google.longrunning import operations_pb2
from buildgrid._protos.google.rpc import status_pb2
from buildgrid.server.cas.storage import lru_memory_cache
from buildgrid.server.controller import ExecutionController
from buildgrid.server.job import OperationStage
from buildgrid.server.operations import service
from buildgrid.server.operations.service import OperationsService
from buildgrid.utils import create_digest
......@@ -131,9 +132,10 @@ def test_list_operations_with_result(instance, controller, execute_request, cont
action_result.output_files.extend([output_file])
controller.operations_instance._scheduler.jobs[response_execute.name].create_lease()
controller.operations_instance._scheduler.job_complete(response_execute.name,
_pack_any(action_result),
status_pb2.Status())
controller.operations_instance._scheduler.update_job_lease_state(response_execute.name,
OperationStage.COMPLETED,
lease_status=status_pb2.Status(),
lease_result=_pack_any(action_result))
request = operations_pb2.ListOperationsRequest(name=instance_name)
response = instance.ListOperations(request, context)
......