_context.py 23.9 KB
Newer Older
1
#
2
#  Copyright (C) 2016-2018 Codethink Limited
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU Lesser General Public
#  License as published by the Free Software Foundation; either
#  version 2 of the License, or (at your option) any later version.
#
#  This library is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
#  Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public
#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
#
#  Authors:
#        Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>

20
import os
21
import datetime
22 23
from collections import deque
from collections.abc import Mapping
24
from contextlib import contextmanager
25
from . import utils
26
from . import _cachekey
27
from . import _signals
28
from . import _site
29
from . import _yaml
30 31
from ._exceptions import LoadError, LoadErrorReason, BstError
from ._message import Message, MessageType
32
from ._profile import Topics, profile_start, profile_end
33
from ._artifactcache import ArtifactCache, ArtifactCacheUsage
34
from ._cas import CASCache
35
from ._workspaces import Workspaces, WorkspaceProjectCache
36
from .plugin import _plugin_lookup
37
from .sandbox import SandboxRemote
38

39

40 41 42 43 44 45 46 47 48 49 50
# Context()
#
# The Context object holds all of the user preferences
# and context for a given invocation of BuildStream.
#
# This is a collection of data from configuration files and command
# line arguments and consists of information such as where to store
# logs and artifacts, where to perform builds and cache downloaded sources,
# verbosity levels and basically anything pertaining to the context
# in which BuildStream was invoked.
#
51 52 53
# Args:
#    directory (str): The directory that buildstream was invoked in
#
54
class Context():
55

56
    def __init__(self, directory=None):

        # Path of the configuration file that was loaded, or None when
        # running with only the default configuration
        self.config_origin = None

        # Toplevel directory under which the other cache directories live
        self.cachedir = None

        # Location where downloaded sources are stored
        self.sourcedir = None

        # Location where build sandboxes are created
        self.builddir = None

        # Location of the local CAS
        self.casdir = None

        # Location where artifacts are extracted
        self.extractdir = None

        # Location for temporary files
        self.tmpdir = None

        # Default parent directory for newly created workspaces
        self.workspacedir = None

        # Remote cache specs to push prebuilt artifacts to / pull from
        self.artifact_cache_specs = None

        # The global remote execution configuration
        self.remote_execution_specs = None

        # Location where build logs are written
        self.logdir = None

        # Abbreviated cache key length shown in the UI
        self.log_key_length = None

        # True when debug output is requested
        self.log_debug = None

        # True when verbose output is requested
        self.log_verbose = None

        # Cap on the number of build log lines printed on error
        self.log_error_lines = None

        # Cap on lines printed in the master log for a detailed message
        self.log_message_lines = None

        # Format string used to print the pipeline at startup time
        self.log_element_format = None

        # Format string used for message lines in the master log
        self.log_message_format = None

        # Scheduler limits: fetch/refresh tasks, build tasks, push tasks,
        # and retries for network tasks
        self.sched_fetchers = None
        self.sched_builders = None
        self.sched_pushers = None
        self.sched_network_retries = None

        # Action to take when a build fails in non interactive mode
        self.sched_error_action = None

        # Artifact cache quota, in bytes
        self.config_cache_quota = None

        # Whether to attempt to pull build trees globally
        self.pull_buildtrees = None

        # Whether to cache build trees on artifact creation
        self.cache_buildtrees = None

        # Whether elements must be rebuilt when their dependencies have
        # changed; resolved lazily in get_strict()
        self._strict_build_plan = None

        # The XDG variables must be present in the environment before
        # any configuration is loaded
        self._init_xdg()

        # Private state
        self._cache_key = None
        self._message_handler = None
        self._message_depth = deque()
        self._artifactcache = None
        self._projects = []
        self._project_overrides = {}
        self._workspaces = None
        self._workspace_project_cache = WorkspaceProjectCache()
        self._log_handle = None
        self._log_filename = None
        self._cascache = None
        self._directory = directory
155

156 157 158 159 160 161 162 163 164 165 166 167 168 169
    # load()
    #
    # Loads the configuration files
    #
    # Args:
    #    config (filename): The user specified configuration file, if any
    #
    # Raises:
    #   LoadError
    #
    # This will first load the BuildStream default configuration and then
    # override that configuration with the configuration file indicated
    # by *config*, if any was specified.
    #
    def load(self, config=None):
        profile_start(Topics.LOAD_CONTEXT, 'load')

        # If a specific config file is not specified, default to trying
        # a $XDG_CONFIG_HOME/buildstream.conf file
        #
        if not config:
            default_config = os.path.join(os.environ['XDG_CONFIG_HOME'],
                                          'buildstream.conf')
            if os.path.exists(default_config):
                config = default_config

        # Load default config, then composite the user config on top of
        # it so user settings override the defaults
        #
        defaults = _yaml.load(_site.default_user_config)

        if config:
            self.config_origin = os.path.abspath(config)
            user_config = _yaml.load(config)
            _yaml.composite(defaults, user_config)

        # Give obsoletion warnings for configuration keys which were
        # replaced or removed in the cachedir reorganization
        if defaults.get('builddir'):
            raise LoadError(LoadErrorReason.INVALID_DATA,
                            "builddir is obsolete, use cachedir")

        if defaults.get('artifactdir'):
            raise LoadError(LoadErrorReason.INVALID_DATA,
                            "artifactdir is obsolete")

        _yaml.node_validate(defaults, [
            'cachedir', 'sourcedir', 'builddir', 'logdir', 'scheduler',
            'artifacts', 'logging', 'projects', 'cache', 'prompt',
            'workspacedir', 'remote-execution',
        ])

        for directory in ['cachedir', 'sourcedir', 'logdir', 'workspacedir']:
            # Allow the ~ tilde expansion and any environment variables in
            # path specification in the config files.
            #
            path = _yaml.node_get(defaults, str, directory)
            path = os.path.expanduser(path)
            path = os.path.expandvars(path)
            path = os.path.normpath(path)
            setattr(self, directory, path)

        # add directories not set by users; these are always derived
        # from the configured cachedir
        self.extractdir = os.path.join(self.cachedir, 'extract')
        self.tmpdir = os.path.join(self.cachedir, 'tmp')
        self.casdir = os.path.join(self.cachedir, 'cas')
        self.builddir = os.path.join(self.cachedir, 'build')

        # Move old artifact cas to cas if it exists and create symlink
        # so that existing local caches keep working after the layout change
        old_casdir = os.path.join(self.cachedir, 'artifacts', 'cas')
        if (os.path.exists(old_casdir) and not os.path.islink(old_casdir) and
                not os.path.exists(self.casdir)):
            os.rename(old_casdir, self.casdir)
            os.symlink(self.casdir, old_casdir)

        # Load quota configuration
        # NOTE(review): the quota string is parsed later by the artifact
        # cache; here it is only read and validated as a string
        cache = _yaml.node_get(defaults, Mapping, 'cache')
        _yaml.node_validate(cache, ['quota', 'pull-buildtrees', 'cache-buildtrees'])

        self.config_cache_quota = _yaml.node_get(cache, str, 'quota')

        # Load artifact share configuration
        self.artifact_cache_specs = ArtifactCache.specs_from_config_node(defaults)

        self.remote_execution_specs = SandboxRemote.specs_from_config_node(defaults)

        # Load pull build trees configuration
        self.pull_buildtrees = _yaml.node_get(cache, bool, 'pull-buildtrees')

        # Load cache build trees configuration, restricted to the known options
        self.cache_buildtrees = _node_get_option_str(
            cache, 'cache-buildtrees', ['always', 'failure', 'never'])

        # Load logging config
        logging = _yaml.node_get(defaults, Mapping, 'logging')
        _yaml.node_validate(logging, [
            'key-length', 'verbose',
            'error-lines', 'message-lines',
            'debug', 'element-format', 'message-format'
        ])
        self.log_key_length = _yaml.node_get(logging, int, 'key-length')
        self.log_debug = _yaml.node_get(logging, bool, 'debug')
        self.log_verbose = _yaml.node_get(logging, bool, 'verbose')
        self.log_error_lines = _yaml.node_get(logging, int, 'error-lines')
        self.log_message_lines = _yaml.node_get(logging, int, 'message-lines')
        self.log_element_format = _yaml.node_get(logging, str, 'element-format')
        self.log_message_format = _yaml.node_get(logging, str, 'message-format')

        # Load scheduler config
        scheduler = _yaml.node_get(defaults, Mapping, 'scheduler')
        _yaml.node_validate(scheduler, [
            'on-error', 'fetchers', 'builders',
            'pushers', 'network-retries'
        ])
        self.sched_error_action = _node_get_option_str(
            scheduler, 'on-error', ['continue', 'quit', 'terminate'])
        self.sched_fetchers = _yaml.node_get(scheduler, int, 'fetchers')
        self.sched_builders = _yaml.node_get(scheduler, int, 'builders')
        self.sched_pushers = _yaml.node_get(scheduler, int, 'pushers')
        self.sched_network_retries = _yaml.node_get(scheduler, int, 'network-retries')

        # Load per-projects overrides
        self._project_overrides = _yaml.node_get(defaults, Mapping, 'projects', default_value={})

        # Shallow validation of overrides, parts of buildstream which rely
        # on the overrides are expected to validate elsewhere.
        for _, overrides in _yaml.node_items(self._project_overrides):
            _yaml.node_validate(overrides, ['artifacts', 'options', 'strict', 'default-mirror',
                                            'remote-execution'])

        profile_end(Topics.LOAD_CONTEXT, 'load')

288 289 290
    @property
    def artifactcache(self):
        if not self._artifactcache:
291
            self._artifactcache = ArtifactCache(self)
292 293 294

        return self._artifactcache

295 296 297 298 299 300 301 302 303 304
    # get_artifact_cache_usage()
    #
    # Fetches the current usage of the artifact cache
    #
    # Returns:
    #     (ArtifactCacheUsage): The current status
    #
    def get_artifact_cache_usage(self):
        cache = self.artifactcache
        return ArtifactCacheUsage(cache)

305
    # add_project():
306 307 308 309 310 311
    #
    # Add a project to the context.
    #
    # Args:
    #    project (Project): The project to add
    #
312
    def add_project(self, project):
313
        if not self._projects:
314
            self._workspaces = Workspaces(project, self._workspace_project_cache)
315 316
        self._projects.append(project)

317
    # get_projects():
318 319 320 321 322 323
    #
    # Return the list of projects in the context.
    #
    # Returns:
    #    (list): The list of projects
    #
324
    def get_projects(self):
325 326
        return self._projects

327
    # get_toplevel_project():
328 329 330 331 332
    #
    # Return the toplevel project, the one which BuildStream was
    # invoked with as opposed to a junctioned subproject.
    #
    # Returns:
333
    #    (Project): The Project object
334
    #
335
    def get_toplevel_project(self):
336 337
        return self._projects[0]

338 339 340 341 342 343 344
    # get_workspaces():
    #
    # Return a Workspaces object containing a list of workspaces.
    #
    # Returns:
    #    (Workspaces): The Workspaces object
    #
345 346 347
    def get_workspaces(self):
        return self._workspaces

348 349 350 351 352 353 354 355 356 357
    # get_workspace_project_cache():
    #
    # Return the WorkspaceProjectCache object used for this BuildStream invocation
    #
    # Returns:
    #    (WorkspaceProjectCache): The WorkspaceProjectCache object
    #
    def get_workspace_project_cache(self):
        return self._workspace_project_cache

358
    # get_overrides():
    #
    # Fetch the override dictionary for the active project. This returns
    # a node loaded from YAML; values should be read from the returned
    # node with the _yaml.node_get() family of functions.
    #
    # Args:
    #    project_name (str): The project name
    #
    # Returns:
    #    (Mapping): The overrides dictionary for the specified project,
    #               empty when none were configured
    #
    def get_overrides(self, project_name):
        overrides = _yaml.node_get(self._project_overrides, Mapping,
                                   project_name, default_value={})
        return overrides
372

373
    # get_strict():
374 375 376 377 378 379
    #
    # Fetch whether we are strict or not
    #
    # Returns:
    #    (bool): Whether or not to use strict build plan
    #
380
    def get_strict(self):
381 382 383 384 385 386
        if self._strict_build_plan is None:
            # Either we're not overridden or we've never worked it out before
            # so work out if we should be strict, and then cache the result
            toplevel = self.get_toplevel_project()
            overrides = self.get_overrides(toplevel.name)
            self._strict_build_plan = _yaml.node_get(overrides, bool, 'strict', default_value=True)
387 388

        # If it was set by the CLI, it overrides any config
389 390 391
        # Ditto if we've already computed this, then we return the computed
        # value which we cache here too.
        return self._strict_build_plan
392

393
    # get_cache_key():
394 395 396 397 398 399
    #
    # Returns the cache key, calculating it if necessary
    #
    # Returns:
    #    (str): A hex digest cache key for the Context
    #
400
    def get_cache_key(self):
401 402 403
        if self._cache_key is None:

            # Anything that alters the build goes into the unique key
404
            self._cache_key = _cachekey.generate_key({})
405

406
        return self._cache_key
407

408
    # set_message_handler()
409 410 411 412 413
    #
    # Sets the handler for any status messages propagated through
    # the context.
    #
    # The message handler should have the same signature as
414 415
    # the message() method
    def set_message_handler(self, handler):
416 417
        self._message_handler = handler

418
    # silent_messages():
419
    #
420 421
    # Returns:
    #    (bool): Whether messages are currently being silenced
422
    #
423
    def silent_messages(self):
424 425 426 427
        for silent in self._message_depth:
            if silent:
                return True
        return False
428

429
    # message():
430
    #
431 432
    # Proxies a message back to the caller, this is the central
    # point through which all messages pass.
433 434
    #
    # Args:
435
    #    message: A Message object
436
    #
437
    def message(self, message):
438 439 440

        # Tag message only once
        if message.depth is None:
441
            message.depth = len(list(self._message_depth))
442

443 444 445
        # If we are recording messages, dump a copy into the open log file.
        self._record_message(message)

446
        # Send it off to the log handler (can be the frontend,
447
        # or it can be the child task which will propagate
448
        # to the frontend)
449
        assert self._message_handler
450 451

        self._message_handler(message, context=self)
452

453
    # silence()
454 455 456 457 458 459 460
    #
    # A context manager to silence messages, this behaves in
    # the same way as the `silent_nested` argument of the
    # Context._timed_activity() context manager: especially
    # important messages will not be silenced.
    #
    @contextmanager
461
    def silence(self):
462
        self._push_message_depth(True)
463 464 465 466
        try:
            yield
        finally:
            self._pop_message_depth()
467

468
    # timed_activity()
    #
    # Context manager for performing timed activities and logging those
    #
    # Args:
    #    activity_name (str): The name of the activity
    #    unique_id (int): Optional unique id of the plugin this activity relates to
    #    detail (str): An optional detailed message, can be multiline output
    #    silent_nested (bool): If specified, nested messages will be silenced
    #
    @contextmanager
    def timed_activity(self, activity_name, *, unique_id=None, detail=None, silent_nested=False):

        starttime = datetime.datetime.now()
        stopped_time = None

        # Record the moment the task was suspended
        def stop_time():
            nonlocal stopped_time
            stopped_time = datetime.datetime.now()

        # On resume, shift the start time forward by the time spent
        # suspended, so the reported elapsed time excludes suspension
        def resume_time():
            nonlocal stopped_time
            nonlocal starttime
            sleep_time = datetime.datetime.now() - stopped_time
            starttime += sleep_time

        with _signals.suspendable(stop_time, resume_time):
            try:
                # Push activity depth for status messages
                message = Message(unique_id, MessageType.START, activity_name, detail=detail)
                self.message(message)
                self._push_message_depth(silent_nested)
                yield

            except BstError:
                # Note the failure in status messages and reraise, the scheduler
                # expects an error when there is an error.
                elapsed = datetime.datetime.now() - starttime
                message = Message(unique_id, MessageType.FAIL, activity_name, elapsed=elapsed)
                self._pop_message_depth()
                self.message(message)
                raise

            # Success path: pop the depth and emit a SUCCESS message
            # with the (suspension-adjusted) elapsed time
            elapsed = datetime.datetime.now() - starttime
            message = Message(unique_id, MessageType.SUCCESS, activity_name, elapsed=elapsed)
            self._pop_message_depth()
            self.message(message)

516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
    # recorded_messages()
    #
    # Records all messages in a log file while the context manager
    # is active.
    #
    # In addition to automatically writing all messages to the
    # specified logging file, an open file handle for process stdout
    # and stderr will be available via the Context.get_log_handle() API,
    # and the full logfile path will be available via the
    # Context.get_log_filename() API.
    #
    # Args:
    #     filename (str): A logging directory relative filename,
    #                     the pid and .log extension will be automatically
    #                     appended
    #
    # Yields:
    #     (str): The fully qualified log filename
    #
    @contextmanager
    def recorded_messages(self, filename):

        # We dont allow recursing in this context manager, and
        # we also do not allow it in the main process.
        assert self._log_handle is None
        assert self._log_filename is None
        assert not utils._is_main_process()

        # Create the fully qualified logfile in the log directory,
        # appending the pid and .log extension at the end.
        self._log_filename = os.path.join(self.logdir,
                                          '{}.{}.log'.format(filename, os.getpid()))

        # Clear the recording state in a finally block: previously an
        # exception raised inside the recorded body would leave
        # _log_handle/_log_filename set, tripping the recursion
        # assertions above on any subsequent use.
        try:
            # Ensure the directory exists first
            directory = os.path.dirname(self._log_filename)
            os.makedirs(directory, exist_ok=True)

            with open(self._log_filename, 'a') as logfile:

                # Write one last line to the log and flush it to disk
                def flush_log():

                    # If the process currently had something happening in the I/O stack
                    # then trying to reenter the I/O stack will fire a runtime error.
                    #
                    # So just try to flush as well as we can at SIGTERM time
                    try:
                        logfile.write('\n\nForcefully terminated\n')
                        logfile.flush()
                    except RuntimeError:
                        os.fsync(logfile.fileno())

                self._log_handle = logfile
                with _signals.terminator(flush_log):
                    yield self._log_filename
        finally:
            self._log_handle = None
            self._log_filename = None

    # get_log_handle()
    #
    # Fetches the active log handle, this will return the active
    # log file handle when the Context.recorded_messages() context
    # manager is active
    #
    # Returns:
    #     (file): The active logging file handle, or None
    #
    def get_log_handle(self):
        return self._log_handle

    # get_log_filename()
    #
    # Fetches the active log filename, this will return the active
    # log filename when the Context.recorded_messages() context
    # manager is active
    #
    # Returns:
    #     (str): The active logging filename, or None
    #
    def get_log_filename(self):
        return self._log_filename

    # _record_message()
    #
    # Records the message in the open log file, if recording is
    # currently enabled (i.e. recorded_messages() is active).
    #
    # Args:
    #    message (Message): The message to record
    #
    def _record_message(self, message):

        # No-op when we are not recording
        if self._log_handle is None:
            return

        INDENT = "    "
        EMPTYTIME = "--:--:--"
        template = "[{timecode: <8}] {type: <7}"

        # If this message is associated with a plugin, print what
        # we know about the plugin.
        plugin_name = ""
        if message.unique_id:
            template += " {plugin}"
            plugin = _plugin_lookup(message.unique_id)
            plugin_name = plugin.name

        template += ": {message}"

        detail = ''
        if message.detail is not None:
            # Indent every line of the detail text under the message
            template += "\n\n{detail}"
            detail = message.detail.rstrip('\n')
            detail = INDENT + INDENT.join(detail.splitlines(True))

        # Only SUCCESS/FAIL messages carry an elapsed time; render it
        # as HH:MM:SS, otherwise show the empty time placeholder
        timecode = EMPTYTIME
        if message.message_type in (MessageType.SUCCESS, MessageType.FAIL):
            hours, remainder = divmod(int(message.elapsed.total_seconds()), 60**2)
            minutes, seconds = divmod(remainder, 60)
            timecode = "{0:02d}:{1:02d}:{2:02d}".format(hours, minutes, seconds)

        text = template.format(timecode=timecode,
                               plugin=plugin_name,
                               type=message.message_type.upper(),
                               message=message.message,
                               detail=detail)

        # Write to the open log file and flush so the log is current
        # even if we are terminated
        self._log_handle.write('{}\n'.format(text))
        self._log_handle.flush()

647 648 649 650 651 652 653 654 655 656 657
    # _push_message_depth() / _pop_message_depth()
    #
    # For status messages, send the depth of timed
    # activities inside a given task through the message
    #
    def _push_message_depth(self, silent_nested):
        self._message_depth.appendleft(silent_nested)

    def _pop_message_depth(self):
        assert self._message_depth
        self._message_depth.popleft()
658

659 660 661 662 663 664 665 666 667 668 669
    # Force the resolved XDG variables into the environment,
    # this is so that they can be used directly to specify
    # preferred locations of things from user configuration
    # files.
    def _init_xdg(self):
        if not os.environ.get('XDG_CACHE_HOME'):
            os.environ['XDG_CACHE_HOME'] = os.path.expanduser('~/.cache')
        if not os.environ.get('XDG_CONFIG_HOME'):
            os.environ['XDG_CONFIG_HOME'] = os.path.expanduser('~/.config')
        if not os.environ.get('XDG_DATA_HOME'):
            os.environ['XDG_DATA_HOME'] = os.path.expanduser('~/.local/share')
670

671 672
    def get_cascache(self):
        if self._cascache is None:
673
            self._cascache = CASCache(self.cachedir)
674 675
        return self._cascache

676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701


# _node_get_option_str()
#
# Like _yaml.node_get(), but additionally restricts the fetched string
# to one of a set of pre-defined allowed option values.
#
# Args:
#    node (dict): The dictionary node
#    key (str): The key to get a value for in node
#    allowed_options (iterable): Only accept these values
#
# Returns:
#    The value, if found in 'node'.
#
# Raises:
#    LoadError, when the value is not of the expected type, or is not found.
#
def _node_get_option_str(node, key, allowed_options):
    value = _yaml.node_get(node, str, key)
    if value in allowed_options:
        return value

    # Report the offending value with its YAML provenance
    provenance = _yaml.node_get_provenance(node, key)
    raise LoadError(LoadErrorReason.INVALID_DATA,
                    "{}: {} should be one of: {}".format(
                        provenance, key, ", ".join(allowed_options)))