From 790fb40b75075c9af64798df3aa54fd4d8fe140e Mon Sep 17 00:00:00 2001
From: Tiago Gomes <tiago.gomes@codethink.co.uk>
Date: Tue, 17 Jul 2018 17:34:17 +0100
Subject: [PATCH 1/2] Update checkout man page for new --deps option

---
 man/bst-checkout.1 | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/man/bst-checkout.1 b/man/bst-checkout.1
index aaf14203c2..1a98296240 100644
--- a/man/bst-checkout.1
+++ b/man/bst-checkout.1
@@ -12,8 +12,11 @@ Checkout a built artifact to the specified directory
 \fB\-f,\fP \-\-force
 Overwrite files existing in checkout directory
 .TP
+\fB\-f,\fP \-\-deps
+The dependencies to checkout (default: run)
+.TP
 \fB\-\-integrate\fP / \-\-no\-integrate
 Whether to run integration commands
 .TP
 \fB\-\-hardlinks\fP
-Checkout hardlinks instead of copies (handle with care)
\ No newline at end of file
+Checkout hardlinks instead of copies (handle with care)
-- 
GitLab


From 9c1f024b15849fae1d8512c0215e4fb0b24fc311 Mon Sep 17 00:00:00 2001
From: Tiago Gomes <tiago.gomes@codethink.co.uk>
Date: Mon, 16 Jul 2018 16:45:16 +0100
Subject: [PATCH 2/2] Add support for creating a tarball on bst checkout

One of the added tests is skipped for now, because dumping binary data
to the standard output raises a bad file descriptor exception under the
pytest capture module.

Closes #263.
---
 NEWS                            |   8 +++
 buildstream/_frontend/cli.py    |  24 +++++--
 buildstream/_stream.py          | 114 ++++++++++++++++++++++++--------
 man/bst-checkout.1              |  10 ++-
 tests/frontend/buildcheckout.py | 109 +++++++++++++++++++++++++++++-
 5 files changed, 227 insertions(+), 38 deletions(-)

diff --git a/NEWS b/NEWS
index c9ac8c3662..dd60fe62bd 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+=================
+buildstream 1.3.1
+=================
+
+  o Add a `--tar` option to `bst checkout` which allows a tarball to be
+    created from the artifact contents.
+
 =================
 buildstream 1.1.4
 =================
@@ -19,6 +26,7 @@ buildstream 1.1.4
     runs out of space. The exact behavior is configurable in the user's
     buildstream.conf.
 
+
 =================
 buildstream 1.1.3
 =================
diff --git a/buildstream/_frontend/cli.py b/buildstream/_frontend/cli.py
index 51744e806f..bd2ce8a739 100644
--- a/buildstream/_frontend/cli.py
+++ b/buildstream/_frontend/cli.py
@@ -626,7 +626,7 @@ def shell(app, element, sysroot, mount, isolate, build_, command):
 ##################################################################
 @cli.command(short_help="Checkout a built artifact")
 @click.option('--force', '-f', default=False, is_flag=True,
-              help="Overwrite files existing in checkout directory")
+              help="Allow files to be overwritten")
 @click.option('--deps', '-d', default='run',
               type=click.Choice(['run', 'none']),
               help='The dependencies to checkout (default: run)')
@@ -634,20 +634,30 @@ def shell(app, element, sysroot, mount, isolate, build_, command):
               help="Whether to run integration commands")
 @click.option('--hardlinks', default=False, is_flag=True,
               help="Checkout hardlinks instead of copies (handle with care)")
+@click.option('--tar', default=False, is_flag=True,
+              help="Create a tarball from the artifact contents instead "
+                   "of a file tree. If LOCATION is '-', the tarball "
+                   "will be dumped to the standard output.")
 @click.argument('element',
                 type=click.Path(readable=False))
-@click.argument('directory', type=click.Path(file_okay=False))
+@click.argument('location', type=click.Path())
 @click.pass_obj
-def checkout(app, element, directory, force, deps, integrate, hardlinks):
-    """Checkout a built artifact to the specified directory
+def checkout(app, element, location, force, deps, integrate, hardlinks, tar):
+    """Checkout a built artifact to the specified location
     """
+
+    if hardlinks and tar:
+        click.echo("ERROR: options --hardlinks and --tar conflict", err=True)
+        sys.exit(-1)
+
     with app.initialized():
         app.stream.checkout(element,
-                            directory=directory,
-                            deps=deps,
+                            location=location,
                             force=force,
+                            deps=deps,
                             integrate=integrate,
-                            hardlinks=hardlinks)
+                            hardlinks=hardlinks,
+                            tar=tar)
 
 
 ##################################################################
diff --git a/buildstream/_stream.py b/buildstream/_stream.py
index 002d377882..365709c46b 100644
--- a/buildstream/_stream.py
+++ b/buildstream/_stream.py
@@ -20,6 +20,7 @@
 #        Tristan Maat <tristan.maat@codethink.co.uk>
 
 import os
+import sys
 import stat
 import shlex
 import shutil
@@ -350,56 +351,91 @@ class Stream():
 
     # checkout()
     #
-    # Checkout the pipeline target artifact to the specified directory
+    # Checkout target artifact to the specified location
     #
     # Args:
     #    target (str): Target to checkout
-    #    directory (str): The directory to checkout the artifact to
-    #    force (bool): Force overwrite files which exist in `directory`
+    #    location (str): Location to checkout the artifact to
+    #    force (bool): Whether files can be overwritten if necessary
+    #    deps (str): The dependencies to checkout
     #    integrate (bool): Whether to run integration commands
     #    hardlinks (bool): Whether checking out files hardlinked to
     #                      their artifacts is acceptable
+    #    tar (bool): If true, a tarball from the artifact contents will
+    #                be created, otherwise the file tree of the artifact
+    #                will be placed at the given location. If true and
+    #                location is '-', the tarball will be dumped on the
+    #                standard output.
     #
     def checkout(self, target, *,
-                 deps='run',
-                 directory=None,
+                 location=None,
                  force=False,
+                 deps='run',
                  integrate=True,
-                 hardlinks=False):
+                 hardlinks=False,
+                 tar=False):
 
         # We only have one target in a checkout command
         elements, _ = self._load((target,), (), fetch_subprojects=True)
         target = elements[0]
 
-        try:
-            os.makedirs(directory, exist_ok=True)
-        except OSError as e:
-            raise StreamError("Failed to create checkout directory: {}".format(e)) from e
-
-        if not os.access(directory, os.W_OK):
-            raise StreamError("Directory {} not writable".format(directory))
+        if not tar:
+            try:
+                os.makedirs(location, exist_ok=True)
+            except OSError as e:
+                raise StreamError("Failed to create checkout directory: '{}'"
+                                  .format(e)) from e
 
-        if not force and os.listdir(directory):
-            raise StreamError("Checkout directory is not empty: {}"
-                              .format(directory))
+        if not tar:
+            if not os.access(location, os.W_OK):
+                raise StreamError("Checkout directory '{}' not writable"
+                                  .format(location))
+            if not force and os.listdir(location):
+                raise StreamError("Checkout directory '{}' not empty"
+                                  .format(location))
+        elif os.path.exists(location) and location != '-':
+            if not os.access(location, os.W_OK):
+                raise StreamError("Output file '{}' not writable"
+                                  .format(location))
+            if not force and os.path.exists(location):
+                raise StreamError("Output file '{}' already exists"
+                                  .format(location))
 
         # Stage deps into a temporary sandbox first
         try:
-            with target._prepare_sandbox(Scope.RUN, None, deps=deps, integrate=integrate) as sandbox:
+            with target._prepare_sandbox(Scope.RUN, None, deps=deps,
+                                         integrate=integrate) as sandbox:
 
                 # Copy or move the sandbox to the target directory
                 sandbox_root = sandbox.get_directory()
-                with target.timed_activity("Checking out files in {}".format(directory)):
-                    try:
-                        if hardlinks:
-                            self._checkout_hardlinks(sandbox_root, directory)
-                        else:
-                            utils.copy_files(sandbox_root, directory)
-                    except OSError as e:
-                        raise StreamError("Failed to checkout files: {}".format(e)) from e
+                if not tar:
+                    with target.timed_activity("Checking out files in '{}'"
+                                               .format(location)):
+                        try:
+                            if hardlinks:
+                                self._checkout_hardlinks(sandbox_root, location)
+                            else:
+                                utils.copy_files(sandbox_root, location)
+                        except OSError as e:
+                            raise StreamError("Failed to checkout files: '{}'"
+                                              .format(e)) from e
+                else:
+                    if location == '-':
+                        with target.timed_activity("Creating tarball"):
+                            with os.fdopen(sys.stdout.fileno(), 'wb') as fo:
+                                with tarfile.open(fileobj=fo, mode="w|") as tf:
+                                    Stream._add_directory_to_tarfile(
+                                        tf, sandbox_root, '.')
+                    else:
+                        with target.timed_activity("Creating tarball '{}'"
+                                                   .format(location)):
+                            with tarfile.open(location, "w:") as tf:
+                                Stream._add_directory_to_tarfile(
+                                    tf, sandbox_root, '.')
+
         except BstError as e:
-            raise StreamError("Error while staging dependencies into a sandbox: {}".format(e),
-                              reason=e.reason) from e
+            raise StreamError("Error while staging dependencies into a sandbox"
+                              ": '{}'".format(e), reason=e.reason) from e
 
     # workspace_open
     #
@@ -1027,6 +1063,30 @@ class Stream():
         else:
             utils.link_files(sandbox_root, directory)
 
+    # Add a directory entry deterministically to a tar file
+    #
+    # This function takes extra steps to ensure the output is deterministic.
+    # First, it sorts the results of os.listdir() to ensure the ordering of
+    # the files in the archive is the same.  Second, it sets a fixed
+    # timestamp for each entry. See also https://bugs.python.org/issue24465.
+    @staticmethod
+    def _add_directory_to_tarfile(tf, dir_name, dir_arcname, mtime=0):
+        for filename in sorted(os.listdir(dir_name)):
+            name = os.path.join(dir_name, filename)
+            arcname = os.path.join(dir_arcname, filename)
+
+            tarinfo = tf.gettarinfo(name, arcname)
+            tarinfo.mtime = mtime
+
+            if tarinfo.isreg():
+                with open(name, "rb") as f:
+                    tf.addfile(tarinfo, f)
+            elif tarinfo.isdir():
+                tf.addfile(tarinfo)
+                Stream._add_directory_to_tarfile(tf, name, arcname, mtime)
+            else:
+                tf.addfile(tarinfo)
+
     # Write the element build script to the given directory
     def _write_element_script(self, directory, element):
         try:
diff --git a/man/bst-checkout.1 b/man/bst-checkout.1
index 1a98296240..243bdaa30e 100644
--- a/man/bst-checkout.1
+++ b/man/bst-checkout.1
@@ -3,14 +3,14 @@
 bst\-checkout \- Checkout a built artifact
 .SH SYNOPSIS
 .B bst checkout
-[OPTIONS] ELEMENT DIRECTORY
+[OPTIONS] ELEMENT LOCATION
 .SH DESCRIPTION
-Checkout a built artifact to the specified directory
+Checkout a built artifact to the specified location
     
 .SH OPTIONS
 .TP
 \fB\-f,\fP \-\-force
-Overwrite files existing in checkout directory
+Allow files to be overwritten
 .TP
-\fB\-f,\fP \-\-deps
+\fB\-d,\fP \-\-deps
 The dependencies to checkout (default: run)
@@ -20,3 +20,7 @@ Whether to run integration commands
 .TP
 \fB\-\-hardlinks\fP
 Checkout hardlinks instead of copies (handle with care)
+.TP
+\fB\-\-tar\fP
+Create a tarball from the artifact contents instead of a file tree. If
+LOCATION is '-', the tarball will be dumped to the standard output.
diff --git a/tests/frontend/buildcheckout.py b/tests/frontend/buildcheckout.py
index 0fce355d4f..1ec8f491f6 100644
--- a/tests/frontend/buildcheckout.py
+++ b/tests/frontend/buildcheckout.py
@@ -1,4 +1,6 @@
 import os
+import tarfile
+import hashlib
 import pytest
 from tests.testutils import cli, create_repo, ALL_REPO_KINDS
 
@@ -54,7 +56,6 @@ def test_build_checkout(datafiles, cli, strict, hardlinks):
     filename = os.path.join(checkout, 'usr', 'bin', 'hello')
     assert os.path.exists(filename)
 
-    # Check that the executable hello file is found in the checkout
     filename = os.path.join(checkout, 'usr', 'include', 'pony.h')
     assert os.path.exists(filename)
 
@@ -95,6 +96,88 @@ def test_build_checkout_deps(datafiles, cli, deps):
         assert not os.path.exists(filename)
 
 
+@pytest.mark.datafiles(DATA_DIR)
+def test_build_checkout_tarball(datafiles, cli):
+    project = os.path.join(datafiles.dirname, datafiles.basename)
+    checkout = os.path.join(cli.directory, 'checkout.tar')
+
+    result = cli.run(project=project, args=['build', 'target.bst'])
+    result.assert_success()
+
+    builddir = os.path.join(cli.directory, 'build')
+    assert os.path.isdir(builddir)
+    assert not os.listdir(builddir)
+
+    checkout_args = ['checkout', '--tar', 'target.bst', checkout]
+
+    result = cli.run(project=project, args=checkout_args)
+    result.assert_success()
+
+    tar = tarfile.TarFile(checkout)
+    assert os.path.join('.', 'usr', 'bin', 'hello') in tar.getnames()
+    assert os.path.join('.', 'usr', 'include', 'pony.h') in tar.getnames()
+
+
+@pytest.mark.skip(reason="Capturing the binary output is causing a stacktrace")
+@pytest.mark.datafiles(DATA_DIR)
+def test_build_checkout_tarball_stdout(datafiles, cli):
+    project = os.path.join(datafiles.dirname, datafiles.basename)
+    tarball = os.path.join(cli.directory, 'tarball.tar')
+
+    result = cli.run(project=project, args=['build', 'target.bst'])
+    result.assert_success()
+
+    builddir = os.path.join(cli.directory, 'build')
+    assert os.path.isdir(builddir)
+    assert not os.listdir(builddir)
+
+    checkout_args = ['checkout', '--tar', 'target.bst', '-']
+
+    result = cli.run(project=project, args=checkout_args)
+    result.assert_success()
+
+    with open(tarball, 'wb') as f:
+        f.write(result.output)
+
+    tar = tarfile.TarFile(tarball)
+    assert os.path.join('.', 'usr', 'bin', 'hello') in tar.getnames()
+    assert os.path.join('.', 'usr', 'include', 'pony.h') in tar.getnames()
+
+
+@pytest.mark.datafiles(DATA_DIR)
+def test_build_checkout_tarball_is_deterministic(datafiles, cli):
+    project = os.path.join(datafiles.dirname, datafiles.basename)
+    tarball1 = os.path.join(cli.directory, 'tarball1.tar')
+    tarball2 = os.path.join(cli.directory, 'tarball2.tar')
+
+    result = cli.run(project=project, args=['build', 'target.bst'])
+    result.assert_success()
+
+    builddir = os.path.join(cli.directory, 'build')
+    assert os.path.isdir(builddir)
+    assert not os.listdir(builddir)
+
+    checkout_args = ['checkout', '--force', '--tar', 'target.bst']
+
+    checkout_args1 = checkout_args + [tarball1]
+    result = cli.run(project=project, args=checkout_args1)
+    result.assert_success()
+
+    checkout_args2 = checkout_args + [tarball2]
+    result = cli.run(project=project, args=checkout_args2)
+    result.assert_success()
+
+    with open(tarball1, 'rb') as f:
+        contents = f.read()
+    hash1 = hashlib.sha1(contents).hexdigest()
+
+    with open(tarball2, 'rb') as f:
+        contents = f.read()
+    hash2 = hashlib.sha1(contents).hexdigest()
+
+    assert hash1 == hash2
+
+
 @pytest.mark.datafiles(DATA_DIR)
 @pytest.mark.parametrize("hardlinks", [("copies"), ("hardlinks")])
 def test_build_checkout_nonempty(datafiles, cli, hardlinks):
@@ -171,6 +254,30 @@ def test_build_checkout_force(datafiles, cli, hardlinks):
     assert os.path.exists(filename)
 
 
+@pytest.mark.datafiles(DATA_DIR)
+def test_build_checkout_force_tarball(datafiles, cli):
+    project = os.path.join(datafiles.dirname, datafiles.basename)
+    tarball = os.path.join(cli.directory, 'tarball.tar')
+
+    result = cli.run(project=project, args=['build', 'target.bst'])
+    result.assert_success()
+
+    builddir = os.path.join(cli.directory, 'build')
+    assert os.path.isdir(builddir)
+    assert not os.listdir(builddir)
+
+    with open(tarball, "w") as f:
+        f.write("Hello")
+
+    checkout_args = ['checkout', '--force', '--tar', 'target.bst', tarball]
+
+    result = cli.run(project=project, args=checkout_args)
+    result.assert_success()
+
+    tar = tarfile.TarFile(tarball)
+    assert os.path.join('.', 'usr', 'bin', 'hello') in tar.getnames()
+    assert os.path.join('.', 'usr', 'include', 'pony.h') in tar.getnames()
+
 fetch_build_checkout_combos = \
     [("strict", kind) for kind in ALL_REPO_KINDS] + \
     [("non-strict", kind) for kind in ALL_REPO_KINDS]
-- 
GitLab