blob.rb 5.79 KB
Newer Older
1 2
# frozen_string_literal: true

Robert Speicher's avatar
Robert Speicher committed
3 4 5
module Gitlab
  module Git
    class Blob
6
      include Gitlab::BlobHelper
7
      include Gitlab::EncodingHelper
8
      extend Gitlab::Git::WrapsGitalyErrors
Robert Speicher's avatar
Robert Speicher committed
9 10

      # This number is the maximum amount of data that we want to display to
      # the user. We load as much as we can for encoding detection and LFS
      # pointer parsing. All other cases where we need full blob data should
      # use load_all_data!.
14
      MAX_DATA_DISPLAY_SIZE = 10.megabytes
Robert Speicher's avatar
Robert Speicher committed
15

16 17 18 19 20 21
      # These limits are used as a heuristic to ignore files which can't be LFS
      # pointers. The format of these is described in
      # https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer
      LFS_POINTER_MIN_SIZE = 120.bytes
      LFS_POINTER_MAX_SIZE = 200.bytes

Robert Speicher's avatar
Robert Speicher committed
22 23 24
      attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary

      class << self
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
        # Looks up the tree entry at +path+ in commit +sha+ through Gitaly and
        # wraps it in a new instance.
        #
        # repository - passed to Gitlab::GitalyClient::CommitService.new
        # sha        - commit id; stored on the result as commit_id
        # path       - path inside the tree; leading slashes are stripped and an
        #              empty remainder becomes '/'
        # limit      - maximum number of data bytes wanted; 0 means metadata only
        #
        # Returns nil when +path+ is nil, when Gitaly returns no entry, or when
        # the entry type is neither :COMMIT nor :BLOB.
        def find(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
          return unless path

          path = path.sub(%r{\A/*}, '')
          path = '/' if path.empty?
          name = File.basename(path)

          # Gitaly will think that setting the limit to 0 means unlimited, while
          # the client might only need the metadata and thus set the limit to 0.
          # In this method we'll then set the limit to 1, but clear the byte of data
          # that we got back so for the outside world it looks like the limit was
          # actually 0.
          metadata_only = limit == 0
          req_limit = metadata_only ? 1 : limit

          entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, req_limit)
          return unless entry

          entry.data = "" if metadata_only

          case entry.type
          when :COMMIT
            # Commit entries carry no content of their own: size 0, empty data.
            new(id: entry.oid, name: name, size: 0, data: '', path: path, commit_id: sha)
          when :BLOB
            # The mode is stored as an octal string; binary detection runs on
            # whatever data Gitaly returned (possibly truncated by the limit).
            new(id: entry.oid, name: name, size: entry.size, data: entry.data.dup, mode: entry.mode.to_s(8),
                path: path, commit_id: sha, binary: binary?(entry.data))
          end
        end

        # Fetches the blob identified by +sha+ through the repository's Gitaly
        # blob client, capped at MAX_DATA_DISPLAY_SIZE bytes.
        #
        # Returns whatever get_blob returns for that oid.
        def raw(repository, sha)
          repository.gitaly_blob_client.get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE)
        end
Robert Speicher's avatar
Robert Speicher committed
56

57
        # Returns an array of Blob instances, specified in blob_references as
        # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
        # full blob contents are returned. If blob_size_limit >= 0 then each blob will
        # contain no more than blob_size_limit bytes in its data attribute.
        #
        # Keep in mind that this method may allocate a lot of memory. It is up
        # to the caller to limit the number of blobs and blob_size_limit.
        #
        def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
          # to_a materializes the result of get_blobs so callers always
          # receive an Array.
          repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a
        end

69 70 71 72 73 74
        # Returns an array of Blob instances just with the metadata, that means
        # the data attribute has no content.
        def batch_metadata(repository, blob_references)
          # A size limit of zero asks batch for blobs without any data bytes.
          metadata_only_limit = 0

          batch(repository, blob_references, blob_size_limit: metadata_only_limit)
        end

75 76 77 78
        # Find LFS blobs given an array of sha ids
        # Returns array of Gitlab::Git::Blob
        # Does not guarantee blob data will be set
        def batch_lfs_pointers(repository, blob_ids)
          # blob_ids may be any object responding to to_a; it is converted to an
          # Array before being handed to the Gitaly client. The call runs inside
          # wrapped_gitaly_errors.
          wrapped_gitaly_errors do
            repository.gitaly_blob_client.batch_lfs_pointers(blob_ids.to_a)
          end
        end

84
        # Reports whether +data+ is binary, as decided by
        # EncodingHelper.detect_libgit2_binary?.
        def binary?(data)
          EncodingHelper.detect_libgit2_binary?(data)
        end

88 89 90
        # True when +size+ lies within the inclusive range
        # LFS_POINTER_MIN_SIZE..LFS_POINTER_MAX_SIZE.
        def size_could_be_lfs?(size)
          size >= LFS_POINTER_MIN_SIZE && size <= LFS_POINTER_MAX_SIZE
        end
Robert Speicher's avatar
Robert Speicher committed
91 92 93 94
      end

      # Builds a blob from an options hash.
      #
      # options - Hash that may contain the keys :id, :name, :path, :size,
      #           :data, :mode, :commit_id and :binary. Each one is assigned
      #           through its writer; keys that are absent are assigned nil.
      def initialize(options)
        %w[id name path size data mode commit_id binary].each do |attribute|
          self.__send__("#{attribute}=", options[attribute.to_sym]) # rubocop:disable GitlabSecurity/PublicSend
        end

        # Retain the actual size before it is encoded
        @loaded_size = @data.bytesize if @data

        # The blob counts as fully loaded when the number of bytes we hold
        # equals the reported size.
        @loaded_all_data = @loaded_size == size
      end

103
      # Uses the stored @binary flag when one is set; when it is nil the
      # decision is delegated to the inherited implementation via super.
      def binary_in_repo?
        return super if @binary.nil?

        # Only a literal true counts as binary; any other non-nil value is false.
        @binary == true
      end

      # Returns the stored data after passing it through encode!.
      def data
        encode!(@data)
      end

      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
      # memory as a Ruby string.
      def load_all_data!(repository)
        return if @data == '' # don't mess with submodule blobs

        # Even if we return early, recalculate whether this blob is binary in
        # case a blob was initialized as text but the full data isn't
        @binary = nil

        return if @loaded_all_data

        # limit: -1 presumably lifts the size cap (negative limits are described
        # that way for the batch lookup) -- confirm against the Gitaly client.
        @data = repository.gitaly_blob_client.get_blob(oid: id, limit: -1).data
        @loaded_all_data = true
        @loaded_size = @data.bytesize
      end

      # Returns the stored name after passing it through encode!.
      def name
        encode!(@name)
      end

131 132 133 134
      # Returns the stored path after passing it through encode!.
      def path
        encode!(@path)
      end

135 136 137 138
      # True when the reported size exceeds the number of bytes actually held.
      #
      # Returns false when either size or loaded_size is unknown (nil);
      # loaded_size is only set once data is present, so a blob built without
      # data must not raise on the comparison.
      def truncated?
        return false unless size && loaded_size

        size > loaded_size
      end

Robert Speicher's avatar
Robert Speicher committed
139 140 141 142 143 144
      # A valid LFS object pointer is a text file consisting of these keys:
      #   version
      #   oid
      #   size
      # See https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
      def lfs_pointer?
        # The size check comes first so the content checks are skipped for
        # blobs whose size rules them out.
        self.class.size_could_be_lfs?(size) && has_lfs_version_key? && lfs_oid.present? && lfs_size.present?
      end

      # Extracts the 64-character lowercase hex digest that follows "sha256:"
      # in the data. Returns nil when the data has no LFS version key or no
      # such digest.
      def lfs_oid
        return unless has_lfs_version_key?

        data[/(?<=sha256:)([0-9a-f]{64})/, 1]
      end

      # Extracts the run of digits that follows "size " in the data and returns
      # it as an Integer. Returns nil when the data has no LFS version key or
      # no such number.
      def lfs_size
        return unless has_lfs_version_key?

        # Named size_match so the local does not shadow the size accessor.
        size_match = data.match(/(?<=size )([0-9]+)/)
        size_match[1].to_i if size_match
      end

166 167 168 169
      # Returns :lfs when this blob is an LFS pointer, nil otherwise.
      def external_storage
        :lfs if lfs_pointer?
      end

172 173
      # external_size is an alias of lfs_size.
      alias_method :external_size, :lfs_size

Robert Speicher's avatar
Robert Speicher committed
174 175 176
      private

      # True when the blob is non-empty, is text in the repository, and its
      # data begins with the Git LFS spec version prefix.
      def has_lfs_version_key?
        !empty? && text_in_repo? && data.start_with?("version https://git-lfs.github.com/spec")
      end
    end
  end
end