# frozen_string_literal: true

##
# This class is compatible with the IO class (https://ruby-doc.org/core-2.3.1/IO.html)
# source: https://gitlab.com/snippets/1685610
module Gitlab
  class HttpIO
    # Raised by the constructor when the URL is rejected by the URL sanitizer.
    InvalidURLError = Class.new(StandardError)
    # Raised when a chunk request is answered with a status other than 200 or 206.
    FailedToGetChunkError = Class.new(StandardError)

    # Number of bytes asked for in each ranged GET request.
    BUFFER_SIZE = 128.kilobytes

    attr_reader :uri, :size, :tell, :chunk, :chunk_range

    # IO-style name for the current read position.
    alias_method :pos, :tell

    # Builds a reader positioned at byte 0 of the remote file.
    #
    # @param url [String] location of the remote file
    # @param size [Integer] total size of the remote file in bytes
    # @raise [InvalidURLError] when the URL sanitizer rejects +url+
    def initialize(url, size)
      valid = ::Gitlab::UrlSanitizer.valid?(url)
      raise InvalidURLError unless valid

      @uri, @size, @tell = URI(url), size, 0
    end

    # Intentionally does nothing; defined so callers can treat this object like an IO.
    def close; end

    # Intentionally does nothing; #binmode? always reports binary mode.
    def binmode; end

    # @return [true] always
    def binmode?
      true
    end

    # @return [nil] always
    def path
      nil
    end

    # @return [String] the string form of the URI this object reads from
    def url
      uri.to_s
    end

    # Moves the read position.
    #
    # @param pos [Integer] offset, interpreted according to +where+
    # @param where [Integer] IO::SEEK_SET (absolute), IO::SEEK_CUR (relative to
    #   the current position) or IO::SEEK_END (relative to +size+)
    # @return [Integer] the new position
    # @raise [RuntimeError] when the resulting position is below 0 or above
    #   +size+; an unrecognised +where+ is mapped to -1 and so raises as well
    def seek(pos, where = IO::SEEK_SET)
      target =
        if IO::SEEK_SET == where
          pos
        elsif IO::SEEK_CUR == where
          tell + pos
        elsif IO::SEEK_END == where
          size + pos
        else
          -1
        end

      raise 'new position is outside of file' unless target.between?(0, size)

      @tell = target
    end

    # Whether the read position has reached the end of the remote file.
    #
    # The read methods advance the position by the byte count of whatever the
    # server returned, so the position can end up beyond +size+ when the remote
    # holds more bytes than +size+ says. Comparing with >= treats that case as
    # EOF too, so the `until eof?` loops still terminate.
    #
    # @return [Boolean]
    def eof?
      tell >= size
    end

    # Yields successive lines, as returned by #readline, until #eof? is true
    # or #readline returns nil.
    #
    # @yieldparam line [String]
    # @return [nil]
    def each_line
      while !eof? && !(current = readline).nil?
        yield(current)
      end
    end
    # Reads bytes starting at the current position and advances the position
    # by the number of bytes read.
    #
    # @param length [Integer, nil] maximum number of bytes to read; nil reads
    #   everything from the current position up to +size+
    # @param outbuf [String, nil] when given, its contents are replaced with
    #   the bytes read
    # @return [String] the bytes read (empty when nothing was read)
    def read(length = nil, outbuf = nil)
      remaining = length || (size - tell)
      pieces = []

      while remaining > 0 && !eof?
        available = get_chunk
        break if available.empty?

        # Never take more than what is left of the current BUFFER_SIZE window.
        wanted = [BUFFER_SIZE - chunk_offset, remaining].min
        piece = available.byteslice(0, wanted)

        pieces << piece
        @tell += piece.bytesize
        remaining -= piece.bytesize
      end

      result = pieces.join

      # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality
      outbuf.replace(result) if outbuf

      result
    end

    # Reads from the current position up to and including the next "\n", or
    # up to the end of the data when no newline remains, advancing the
    # position by the number of bytes consumed.
    #
    # @return [String] the line read (empty when nothing was read)
    def readline
      out = []

      until eof?
        data = get_chunk
        # Same guard as #read: an empty chunk means the remote has nothing
        # more to give even though EOF has not been reached. Without it the
        # position never advances and this loop never ends.
        break if data.empty?

        new_line = data.index("\n")

        if new_line
          out << data[0..new_line]
          @tell += new_line + 1
          break
        end

        # No newline in this chunk: keep all of it and continue with the next.
        out << data
        @tell += data.bytesize
      end

      out.join
    end

    # Writing is not supported.
    #
    # @raise [NotImplementedError] always
    def write(data)
      raise NotImplementedError
    end

    # Truncation is not supported.
    #
    # @raise [NotImplementedError] always
    def truncate(offset)
      raise NotImplementedError
    end

    # Flushing is not supported.
    #
    # @raise [NotImplementedError] always
    def flush
      raise NotImplementedError
    end

    # @return [true] always
    # NOTE(review): presumably here so presence checks treat this object as
    # present - confirm against callers.
    def present?
      true
    end

    private

    ##
    # The below methods are not implemented in IO class
    #
    # Whether the current position falls inside the byte range of the cached
    # chunk. Returns nil when no chunk has been fetched yet.
    def in_range?
      @chunk_range.nil? ? nil : @chunk_range.include?(tell)
    end

    # Returns the bytes of the cached chunk from the current position onwards
    # (at most BUFFER_SIZE bytes), fetching a new chunk over HTTP first when
    # the current position is not covered by the cached one.
    #
    # @return [String] binary data; empty when the chunk holds nothing at the
    #   current position
    # @raise [FailedToGetChunkError] when the response status is neither 200 nor 206
    def get_chunk
      unless in_range?
        response = Net::HTTP.start(
          uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https'
        ) { |http| http.request(request) }

        raise FailedToGetChunkError unless %w[200 206].include?(response.code)

        @chunk = response.body.force_encoding(Encoding::BINARY)

        ##
        # Note: If provider does not return content_range, then we set it as we requested
        # Provider: minio
        # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # Provider: AWS
        # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # Provider: GCS
        # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPOK 200
        @chunk_range = response.content_range || (chunk_start...(chunk_start + @chunk.bytesize))
      end

      # Offset into the cached body is measured from where that body actually
      # starts, not from the BUFFER_SIZE window of the current position: the
      # two differ when the cached body is longer than BUFFER_SIZE.
      # byteslice yields nil when the offset lies beyond the body; to_s turns
      # that into an empty string so callers can rely on #empty?.
      @chunk.byteslice(tell - @chunk_range.begin, BUFFER_SIZE).to_s
    end

    # Builds the GET request for the chunk containing the current position:
    # a Range header starting at chunk_start and spanning BUFFER_SIZE bytes.
    #
    # @return [Net::HTTP::Get]
    def request
      get = Net::HTTP::Get.new(uri)
      get.set_range(chunk_start, BUFFER_SIZE)
      get
    end

    # Distance in bytes of the current position from the start of its
    # BUFFER_SIZE-aligned window.
    def chunk_offset
      tell.modulo(BUFFER_SIZE)
    end

    # First byte of the BUFFER_SIZE-aligned window containing the current
    # position, i.e. the position rounded down to a multiple of BUFFER_SIZE.
    def chunk_start
      tell - (tell % BUFFER_SIZE)
    end

    # End of the current window: chunk_start plus BUFFER_SIZE, capped at +size+.
    def chunk_end
      window_end = chunk_start + BUFFER_SIZE
      window_end < size ? window_end : size
    end
  end
end