Commit 4df2363c authored by Tiago's avatar Tiago
Browse files

providing strategy for punycode translation, using idnx when possible, falling back otherwise

parent 5d64f93e
Pipeline #319502058 passed with stages
in 8 minutes and 24 seconds
......@@ -66,6 +66,7 @@ group :test do
gem "aws-sdk-s3"
gem "faraday"
gem "idnx" if RUBY_VERSION >= "2.4.0"
gem "oga"
if RUBY_VERSION >= "3.0.0"
......
......@@ -51,7 +51,7 @@ module HTTPX
def initialize(type, uri, options)
@type = type
@origins = [uri.origin]
@origin = Utils.uri(uri.origin)
@origin = Utils.to_uri(uri.origin)
@options = Options.new(options)
@window_size = @options.window_size
@read_buffer = Buffer.new(BUFFER_SIZE)
......
# frozen_string_literal: true
module HTTPX
# :nocov:
# -*- coding: utf-8 -*-
#--
# punycode.rb - PunyCode encoder for the Domain Name library
#
# Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved.
#
# Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
# Library.
#
# Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# 1) Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2) Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3) Neither the name of the VeriSign Inc. nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# This software is licensed under the BSD open source license. For more
# information visit www.opensource.org.
#
# Authors:
# John Colosi (VeriSign)
# Srikanth Veeramachaneni (VeriSign)
# Nagesh Chigurupati (Verisign)
# Praveen Srinivasan(Verisign)
#++
module Punycode
BASE = 36
TMIN = 1
TMAX = 26
SKEW = 38
DAMP = 700
INITIAL_BIAS = 72
INITIAL_N = 0x80
DELIMITER = "-"
MAXINT = (1 << 32) - 1
LOBASE = BASE - TMIN
CUTOFF = LOBASE * TMAX / 2
RE_NONBASIC = /[^\x00-\x7f]/.freeze
# Returns the numeric value of a basic code point (for use in
# representing integers) in the range 0 to base-1, or nil if cp
# does not represent a value.
DECODE_DIGIT = {}.tap do |map|
# ASCII A..Z map to 0..25
# ASCII a..z map to 0..25
(0..25).each { |i| map[65 + i] = map[97 + i] = i }
# ASCII 0..9 map to 26..35
(26..35).each { |i| map[22 + i] = i }
begin
require "idnx"
module Punycode
  # Translates +hostname+ to its punycode form by handing it straight to
  # Idnx.to_punycode and returning whatever that call returns.
  def encode_hostname(hostname)
    Idnx.to_punycode(hostname)
  end

  # Expose the method as Punycode.encode_hostname while keeping the
  # instance-level copy private (same effect as a bare `module_function`
  # placed before the definition).
  module_function :encode_hostname
end
# Returns the basic code point whose value (when used for
# representing integers) is d, which must be in the range 0 to
# BASE-1. The lowercase form is used unless flag is true, in
# which case the uppercase form is used. The behavior is
# undefined if flag is nonzero and digit d has no uppercase
# form.
ENCODE_DIGIT = proc { |d, flag|
(d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
# 0..25 map to ASCII a..z or A..Z
# 26..35 map to ASCII 0..9
}
DOT = "."
PREFIX = "xn--"
# Most errors we raise are basically kind of ArgumentError.
class ArgumentError < ::ArgumentError; end
class BufferOverflowError < ArgumentError; end
class << self
rescue LoadError
# :nocov:
# -*- coding: utf-8 -*-
#--
# punycode.rb - PunyCode encoder for the Domain Name library
#
# Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved.
#
# Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
# Library.
#
# Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# 1) Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2) Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3) Neither the name of the VeriSign Inc. nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# This software is licensed under the BSD open source license. For more
# information visit www.opensource.org.
#
# Authors:
# John Colosi (VeriSign)
# Srikanth Veeramachaneni (VeriSign)
# Nagesh Chigurupati (Verisign)
# Praveen Srinivasan(Verisign)
#++
module Punycode
BASE = 36
TMIN = 1
TMAX = 26
SKEW = 38
DAMP = 700
INITIAL_BIAS = 72
INITIAL_N = 0x80
DELIMITER = "-"
MAXINT = (1 << 32) - 1
LOBASE = BASE - TMIN
CUTOFF = LOBASE * TMAX / 2
RE_NONBASIC = /[^\x00-\x7f]/.freeze
# Returns the numeric value of a basic code point (for use in
# representing integers) in the range 0 to base-1, or nil if cp
# does not represent a value.
DECODE_DIGIT = {}.tap do |map|
# ASCII A..Z map to 0..25
# ASCII a..z map to 0..25
(0..25).each { |i| map[65 + i] = map[97 + i] = i }
# ASCII 0..9 map to 26..35
(26..35).each { |i| map[22 + i] = i }
end
# Returns the basic code point whose value (when used for
# representing integers) is d, which must be in the range 0 to
# BASE-1. The lowercase form is used unless flag is true, in
# which case the uppercase form is used. The behavior is
# undefined if flag is nonzero and digit d has no uppercase
# form.
ENCODE_DIGIT = proc { |d, flag|
(d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
# 0..25 map to ASCII a..z or A..Z
# 26..35 map to ASCII 0..9
}
DOT = "."
PREFIX = "xn--"
# Most errors we raise are basically kind of ArgumentError.
class ArgumentError < ::ArgumentError; end
class BufferOverflowError < ArgumentError; end
module_function
# Encode a +string+ in Punycode
def encode(string)
input = string.unpack("U*")
......
......@@ -45,7 +45,7 @@ module HTTPX
def initialize(verb, uri, options = {})
@verb = verb.to_s.downcase.to_sym
@options = Options.new(options)
@uri = Utils.uri(uri)
@uri = Utils.to_uri(uri)
if @uri.relative?
raise(Error, "invalid URI: #{@uri}") unless @options.origin
......
......@@ -18,14 +18,16 @@ module HTTPX
end
if RUBY_VERSION < "2.3"
def uri(*args)
URI(*args)
def to_uri(uri)
URI(uri)
end
else
URIParser = URI::RFC2396_Parser.new
def uri(uri)
def to_uri(uri)
return Kernel.URI(uri) unless uri.is_a?(String) && !uri.ascii_only?
uri = Kernel.URI(URIParser.escape(uri))
......@@ -34,7 +36,7 @@ module HTTPX
non_ascii_hostname.force_encoding(Encoding::UTF_8)
idna_hostname = DomainName.new(non_ascii_hostname).hostname
idna_hostname = Punycode.encode_hostname(non_ascii_hostname)
uri.host = idna_hostname
uri.non_ascii_hostname = non_ascii_hostname
......
# Type signatures for the helper methods exposed by HTTPX::Utils.
module HTTPX
module Utils
# The `self?.` prefix declares each method both as a singleton method
# (Utils.name) and as an instance method of the module.

# Accepts a String and returns a Numeric.
# NOTE(review): presumably the argument is a Retry-After header value; the
# implementation is not visible here -- confirm against the Ruby source.
def self?.parse_retry_after: (String) -> Numeric
# Accepts any value matching the `generic_uri` type alias and returns a
# URI::Generic.
# NOTE(review): `generic_uri` is declared elsewhere; confirm which types it covers.
def self?.to_uri: (generic_uri uri) -> URI::Generic
end
end
\ No newline at end of file
......@@ -6,12 +6,12 @@ RUBY_PLATFORM=`ruby -e 'puts RUBY_PLATFORM'`
RUBY_ENGINE=`ruby -e 'puts RUBY_ENGINE'`
if [[ "$RUBY_ENGINE" = "truffleruby" ]]; then
microdnf install -y iptables iproute which file
microdnf install -y iptables iproute which file idn2
elif [[ "$RUBY_PLATFORM" = "java" ]]; then
echo "
deb http://deb.debian.org/debian sid main contrib non-free
deb-src http://deb.debian.org/debian sid main contrib non-free" >> /etc/apt/sources.list
apt-get update && apt-get install -y iptables openssl libssl-dev ca-certificates file
apt-get update && apt-get install -y iptables openssl libssl-dev ca-certificates file idn2
update-ca-certificates
elif [[ ${RUBY_VERSION:0:3} = "2.1" ]]; then
apt-get update && apt-get install -y libsodium-dev iptables
......@@ -23,7 +23,7 @@ elif [[ ${RUBY_VERSION:0:3} = "2.3" ]]; then
wget http://deb.debian.org/debian/pool/main/o/openssl1.0/libssl1.0-dev_1.0.2u-1~deb9u1_amd64.deb
dpkg -i libssl1.0-dev_1.0.2u-1~deb9u1_amd64.deb
else
apt-get update && apt-get install -y iptables
apt-get update && apt-get install -y iptables idn2
fi
# use port 9090 to test connection timeouts
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment