Add String.slice_bytes() and String.split()

String.slice_bytes() can be used to slice a String into a ByteArray,
similar to String.slice():

    'foo'.slice_bytes(start: 0, length: 2).to_string # => 'fo'

String.split() can be used to split a string into an Array of
substrings:

    'foo/bar/baz'.split('/') # => Array.new('foo', 'bar', 'baz')
parent 510e6d0c
Pipeline #104211455 passed with stages
in 19 minutes and 34 seconds
......@@ -25,6 +25,7 @@ import std::integer
import std::float
import std::nil
import std::block
import std::process
import std::string
import std::array
import std::iterator
......@@ -40,7 +41,6 @@ import std::array::extensions::(self as _)
import std::map::(Map as _Map)
import std::range::(Range as _Range)
import std::process
import std::vm
import std::module
......
......@@ -2,11 +2,11 @@
#
# Strings are UTF-8 encoded and immutable. A String must _always_ contain valid
# UTF-8.
import std::conversion::(ToFloat, ToInteger, ToString)
import std::hash::(Hasher, Hash)
import std::length::Length
import std::operators::(Equal, Add)
import std::process
impl String {
# Returns the uppercase equivalent of the current `String`.
......@@ -80,21 +80,27 @@ impl String {
# position and the number of _characters_ to include starting at the start
# position.
#
# This method will panic if the `length` argument is negative.
#
# # Examples
#
# Slicing a `String`:
#
# 'hello_world'.slice(0, 5) # => 'hello'
# 'hello_world'.slice(start: 0, length: 5) # => 'hello'
#
# If the `length` argument is greater than the number of available characters
# we only include the characters until the end of the `String`:
#
# 'hello_world'.slice(0, 100) # => 'hello_world'
# 'hello_world'.slice(start: 0, length: 100) # => 'hello_world'
#
# Similar to indexing an `Array` you can use a negative `start` position:
#
# 'hello_world'.slice(-5, 5) # => 'world'
# 'hello_world'.slice(start: -5, length: 5) # => 'world'
def slice(start: Integer, length: Integer) -> String {
  # A slice can't contain a negative number of characters, so reject such a
  # length before handing the work over to the compiler intrinsic.
  let invalid_length = length.negative?

  invalid_length.if_true {
    process.panic('The slice length must be zero or greater')
  }

  _INKOC.string_slice(self, start, length)
}
......
# Extensions for the `String` type that can only be defined later on in the
# bootstrapping process.
import std::byte_array::ToByteArray
import std::process
# Checks whether `separator` occurs in `string` at the byte position `start`.
#
# `string` is the `String` being split, and `separator` holds the bytes of the
# separator to look for. `start` is the byte position in `string` at which the
# first byte of the separator is expected.
#
# `False` is returned right away when fewer bytes remain from `start` onwards
# than the separator contains.
def split_at?(
  string: String,
  separator: ByteArray,
  start: Integer
) -> Boolean {
  let remaining = string.bytesize - start

  (remaining < separator.length).if_true {
    return False
  }

  # Compare the separator byte by byte, bailing out on the first mismatch.
  separator.each_with_index do (expected, offset) {
    let actual = string.byte(start + offset)

    (expected == actual).if_false {
      return False
    }
  }

  True
}
impl ToByteArray for String {
# Returns a `ByteArray` containing the bytes of this `String`.
......@@ -8,3 +34,101 @@ impl ToByteArray for String {
_INKOC.string_to_byte_array(self) as ByteArray
}
}
impl String {
  # Slices `self` into a `ByteArray`.
  #
  # The returned `ByteArray` will contain all _bytes_ starting at the _byte_
  # position specified in `start`, and will contain up to `length` _bytes_. If
  # fewer than `length` bytes are available, only the bytes up to the end of
  # the `String` are included.
  #
  # Just like `String.slice` the starting position can be negative, meaning the
  # slicing starts from the end of the `String`.
  #
  # Slicing an empty `String` always produces an empty `ByteArray`.
  #
  # This method will panic if the `length` argument is negative.
  #
  # # Examples
  #
  # Slicing a `String` into a `ByteArray`:
  #
  #     'inko'.slice_bytes(start: 0, length: 4).to_string # => 'inko'
  #
  # Slicing a `String` into a `ByteArray` using a negative start index:
  #
  #     'inko'.slice_bytes(start: -4, length: 4).to_string # => 'inko'
  def slice_bytes(start: Integer, length: Integer) -> ByteArray {
    length.negative?.if_true {
      process.panic('The slice length must be zero or greater')
    }

    # There is nothing to slice in an empty `String`. Returning here also keeps
    # the `start % total_bytes` below from using zero as the divisor when
    # `start` is negative.
    empty?.if_true {
      return ByteArray.new
    }

    let bytes = ByteArray.new
    let total_bytes = bytesize

    # A negative start position is translated into a position counted from the
    # end of the `String`.
    let mut index =
      start.negative?.if(true: { start % total_bytes }, false: { start })

    let mut end_at = index + length

    # Never read past the last byte of the `String`.
    (end_at > total_bytes).if_true {
      end_at = total_bytes
    }

    { index < end_at }.while_true {
      bytes.push(byte(index))
      index += 1
    }

    bytes
  }

  # Splits `self` into an `Array` of `String`s, each separated by the given
  # separator.
  #
  # Splitting an empty `String` produces an empty `Array`. If the separator is
  # empty, or contains at least as many characters as `self`, the `Array` will
  # contain only `self`.
  #
  # Multiple separators in a row produce empty `String`s. A separator at the
  # very end of `self` does not produce a trailing empty `String`.
  #
  # # Examples
  #
  # Splitting a `String` using a single character as the separator:
  #
  #     'foo/bar/baz'.split('/') # => Array.new('foo', 'bar', 'baz')
  #
  # Splitting a `String` using multiple characters as the separator:
  #
  #     'foo::bar::baz'.split('::') # => Array.new('foo', 'bar', 'baz')
  #
  # Splitting a `String` containing multiple separators in a row:
  #
  #     'foo//bar'.split('/') # => Array.new('foo', '', 'bar')
  def split(separator: String) -> Array!(String) {
    empty?.if_true {
      return Array.new
    }

    separator.empty?.or { separator.length >= length }.if_true {
      return Array.new(self)
    }

    let segments = Array.new
    let buffer = ByteArray.new
    let sep_bytes = separator.to_byte_array
    let sep_start = sep_bytes[0]!
    let mut index = 0
    let max = bytesize

    { index < max }.while_true {
      let byte = byte(index)

      # Comparing the first byte of the separator is a cheap check that lets us
      # skip the full comparison for most bytes.
      (byte == sep_start)
        .and { split_at?(string: self, separator: sep_bytes, start: index) }
        .if(
          true: {
            # A separator was found: everything buffered so far is a segment.
            segments.push(buffer.drain_to_string)
            index += sep_bytes.length
          },
          false: {
            buffer.push(byte)
            index += 1
          }
        )
    }

    # Whatever remains after the last separator is the final segment. An empty
    # buffer means `self` ended with a separator, in which case no segment is
    # added.
    buffer.empty?.if_false {
      segments.push(buffer.drain_to_string)
    }

    segments
  }
}
......@@ -28,11 +28,20 @@ test.group('std::string::String.bytesize') do (g) {
}
test.group('std::string::String.slice') do (g) {
g.test('Slicing a String') {
assert.equal('hello_world'.slice(0, 5), 'hello')
assert.equal('hello_world'.slice(0, 20), 'hello_world')
assert.equal('hello_world'.slice(-5, 5), 'world')
assert.equal('hello_world'.slice(-1, 5), 'd')
g.test('Slicing a String from the start of the String') {
assert.equal('hello_world'.slice(start: 0, length: 5), 'hello')
assert.equal('hello_world'.slice(start: 0, length: 20), 'hello_world')
}
g.test('Slicing a String from the end of the String') {
assert.equal('hello_world'.slice(start: -5, length: 5), 'world')
assert.equal('hello_world'.slice(start: -1, length: 5), 'd')
}
g.test('Slicing a String with a negative length') {
assert.panic {
'hello'.slice(start: 0, length: -5)
}
}
}
......@@ -148,3 +157,95 @@ test.group('std::string::String.to_byte_array') do (g) {
assert.equal('inko'.to_byte_array, ByteArray.new(105, 110, 107, 111))
}
}
test.group('std::string::String.slice_bytes') do (g) {
  g.test('Slicing a String into bytes of the String') {
    let input = 'hello_world'

    # A slice that fits within the String.
    assert.equal(input.slice_bytes(start: 0, length: 5), 'hello'.to_byte_array)

    # A length that runs past the end of the String yields all of its bytes.
    assert.equal(input.slice_bytes(start: 0, length: 20), input.to_byte_array)
  }

  g.test('Slicing a String into bytes from the end of the String') {
    let input = 'hello_world'

    assert.equal(input.slice_bytes(start: -5, length: 5), 'world'.to_byte_array)

    # Only one byte is available when starting at the last byte.
    assert.equal(input.slice_bytes(start: -1, length: 5), 'd'.to_byte_array)
  }

  g.test('Slicing a String into bytes with a negative length') {
    assert.panic {
      'hello'.slice_bytes(start: 0, length: -5)
    }
  }

  g.test('Slicing a Unicode String into bytes') {
    let input = '→foo'

    # Slicing works on bytes, so a slice may end in the middle of a character.
    assert.equal(input.slice_bytes(start: 0, length: 2), ByteArray.new(226, 134))

    assert.equal(input.slice_bytes(start: 0, length: 6), input.to_byte_array)
    assert.equal(input.slice_bytes(start: 0, length: 3), '→'.to_byte_array)
  }
}
test.group('std::string::String.split') do (g) {
  g.test('Splitting a String using a single character separator') {
    let segments = 'foo/bar/baz'.split('/')

    assert.equal(segments, Array.new('foo', 'bar', 'baz'))
  }

  g.test('Splitting a String using multiple characters as the separator') {
    let segments = 'foo::bar::baz'.split('::')

    assert.equal(segments, Array.new('foo', 'bar', 'baz'))
  }

  g.test('Splitting a String that does not contain the separator') {
    let segments = 'foo'.split('/')

    assert.equal(segments, Array.new('foo'))
  }

  g.test('Splitting a String using a multibyte separator') {
    let segments = 'foo→bar→baz'.split('→')

    assert.equal(segments, Array.new('foo', 'bar', 'baz'))
  }

  g.test('Splitting a String containing multibyte characters') {
    let segments = 'foo→bar→baz'.split('bar')

    assert.equal(segments, Array.new('foo→', '→baz'))
  }

  g.test('Splitting a String containing multiple separators in a row') {
    let segments = 'foo//bar/baz'.split('/')

    assert.equal(segments, Array.new('foo', '', 'bar', 'baz'))
  }

  g.test('Splitting a String with an empty String as the separator') {
    let segments = 'foo/bar/baz'.split('')

    assert.equal(segments, Array.new('foo/bar/baz'))
  }

  g.test('Splitting a String with a separator longer than the String') {
    let segments = 'foo/bar'.split('///////////////')

    assert.equal(segments, Array.new('foo/bar'))
  }

  g.test('Splitting a String that ends with the separator') {
    let segments = 'foo/'.split('/')

    assert.equal(segments, Array.new('foo'))
  }

  g.test('Splitting a String that ends with the separator multiple times') {
    # Single character separator.
    assert.equal('foo//'.split('/'), Array.new('foo', ''))
    assert.equal('foo///'.split('/'), Array.new('foo', '', ''))

    # Multiple character separator.
    assert.equal('foo//'.split('//'), Array.new('foo'))
    assert.equal('foo///'.split('//'), Array.new('foo', '/'))
  }

  g.test('Splitting an empty String') {
    let segments = ''.split('/')

    assert.equal(segments, Array.new)
  }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment