Skip to content

Commit

Permalink
Optimize String.starts_with/ends_with
Browse files Browse the repository at this point in the history
Instead of using String.byte_index, these methods use the underlying
logic of the optimized String.== method to more efficiently compare the
head/tail of a String.

In addition, ByteArray.== is implemented in Inko using the same logic as
String.==, removing the need for a runtime library function just to
compare two ByteArray values.

Changelog: performance
  • Loading branch information
yorickpeterse committed Jul 25, 2024
1 parent d0a29da commit 656f5db
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 65 deletions.
8 changes: 0 additions & 8 deletions rt/src/runtime/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,6 @@ pub unsafe extern "system" fn inko_byte_array_size(
(*bytes).value.len() as i64
}

/// Compares the `value` fields of two byte arrays for equality.
///
/// Returns `1` when both values compare equal and `0` otherwise.
///
/// # Safety
///
/// Both `lhs` and `rhs` are dereferenced without any checks, so callers must
/// pass pointers that are valid to read as a `ByteArray`.
#[no_mangle]
pub unsafe extern "system" fn inko_byte_array_eq(
    lhs: *const ByteArray,
    rhs: *const ByteArray,
) -> i64 {
    let left = &(*lhs).value;
    let right = &(*rhs).value;

    // `i64::from(bool)` yields 1 for `true` and 0 for `false`.
    i64::from(left == right)
}

#[no_mangle]
pub unsafe extern "system" fn inko_byte_array_clear(bytes: *mut ByteArray) {
(*bytes).value.clear();
Expand Down
11 changes: 8 additions & 3 deletions std/src/std/byte_array.inko
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import std.hash (Hash, Hasher)
import std.io (Read)
import std.iter (Stream)
import std.option (Option)
import std.ptr
import std.string (Bytes, IntoString, ToString)

fn extern inko_byte_array_new(state: Pointer[UInt8]) -> ByteArray
Expand Down Expand Up @@ -35,8 +36,6 @@ fn extern inko_byte_array_drain_to_string(

fn extern inko_byte_array_drop(bytes: mut ByteArray)

fn extern inko_byte_array_eq(left: ref ByteArray, right: ref ByteArray) -> Bool

fn extern inko_byte_array_get(bytes: ref ByteArray, index: Int) -> Int

fn extern inko_byte_array_size(bytes: ref ByteArray) -> Int
Expand Down Expand Up @@ -570,7 +569,13 @@ impl Equal[ref ByteArray] for ByteArray {
# ByteArray.from_array([10]) == ByteArray.from_array([20]) # => false
# ```
fn pub ==(other: ref ByteArray) -> Bool {
inko_byte_array_eq(self, other)
let size = self.size

if size == other.size {
ptr.equal(to_pointer, other.to_pointer, size: size)
} else {
false
}
}
}

Expand Down
60 changes: 60 additions & 0 deletions std/src/std/ptr.inko
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Methods for working with raw pointers
#
# The methods in this module are intended for use within the standard library
# only, and must be used with extreme care due to their unsafe nature.

# Returns `true` if the `size` bytes starting at `left` are identical to the
# `size` bytes starting at `right`, and `false` otherwise.
#
# The comparison reads the widest unit that still fits in the remaining data:
# first 8 bytes at a time, then at most one 4-byte read, then at most one
# 2-byte read, and finally single bytes.
#
# # Safety
#
# Callers of this method _must_ ensure that both `left` and `right` point to at
# least `size` bytes of data. If this isn't the case, the behaviour of this
# method is undefined.
fn equal(left: Pointer[UInt8], right: Pointer[UInt8], size: Int) -> Bool {
  let lhs_addr = left as Int
  let rhs_addr = right as Int
  let mut offset = 0
  let mut words = size / 8

  # An Int holds 8 bytes, so comparing two Int values checks 8 bytes in one go
  # instead of comparing each byte on its own.
  while words > 0 {
    let a = ((lhs_addr + offset) as Pointer[Int]).0
    let b = ((rhs_addr + offset) as Pointer[Int]).0

    if a != b { return false }

    offset += 8
    words -= 1
  }

  # Number of bytes not yet compared.
  let mut remaining = size - offset

  if remaining >= 4 {
    let a = ((lhs_addr + offset) as Pointer[Int32]).0 as Int
    let b = ((rhs_addr + offset) as Pointer[Int32]).0 as Int

    if a != b { return false }

    offset += 4
    remaining -= 4
  }

  if remaining >= 2 {
    let a = ((lhs_addr + offset) as Pointer[Int16]).0 as Int
    let b = ((rhs_addr + offset) as Pointer[Int16]).0 as Int

    if a != b { return false }

    offset += 2
    remaining -= 2
  }

  # Whatever is left is compared one byte at a time.
  while remaining > 0 {
    let a = ((lhs_addr + offset) as Pointer[UInt8]).0 as Int
    let b = ((rhs_addr + offset) as Pointer[UInt8]).0 as Int

    if a != b { return false }

    offset += 1
    remaining -= 1
  }

  true
}
70 changes: 19 additions & 51 deletions std/src/std/string.inko
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import std.hash (Hash, Hasher)
import std.io (Read)
import std.iter (Iter, Stream)
import std.ops (Add)
import std.ptr

class extern StringResult {
let @tag: Int
Expand Down Expand Up @@ -342,9 +343,12 @@ class builtin String {
# 'hello'.starts_with?('test_') # => false
# ```
fn pub starts_with?(prefix: String) -> Bool {
match byte_index(of: prefix, starting_at: 0) {
case Some(idx) -> idx == 0
case _ -> false
let rsize = prefix.size

if size < rsize or rsize == 0 {
false
} else {
ptr.equal(@ptr, prefix.ptr, rsize)
}
}

Expand All @@ -359,7 +363,16 @@ class builtin String {
# 'hello'.ends_with?('world') # => false
# ```
fn pub ends_with?(suffix: String) -> Bool {
byte_index(of: suffix, starting_at: size - suffix.size).some?
let lsize = size
let rsize = suffix.size

if lsize < rsize or rsize == 0 {
false
} else {
let lhs = (@ptr as Int + lsize - rsize) as Pointer[UInt8]

ptr.equal(lhs, suffix.ptr, rsize)
}
}

# Splits `self` into an iterator of `Strings`, each separated by the given
Expand Down Expand Up @@ -797,54 +810,9 @@ impl Equal[ref String] for String {
# 'foo' == 'bar' # => false
# ```
fn pub ==(other: ref String) -> Bool {
let size = self.size

if size != other.size { return false }

let mut chunks = size / 8
let mut idx = 0
let lptr = @ptr
let rptr = other.ptr

# Strings are compared at the byte level. We take advantage of the fact that
# an Int can fit 8 bytes, and thus read 8 bytes at once and compare the
# resulting Int, instead of comparing each individual byte.
while chunks > 0 {
let lhs = (lptr as Int + idx as Pointer[Int]).0
let rhs = (rptr as Int + idx as Pointer[Int]).0

if lhs != rhs { return false }

chunks -= 1
idx += 8
}

if size - idx >= 4 {
let lhs = (lptr as Int + idx as Pointer[Int32]).0 as Int
let rhs = (rptr as Int + idx as Pointer[Int32]).0 as Int

if lhs != rhs { return false }

idx += 4
}

if size - idx >= 2 {
let lhs = (lptr as Int + idx as Pointer[Int16]).0 as Int
let rhs = (rptr as Int + idx as Pointer[Int16]).0 as Int

if lhs != rhs { return false }

idx += 2
}

# For the remainder we just compare the individual bytes.
while idx < size {
if byte_unchecked(idx) != other.byte_unchecked(idx) { return false }

idx += 1
}
let lsize = self.size

true
if lsize == other.size { ptr.equal(@ptr, other.ptr, lsize) } else { false }
}
}

Expand Down
77 changes: 74 additions & 3 deletions std/test/std/test_byte_array.inko
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,80 @@ fn pub tests(t: mut Tests) {
})

t.test('ByteArray.==', fn (t) {
t.equal(ByteArray.new, ByteArray.new)
t.equal(ByteArray.from_array([10]), ByteArray.from_array([10]))
t.not_equal(ByteArray.from_array([10]), ByteArray.new)
t.equal('a'.to_byte_array, 'a'.to_byte_array)
t.equal('ab'.to_byte_array, 'ab'.to_byte_array)
t.equal('abc'.to_byte_array, 'abc'.to_byte_array)
t.equal('abcd'.to_byte_array, 'abcd'.to_byte_array)
t.equal('abcde'.to_byte_array, 'abcde'.to_byte_array)
t.equal('abcdef'.to_byte_array, 'abcdef'.to_byte_array)
t.equal('abcdefg'.to_byte_array, 'abcdefg'.to_byte_array)
t.equal('abcdefgh'.to_byte_array, 'abcdefgh'.to_byte_array)
t.equal('abcdefghi'.to_byte_array, 'abcdefghi'.to_byte_array)
t.equal('abcdefghij'.to_byte_array, 'abcdefghij'.to_byte_array)
t.equal('abcdefghijk'.to_byte_array, 'abcdefghijk'.to_byte_array)
t.equal('abcdefghijkl'.to_byte_array, 'abcdefghijkl'.to_byte_array)
t.equal('abcdefghijklm'.to_byte_array, 'abcdefghijklm'.to_byte_array)
t.equal('abcdefghijklmn'.to_byte_array, 'abcdefghijklmn'.to_byte_array)
t.equal('abcdefghijklmno'.to_byte_array, 'abcdefghijklmno'.to_byte_array)
t.equal('abcdefghijklmnop'.to_byte_array, 'abcdefghijklmnop'.to_byte_array)
t.equal(
'abcdefghijklmnopq'.to_byte_array,
'abcdefghijklmnopq'.to_byte_array,
)
t.equal(
'abcdefghijklmnopqr'.to_byte_array,
'abcdefghijklmnopqr'.to_byte_array,
)
t.equal(
'abcdefghijklmnopqrs'.to_byte_array,
'abcdefghijklmnopqrs'.to_byte_array,
)
t.equal('Ä'.to_byte_array, 'Ä'.to_byte_array)
t.equal('AÄ'.to_byte_array, 'AÄ'.to_byte_array)
t.equal('쿠키'.to_byte_array, '쿠키'.to_byte_array)

t.not_equal('foo'.to_byte_array, 'bar'.to_byte_array)
t.not_equal('Ä'.to_byte_array, '쿠'.to_byte_array)
t.not_equal('AÄ'.to_byte_array, 'A쿠'.to_byte_array)
t.not_equal('쿠Ä'.to_byte_array, '쿠키'.to_byte_array)
t.not_equal('a'.to_byte_array, 'A'.to_byte_array)
t.not_equal('ab'.to_byte_array, 'AB'.to_byte_array)
t.not_equal('abc'.to_byte_array, 'ABC'.to_byte_array)
t.not_equal('abcd'.to_byte_array, 'ABCD'.to_byte_array)
t.not_equal('abcde'.to_byte_array, 'ABCDE'.to_byte_array)
t.not_equal('abcdef'.to_byte_array, 'ABCDEF'.to_byte_array)
t.not_equal('abcdefg'.to_byte_array, 'ABCDEFG'.to_byte_array)
t.not_equal('abcdefgh'.to_byte_array, 'ABCDEFGH'.to_byte_array)
t.not_equal('abcdefghi'.to_byte_array, 'ABCDEFGHI'.to_byte_array)
t.not_equal('abcdefghij'.to_byte_array, 'ABCDEFGHIJ'.to_byte_array)
t.not_equal('abcdefghijk'.to_byte_array, 'ABCDEFGHIJK'.to_byte_array)
t.not_equal('abcdefghijkl'.to_byte_array, 'ABCDEFGHIJKL'.to_byte_array)
t.not_equal('abcdefghijklm'.to_byte_array, 'ABCDEFGHIJKLM'.to_byte_array)
t.not_equal('abcdefghijklmn'.to_byte_array, 'ABCDEFGHIJKLMN'.to_byte_array)
t.not_equal(
'abcdefghijklmno'.to_byte_array,
'ABCDEFGHIJKLMNO'.to_byte_array,
)
t.not_equal(
'abcdefghijklmnop'.to_byte_array,
'ABCDEFGHIJKLMNOP'.to_byte_array,
)
t.not_equal(
'abcdefghijklmnopq'.to_byte_array,
'ABCDEFGHIJKLMNOPQ'.to_byte_array,
)
t.not_equal(
'abcdefghijklmnopqr'.to_byte_array,
'ABCDEFGHIJKLMNOPQR'.to_byte_array,
)
t.not_equal(
'abcdefghijklmnopqrs'.to_byte_array,
'ABCDEFGHIJKLMNOPQRS'.to_byte_array,
)
t.not_equal('aaaaaaaaa'.to_byte_array, 'aaaaaaaab'.to_byte_array)
t.not_equal('aaaaaaaaaaaa'.to_byte_array, 'aaaaaaaabbbb'.to_byte_array)
t.not_equal('aaaaaaaaaa'.to_byte_array, 'aaaaaaaabb'.to_byte_array)
t.not_equal('aaaaaaaaa'.to_byte_array, 'aaaaaaaab'.to_byte_array)
})

t.test('ByteArray.clone', fn (t) {
Expand Down

0 comments on commit 656f5db

Please sign in to comment.