diff --git a/ext/ParsersExt.jl b/ext/ParsersExt.jl index 99be500..99885f0 100644 --- a/ext/ParsersExt.jl +++ b/ext/ParsersExt.jl @@ -1,6 +1,6 @@ module ParsersExt using Parsers -using InlineStrings: InlineString, InlineString1, addcodeunit +using InlineStrings: InlineString, addcodeunit Parsers.xparse(::Type{T}, buf::AbstractString, pos, len, options, ::Type{S}=T) where {T <: InlineString, S} = Parsers.xparse(T, codeunits(buf), pos, len, options, S) @@ -14,16 +14,7 @@ function Parsers.xparse(::Type{T}, source::Union{AbstractVector{UInt8}, IO}, pos x = T() else poslen = res.val - if T === InlineString1 - if poslen.len != 1 - overflowed = true - x = T() - else - Parsers.fastseek!(source, poslen.pos) - x = InlineString1(Parsers.peekbyte(source, poslen.pos)) - Parsers.fastseek!(source, pos + res.tlen - 1) - end - elseif Parsers.escapedstring(code) || !(source isa AbstractVector{UInt8}) + if Parsers.escapedstring(code) || !(source isa AbstractVector{UInt8}) if poslen.len > (sizeof(T) - 1) overflowed = true x = T() diff --git a/src/InlineStrings.jl b/src/InlineStrings.jl index 7fda5e1..b228ea1 100644 --- a/src/InlineStrings.jl +++ b/src/InlineStrings.jl @@ -19,10 +19,10 @@ See more details by looking at individual docs for [`String1`](@ref), """ abstract type InlineString <: AbstractString end -for sz in (1, 4, 8, 16, 32, 64, 128, 256) - nm = Symbol(:String, max(1, sz - 1)) - nma = Symbol(:InlineString, max(1, sz - 1)) - macro_nm = Symbol(:inline, max(1, sz - 1), :_str) +for sz in (2, 4, 8, 16, 32, 64, 128, 256) + nm = Symbol(:String, sz-1) + nma = Symbol(:InlineString, sz-1) + macro_nm = Symbol(:inline, sz-1, :_str) at_macro_nm = Symbol("@", macro_nm) @eval begin """ @@ -30,9 +30,9 @@ for sz in (1, 4, 8, 16, 32, 64, 128, 256) $($nm)(bytes::AbstractVector{UInt8}, pos, len) $($nm)(ptr::Ptr{UInt8}, [len]) - Custom fixed-size string with a fixed size of $($sz) bytes. - 1 byte is used to store the length of the string. If an - inline string is shorter than $($(max(1, sz - 1))) bytes, the entire + Custom C compatible fixed-size string with a fixed size of $($sz) bytes. + 1 byte is used to store the capacity minus the length of the string. If an + inline string is shorter than $($(sz-1)) bytes, the entire string still occupies the full $($sz) bytes since they are, by definition, fixed size. Otherwise, they can be treated just like normal `String` values. Note that `sizeof(x)` will @@ -58,7 +58,7 @@ for sz in (1, 4, 8, 16, 32, 64, 128, 256) export $nma """ - inline$($(max(1, sz - 1)))"string" + inline$($(sz-1))"string" Macro to create a [`$($nm)`](@ref) with a fixed size of $($sz) bytes. """ @@ -75,7 +75,7 @@ const SmallInlineStrings = Union{String1, String3, String7, String15} # used to zero out n lower bytes of an inline string clear_n_bytes(s, n) = Base.shl_int(Base.lshr_int(s, 8 * n), 8 * n) -_bswap(x::T) where {T <: InlineString} = T === InlineString1 ? x : Base.bswap_int(x) +_bswap(x::T) where {T <: InlineString} = Base.bswap_int(x) const InlineStringTypes = Union{InlineString1, InlineString3, @@ -115,26 +115,17 @@ Base.widen(::Type{InlineString63}) = InlineString127 Base.widen(::Type{InlineString127}) = InlineString255 Base.widen(::Type{InlineString255}) = String -Base.ncodeunits(::InlineString1) = 1 Base.ncodeunits(x::InlineString) = Int(Base.trunc_int(UInt8, x)) Base.codeunit(::InlineString) = UInt8 Base.@propagate_inbounds function Base.codeunit(x::T, i::Int) where {T <: InlineString} @boundscheck checkbounds(Bool, x, i) || throw(BoundsError(x, i)) - if T === InlineString1 - return Base.bitcast(UInt8, x) - else - return Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (sizeof(T) - i))) - end + return Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (sizeof(T) - i))) end function Base.String(x::T) where {T <: InlineString} len = ncodeunits(x) out = Base._string_n(len) - if T === InlineString1 - GC.@preserve out unsafe_store!(pointer(out), codeunit(x, 1)) - return out - end ref = Ref{T}(_bswap(x)) GC.@preserve ref out begin ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref)) @@ -149,9 +140,6 @@ function Base.Symbol(x::T) where {T <: InlineString} (Ref{T}, Int), ref, sizeof(x)) end -Base.cconvert(::Type{Ptr{UInt8}}, x::InlineString1) = Base.cconvert(Ptr{UInt8}, InlineString3(x)) -Base.cconvert(::Type{Ptr{Int8}}, x::InlineString1) = Base.cconvert(Ptr{Int8}, InlineString3(x)) -Base.cconvert(::Type{Cstring}, x::InlineString1) = Base.cconvert(Cstring, InlineString3(x)) Base.cconvert(::Type{Ptr{UInt8}}, x::T) where {T <: InlineString} = Ref{T}(_bswap(clear_n_bytes(x, 1))) Base.cconvert(::Type{Ptr{Int8}}, x::T) where {T <: InlineString} = @@ -189,9 +177,6 @@ end # add a codeunit to end of string method function addcodeunit(x::T, b::UInt8) where {T <: InlineString} - if T === InlineString1 - return Base.bitcast(InlineString1, b), false - end len = Base.trunc_int(UInt8, x) sz = Base.trunc_int(UInt8, sizeof(T)) shf = Base.zext_int(Int16, max(0x01, sz - len - 0x01)) << 3 @@ -199,37 +184,10 @@ function addcodeunit(x::T, b::UInt8) where {T <: InlineString} return Base.add_int(x, Base.zext_int(T, 0x01)), (len + 0x01) >= sz end -# from String -InlineString1(byte::UInt8=0x00) = Base.bitcast(InlineString1, byte) - -function InlineString1(x::AbstractString) - sizeof(x) == 1 || stringtoolong(InlineString1, sizeof(x)) - return Base.bitcast(InlineString1, codeunit(x, 1)) -end - -function InlineString1(buf::AbstractVector{UInt8}, pos=1, len=length(buf)) - len == 1 || stringtoolong(InlineString1, len) - return Base.bitcast(InlineString1, buf[pos]) -end - -function InlineString1(ptr::Ptr{UInt8}, len=nothing) - ptr == Ptr{UInt8}(0) && nullptr(InlineString1) - if len === nothing - y, _ = addcodeunit(InlineString1(), unsafe_load(ptr)) - unsafe_load(ptr, 2) === 0x00 || stringtoolong(InlineString1, 2) - return y - else - len == 1 || stringtoolong(InlineString1, len) - return Base.bitcast(InlineString1, unsafe_load(ptr)) - end -end +# InlineString1(byte::UInt8) = Base.bitcast(InlineString1, byte) -function InlineString1(x::S) where {S <: InlineString} - sizeof(x) == 1 || stringtoolong(InlineString1, sizeof(x)) - return Base.bitcast(InlineString1, codeunit(x, 1)) -end -for T in (:InlineString3, :InlineString7, :InlineString15, :InlineString31, :InlineString63, :InlineString127, :InlineString255) +for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :InlineString31, :InlineString63, :InlineString127, :InlineString255) @eval $T() = Base.zext_int($T, 0x00) @eval function $T(x::AbstractString) @@ -304,11 +262,7 @@ for T in (:InlineString3, :InlineString7, :InlineString15, :InlineString31, :Inl return Base.add_int(y, Base.zext_int($T, UInt8(len))) else # promoting smaller InlineString to larger - if S === InlineString1 - y = Base.shl_int(Base.zext_int($T, x), 8 * (sizeof($T) - sizeof(S))) - else - y = Base.shl_int(Base.zext_int($T, Base.lshr_int(x, 8)), 8 * (sizeof($T) - sizeof(S) + 1)) - end + y = Base.shl_int(Base.zext_int($T, Base.lshr_int(x, 8)), 8 * (sizeof($T) - sizeof(S) + 1)) return Base.add_int(y, Base.zext_int($T, UInt8(sizeof(x)))) end end @@ -320,7 +274,7 @@ end function InlineStringType(n::Integer) n > 255 && stringtoolong(InlineString, n) - return n == 1 ? InlineString1 : n < 4 ? InlineString3 : + return n < 2 ? InlineString1 : n < 4 ? InlineString3 : n < 8 ? InlineString7 : n < 16 ? InlineString15 : n < 32 ? InlineString31 : n < 64 ? InlineString63 : n < 128 ? InlineString127 : InlineString255 @@ -381,7 +335,6 @@ function Base.read(s::IO, ::Type{T}) where {T <: InlineString} end function Base.print(io::IO, x::T) where {T <: InlineString} - x isa InlineString1 && return print(io, Char(Base.bitcast(UInt8, x))) ref = Ref{T}(_bswap(x)) return GC.@preserve ref begin ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref)) @@ -391,9 +344,6 @@ function Base.print(io::IO, x::T) where {T <: InlineString} end function Base.isascii(x::T) where {T <: InlineString} - if T === InlineString1 - return codeunit(x, 1) < 0x80 - end len = ncodeunits(x) x = Base.lshr_int(x, 8 * (sizeof(T) - len)) for _ = 1:(len >> 2) @@ -410,7 +360,6 @@ end # "mutating" operations; care must be taken here to "clear out" # unused bits to ensure our == definition continues to work # which compares the full bit contents of inline strings -Base.chop(s::InlineString1; kw...) = chop(String3(s); kw...) function Base.chop(s::InlineString; head::Integer = 0, tail::Integer = 1) if isempty(s) return s @@ -431,7 +380,6 @@ end Base.getindex(s::InlineString, r::AbstractUnitRange{<:Integer}) = getindex(s, Int(first(r)):Int(last(r))) -Base.getindex(s::InlineString1, r::UnitRange{Int}) = getindex(InlineString3(s), r) Base.@propagate_inbounds function Base.getindex(s::InlineString, r::UnitRange{Int}) isempty(r) && return typeof(s)("") i = first(r) @@ -448,7 +396,6 @@ Base.view(s::InlineString, r::AbstractUnitRange{<:Integer}) = getindex(s, r) if isdefined(Base, :chopprefix) -Base.chopprefix(s::InlineString1, prefix::AbstractString) = chopprefix(String3(s), prefix) function Base.chopprefix(s::InlineString, prefix::AbstractString) if !isempty(prefix) && startswith(s, prefix) return _chopprefix(s, prefix) @@ -456,7 +403,6 @@ function Base.chopprefix(s::InlineString, prefix::AbstractString) return s end -Base.chopprefix(s::InlineString1, prefix::Regex) = chopprefix(String3(s), prefix) function Base.chopprefix(s::InlineString, prefix::Regex) m = match(prefix, String(s), firstindex(s), Base.PCRE.ANCHORED) m === nothing && return s @@ -483,7 +429,6 @@ end throw_strip_argument_error() = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s")) -Base.lstrip(f, s::InlineString1) = lstrip(f, InlineString3(s)) function Base.lstrip(f, s::InlineString) nc = 0 len = 0 @@ -499,11 +444,9 @@ function Base.lstrip(f, s::InlineString) end Base.lstrip(::AbstractString, ::InlineString) = throw_strip_argument_error() -Base.lstrip(::AbstractString, ::InlineString1) = throw_strip_argument_error() if isdefined(Base, :chopsuffix) -Base.chopsuffix(s::InlineString1, suffix::AbstractString) = chopsuffix(String3(s), suffix) function Base.chopsuffix(s::InlineString, suffix::AbstractString) if !isempty(suffix) && endswith(s, suffix) return _chopsuffix(s, suffix) @@ -511,7 +454,6 @@ function Base.chopsuffix(s::InlineString, suffix::AbstractString) return s end -Base.chopsuffix(s::InlineString1, suffix::Regex) = chopsuffix(String3(s), suffix) function Base.chopsuffix(s::InlineString, suffix::Regex) m = match(suffix, String(s), firstindex(s), Base.PCRE.ENDANCHORED) m === nothing && return s @@ -529,7 +471,6 @@ _chopsuffix(s::InlineString, suffix::AbstractString) = _chopsuffix(s, ncodeunits return Base.or_int(s, _oftype(typeof(s), new_n)) end -Base.rstrip(f, s::InlineString1) = rstrip(f, InlineString3(s)) function Base.rstrip(f, s::InlineString) nc = 0 for c in Iterators.reverse(s) @@ -543,9 +484,7 @@ function Base.rstrip(f, s::InlineString) end Base.rstrip(::AbstractString, ::InlineString) = throw_strip_argument_error() -Base.rstrip(::AbstractString, ::InlineString1) = throw_strip_argument_error() -Base.chomp(s::InlineString1) = chomp(String3(s)) function Base.chomp(s::InlineString) i = lastindex(s) len = ncodeunits(s) @@ -558,14 +497,12 @@ function Base.chomp(s::InlineString) end end -Base.first(s::InlineString1, n::Integer) = first(String3(s), n) function Base.first(s::T, n::Integer) where {T <: InlineString} newlen = nextind(s, min(lastindex(s), nextind(s, 0, n))) - 1 i = sizeof(T) - newlen return Base.or_int(clear_n_bytes(s, i), _oftype(typeof(s), newlen)) end -Base.last(s::InlineString1, n::Integer) = last(String3(s), n) function Base.last(s::T, n::Integer) where {T <: InlineString} nc = ncodeunits(s) + 1 i = max(1, prevind(s, nc, n)) @@ -643,13 +580,11 @@ Base.string(a::_SmallestInlineStrings, b::_SmallestInlineStrings, c::_SmallestIn # Only benefit from keeping the small-ish strings as InlineStrings function _string(a::Ta, b::Tb) where {Ta <: SmallInlineStrings, Tb <: SmallInlineStrings} T = summed_type(Ta, Tb) - lb_a = Int(!isa(a, InlineString1)) # no "length byte" to remove if InlineString1 - lb_b = Int(!isa(b, InlineString1)) len_a = sizeof(a) len_b = sizeof(b) # Remove length byte (lshr), grow to new size (zext), move chars forward (shl). - a2 = Base.shl_int(Base.zext_int(T, Base.lshr_int(a, 8*lb_a)), 8 * (sizeof(T) - sizeof(Ta) + lb_a)) - b2 = Base.shl_int(Base.zext_int(T, Base.lshr_int(b, 8*lb_b)), 8 * (sizeof(T) - sizeof(Tb) + lb_b - len_a)) + a2 = Base.shl_int(Base.zext_int(T, Base.lshr_int(a, 8)), 8 * (sizeof(T) - sizeof(Ta) + 1)) + b2 = Base.shl_int(Base.zext_int(T, Base.lshr_int(b, 8*1)), 8 * (sizeof(T) - sizeof(Tb) + 1 - len_a)) lb = _oftype(T, len_a + len_b) # new length byte return Base.or_int(Base.or_int(a2, b2), lb) end @@ -879,7 +814,7 @@ using Base.Sort, Base.Order # And under certain thresholds, MergeSort is faster than RadixSort, even for small InlineStrings const MergeSortThresholds = Dict( - 1 => 2^5, + 2 => 2^5, 4 => 2^7, 8 => 2^9, 16 => 2^23 @@ -925,7 +860,7 @@ function Base.sort!(vs::AbstractVector, lo::Int, hi::Int, ::InlineStringSortAlg, # setup ts = similar(vs) - rdx = Radix(sizeof(T) == 1 ? 8 : 11) + rdx = Radix(11) radix_size = rdx.size radix_mask = rdx.mask radix_size_pow = rdx.pow diff --git a/test/runtests.jl b/test/runtests.jl index 5c11f97..71940c7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -136,20 +136,20 @@ if isdefined(Base, :chopprefix) @test chopprefix(abc, "a") === InlineString3("bc") @test chopprefix(abc, "bc") === abc @test chopprefix(abc, "abc") === InlineString3("") -@test chopprefix(InlineString1("a"), "a") === InlineString3("") +@test chopprefix(InlineString1("a"), "a") === InlineString1("") # Regex case @test chopprefix(InlineString15("∃∃∃b∃"), r"∃+") === InlineString15("b∃") -@test chopprefix(InlineString1("a"), r".") === InlineString3("") +@test chopprefix(InlineString1("a"), r".") === InlineString1("") end if isdefined(Base, :chopsuffix) @test chopsuffix(abc, "a") === abc @test chopsuffix(abc, "bc") === InlineString3("a") @test chopsuffix(abc, "abc") === InlineString3("") -@test chopsuffix(InlineString1("c"), "c") === InlineString3("") +@test chopsuffix(InlineString1("c"), "c") === InlineString1("") # Regex case @test chopsuffix(InlineString15("∃b∃∃∃"), r"∃+") === InlineString15("∃b") -@test chopsuffix(InlineString1("c"), r".") === InlineString3("") +@test chopsuffix(InlineString1("c"), r".") === InlineString1("") end if isdefined(Base, :chopprefix) && isdefined(Base, :chopsuffix) @@ -209,7 +209,7 @@ S = InlineString7 @test rstrip(S(" a b c ")) === S(" a b c") @test rstrip(isnumeric, S("abc0123")) === S("abc") @test rstrip(S("ello"), ['e','o']) === S("ell") -@test rstrip(InlineString1("x")) === InlineString3("x") +@test rstrip(InlineString1("x")) === InlineString1("x") @test_throws ArgumentError rstrip("test", S(" a b c ")) @test_throws ArgumentError rstrip("test", InlineString1("x")) @@ -217,11 +217,11 @@ S = InlineString7 @test lstrip(S(" a b c ")) === S("a b c ") @test lstrip(isnumeric, S("0123abc")) === S("abc") @test lstrip(S("ello"), ['e','o']) === S("llo") -@test lstrip(InlineString1("x")) === InlineString3("x") +@test lstrip(InlineString1("x")) === InlineString1("x") @test_throws ArgumentError lstrip("test", S(" a b c ")) @test_throws ArgumentError lstrip("test", InlineString1("x")) -@test strip(InlineString1("x")) === InlineString3("x") +@test strip(InlineString1("x")) === InlineString1("x") S = InlineString3 @test strip(S("")) === S("") @test strip(S(" ")) === S("") @@ -238,9 +238,9 @@ for S in SUBTYPES if S == InlineString1 x = S("x") @test x[1] == 'x' - @test x[1:1] isa InlineString3 - @test x[1:1] === view(x, 1:1) === InlineString3(x) - @test x[2:1] === view(x, 2:1) === InlineString3("") + @test x[1:1] isa InlineString1 + @test x[1:1] === view(x, 1:1) === InlineString1(x) + @test x[2:1] === view(x, 2:1) === InlineString1("") @test_throws BoundsError x[2] else abc = S("abc") @@ -437,7 +437,7 @@ for (i, case) in enumerate(testcases) end res = Parsers.xparse(InlineString1, "") -@test Parsers.overflow(res.code) +@test !Parsers.overflow(res.code) res = Parsers.xparse(InlineString1, "ab") @test Parsers.overflow(res.code) res = Parsers.xparse(InlineString1, "b") @@ -482,7 +482,7 @@ end @testset "sorting tests" begin for nelems in (50, 100, 500, 1000, 5000, 100_000) for T in (String1, String3, String7, String15, String31, String63, String127, String255) - x = [randstring(rand(1:(max(1, sizeof(T) - 1)))) for _ = 1:nelems]; + x = [randstring(rand(0:(sizeof(T) - 1))) for _ = 1:nelems]; y = map(T, x); @test sort(x) == sort(y) @test sort(x; rev=true) == sort(y; rev=true)