I need to URL-escape a query string that contains characters with special meaning in the Apache Lucene query syntax.
Example 1 (the “+” requires that the identifier intNum
appear somewhere in the document):
+intNum%20NOT%20natNum # all non-positive integers
MWE: the “+” is getting URL-escaped, and I don’t want that.
str = HTTP.escapeuri("+intNum NOT natNum")
@test str == "+intNum%20NOT%20natNum"
false
println(str)
"%2BintNum%20NOT%20natNum"
Attempting to protect the “+” manually with a backslash didn’t work either:
query = HTTP.escapeuri("\\+intNum NOT natNum")
"%5C%2BintNum%20NOT%20natNum"
query = HTTP.escapeuri("\+intNum NOT natNum")
ERROR: syntax: invalid escape sequence
Is there some other package I should be using for this?
This module defines its own versions of HTTP's issafe() and escapeuri()
(I just copied everything from lines 300-321 and added a line so that ‘+’ does not get escaped):
using Pkg;Pkg.activate("./")
module LuceneSearch
# URI percent-encoding for Apache Lucene query strings.
#
# Copied from HTTP.jl's `escapeuri`, with one change: '+' is treated as a safe character
# and is therefore left unescaped.
#
# The bytes of a string are obtained with Base's `codeunits` rather than
# `HTTP.IOExtras.bytes`, so this module does not depend on HTTP.jl internals.

export issafe, escapeuri

"""
    issafe(c::Char) -> Bool

Return `true` if `c` is emitted unchanged by [`escapeuri`](@ref): ASCII letters and
digits, `-`, `.`, `_`, and `+`.
"""
@inline issafe(c::Char) =
    c == '-' ||
    c == '.' ||
    c == '_' ||
    c == '+' ||  # added '+' to the list of characters to ignore
    (isascii(c) && (isletter(c) || isnumeric(c)))

# Yield one `Char` per code unit (byte) of `str`, so a multi-byte character is seen --
# and later escaped -- one byte at a time.
utf8_chars(str::AbstractString) = (Char(c) for c in codeunits(str))

"""
    escapeuri(str::AbstractString, safe::Function=issafe) -> String
    escapeuri(key, value) -> String
    escapeuri(query) -> String

Percent-encode a string, dict, or pair for a URI.

- A string is encoded byte by byte; bytes for which `safe` returns `true` are kept as-is
  and every other byte becomes `%XX` (uppercase hex).
- `escapeuri(key, value)` returns `"key=value"` with both parts encoded. When the value
  is a `Vector`, one `key=v` entry is produced per element, joined with `&`.
- Any other argument is iterated as `(key, value)` pairs, which are encoded and joined
  with `&`. A `NamedTuple` is handled through `pairs`.
- A `Vector{UInt8}` is returned unchanged.

# Examples
```julia
julia> escapeuri("+intNum NOT natNum")
"+intNum%20NOT%20natNum"

julia> escapeuri("q", "a b")
"q=a%20b"
```
"""
function escapeuri end

# Encodes the code point of `c` as `%XX`. The string method only calls this with the
# byte-valued Chars produced by `utf8_chars`.
escapeuri(c::Char) = string('%', uppercase(string(Int(c), base=16, pad=2)))

escapeuri(str::AbstractString, safe::Function=issafe) =
    join(safe(c) ? c : escapeuri(c) for c in utf8_chars(str))

# Raw bytes are passed through untouched.
escapeuri(bytes::Vector{UInt8}) = bytes

# Numbers and symbols are encoded via their string representation.
escapeuri(v::Number) = escapeuri(string(v))
escapeuri(v::Symbol) = escapeuri(string(v))

escapeuri(key, value) = string(escapeuri(key), "=", escapeuri(value))

# A vector value expands to one `key=v` entry per element.
escapeuri(key, values::Vector) = escapeuri(key => v for v in values)

# Fallback: treat the argument as an iterable of (key, value) pairs.
escapeuri(query) = join((escapeuri(k, v) for (k, v) in query), "&")

escapeuri(nt::NamedTuple) = escapeuri(pairs(nt))

end
Seems to work fine:
using Test,HTTP,.LuceneSearch
@testset "uri escaping" begin
    # One input, two escapers: HTTP percent-encodes the '+', LuceneSearch keeps it.
    sample = "i+am sam."
    @test HTTP.escapeuri(sample) == "i%2Bam%20sam."
    @test LuceneSearch.escapeuri(sample) == "i+am%20sam."
end
Test Summary: | Pass Total
uri escaping | 2 2
Test.DefaultTestSet("uri escaping", Any[], 2, false)