I need a word-wrap function that takes an input string and returns a similar string with some of the spaces replaced by newlines such that no line in the returned string exceeds a threshold length. My current version is
"""
    word_wrap(str::AbstractString, maxlen = 92)

Return a copy of `str` in which the whitespace-separated tokens are re-joined with
single spaces, breaking to a new line whenever adding the next token would make the
current line longer than `maxlen` characters.

A single token longer than `maxlen` cannot be split; it is placed on a line of its
own, which is the only case in which a returned line exceeds `maxlen`. An empty or
all-whitespace `str` yields `""`.
"""
function word_wrap(str::AbstractString, maxlen = 92)
    outbuf = IOBuffer()
    linelen = 0  # characters already written to the current output line
    for t in split(str)
        toksz = length(t)
        if linelen == 0
            # First token on the line: always emit it, even when it is longer than
            # `maxlen`, so that an oversized leading token does not produce a blank line.
        elseif linelen + 1 + toksz > maxlen
            print(outbuf, '\n')
            linelen = 0
        else
            print(outbuf, ' ')
            linelen += 1
        end
        print(outbuf, t)
        linelen += toksz
    end
    return String(take!(outbuf))
end
There is a Markdown.wrapped_lines and similar, but that could change at any point since it is not part of a documented API (which is a pity, there is so much useful stuff there).
Thanks @Tamas_Papp. That was what I was hoping to find.
Of course, now that I have started thinking about it I realize that I just need to work with the cumulative sums of the lengths of the tokens from my function so this is where I am now.
"""
    intervals(lengths, width)

Return a partition of `1:length(lengths)` into consecutive `UnitRange{Int}`s such that
each interval is as large as it can be without its cumulative length exceeding `width`.

The tokens whose lengths are passed will be `join`ed with a space, which is why the
cumulative length of an interval counts each token's length plus one separator. To
wrap at `maxlen` characters, pass `width = maxlen + 1` (the extra one accounts for
the separator counted after the last token of each line).

A token with `length + 1 > width` cannot fit in any interval; it is placed in an
interval of its own so that the partition still covers every index. An empty
`lengths` yields an empty vector.
"""
function intervals(lengths, width)
    value = UnitRange{Int}[]
    cumlengths = cumsum(lengths .+ 1)
    ntokens = length(cumlengths)
    start = 1
    offset = 0  # cumulative length consumed by all previously emitted intervals
    while start <= ntokens
        # Last index whose cumulative length still fits in the current interval.
        # `max` guarantees progress when the token at `start` alone is too long.
        endpoint = max(start, searchsortedlast(cumlengths, offset + width))
        push!(value, start:endpoint)
        offset = cumlengths[endpoint]
        start = endpoint + 1
    end
    return value
end
"""
    word_wrap(str::AbstractString, maxlen = 92)

Split `str` on whitespace, group the tokens into lines using `intervals` (called with
the token lengths and a width of `maxlen + 1`), join the tokens of each line with a
single space, and join the lines with newlines.
"""
function word_wrap(str::AbstractString, maxlen = 92)
    words = split(str)
    ranges = intervals(map(length, words), maxlen + 1)
    lines = map(r -> join(view(words, r), ' '), ranges)
    return join(lines, '\n')
end
This is a classic job for `foldl` (I originally wrote `reduce`; thanks @tkf!):
"""
    wrap_foldl(str::AbstractString, maxlen = 92)

Word-wrap `str` with a left fold over its whitespace-separated words.

The fold state is the length of the line currently being written; each step appends
the next word to an output buffer, preceded by a space if it still fits within
`maxlen` characters or by a newline otherwise. Lines carry no trailing space. A word
longer than `maxlen` is placed on a line of its own. An empty or all-whitespace `str`
yields `""`.
"""
function wrap_foldl(str::AbstractString, maxlen = 92)
    buf = IOBuffer()
    foldl(split(str); init = 0) do linelen, word
        wordlen = length(word)
        if linelen == 0
            # Very first word: no separator, and never a leading newline.
            print(buf, word)
            return wordlen
        elseif linelen + 1 + wordlen > maxlen
            print(buf, '\n', word)
            return wordlen
        else
            print(buf, ' ', word)
            return linelen + 1 + wordlen
        end
    end
    return String(take!(buf))
end
Alternatively just use a regex:
"""
    wrap_regex(str::AbstractString, maxlen = 92)

Word-wrap `str` with a single regex substitution.

Each match is a run of 1 to `maxlen` characters followed by either a space or the end
of the string; a newline is inserted after every match. The matched space is kept in
front of the inserted newline, and the final match is also followed by a newline.
"""
function wrap_regex(str::AbstractString, maxlen = 92)
    # Up to `maxlen` characters, then a space or end-of-string.
    chunk = Regex(".{1,$maxlen}( |\$)")
    # `\0` is the whole match; the newline is appended after it.
    withbreak = SubstitutionString("\\0\n")
    return replace(str, chunk => withbreak)
end
Testing it:
julia> s = join("x".^rand(1:5,20), " ")
"xxxx xxxx x xxxx x xx x x xx x xxx xxx x xxxx xxx xxx xxxxx xxxxx x xxx "
julia> println(word_wrap(s, 20)) # from original post
xxxx xxxx x xxxx x
xx x x xx x xxx xxx
x xxxx xxx xxx xxxxx
xxxxx x xxx
julia> println(wrap_foldl(s, 20))
xxxx xxxx x xxxx x
xx x x xx x xxx xxx
x xxxx xxx xxx xxxxx
xxxxx x xxx
julia> println(wrap_regex(s, 20))
xxxx xxxx x xxxx x
xx x x xx x xxx xxx
x xxxx xxx xxx xxxxx
xxxxx x xxx
julia> s = join("x".^rand(1:5,20), " ")
"xxxx xxxx x xxxx x xx x x xx x xxx xxx x xxxx xxx xxx xxxxx xxxxx x xxx "
julia> word_wrap!(s, 20)
julia> println(s)
xxxx xxxx x xxxx x
xx x x xx x xxx xxx
x xxxx xxx xxx xxxxx
xxxxx x xxx
The solution is clever, but I think that unsafe_store! and especially pointer may be problematic, as an AbstractString could have an implementation totally different from a String and not support either.
Also, squeezing everything into a single line is not necessary, standard line breaking and indentation would make your code more readable.