Help speed up the function


#1

ArLog - Global array

# Global (non-const) list of file paths; searchHash below opens and memory-maps each entry.
ArLog = Array{String,1}()
push!(ArLog, "Drop/BC/1.bin")
push!(ArLog, "Drop/BC/2.bin")
....
# searchHash: for every path in the global `ArLog`, memory-map the file as a
# 64 x 10^9 BitArray and compare 64-element slices of it against 64-element
# slices of `C`, calling `SaveRes` on each match.
#   n - passed (as a string) to `SaveRes`
#   C - BitArray whose 64-row slices are the values searched for
# NOTE(review): `Mmap`, `SaveRes` and `arLine` are not defined anywhere in this snippet.
searchHash = function(n::BigInt,C::BitArray)
		for line in ArLog
			s1 = open(line,"r+")
			# Memory-map the open file as a 64 x 10^9 BitArray.
			A2 = Mmap.mmap(s1, BitArray, (64,10^9));
			# NOTE(review): eachindex runs over every element (linear indices), while `i` and `k`
			# are used as column indices below; the `break`s cap them at 10^9 and 438.
			for i in eachindex(A2)
				for k in eachindex(C)
					# Both range-indexing expressions copy a 64-element slice on every iteration.
					if(A2[1:64,i] == C[1:64,k] )
						SaveRes(arLine[2],string(i),string(n))
					end
					k == 438 && break
				end
				i == 10^9 && break
			end
			# Finalize the mapped array, close the file, and force a GC pass before the next file.
			finalize(A2)
			close(s1)
			GC.gc();
		end	
	end

Now this function is very slow.


#2

const


#3

what?


#4

const ArLog


#5

Also note that the way you’re defining functions will make calling them very slow because they are also not const.


#6

Perhaps this will speed up execution, but the bottleneck is clearly here.

# Hot loop quoted from searchHash: every (i, k) pair copies two 64-element slices and compares them.
for i in eachindex(A2)
   for k in eachindex(C)
	# A2[1:64,i] does not depend on k, yet it is re-sliced on every inner iteration.
	if(A2[1:64,i] == C[1:64,k] )
		SaveRes(arLine[2],string(i),string(n))
	end
	k == 438 && break
   end
   i == 10^9 && break
end

#7

Since ArLog is not type stable, the type of line cannot be inferred, nor can the types of s1, A2, i, and k. This results in a massive reduction in speed. Your bottleneck will run faster if you make that change.


#8

Two low-hanging fruits are

  1. providing the ArLog as an argument to the function,
  2. using the standard way of defining functions (instead of defining a closure and assigning it to a variable like you are doing — where did you get that idea?)

Eg

# Suggested shape: an ordinary named function that receives the file list as the
# `ar_log` argument instead of reading the global `ArLog`. The body is elided here.
function search_hash(n::BigInt,C::BitArray, ar_log)
    ...
end

Also, you should consider using standard naming conventions, and read the performance tips.


#9

Here is a more detailed code.

# Immutable pair of coordinates sharing one type parameter `T`.
# NOTE(review): `@everywhere` comes from the Distributed stdlib; no `using Distributed` is shown here.
@everywhere struct Point{T}
    x::T
    y::T
end
# bigfunc: wraps the search in a function so that `ArPub` and `ArLog` are arguments
# rather than globals. Defines two closures (`searchHash`, `GenC`) and runs `GenC(big(10)^20)`.
#   ArPub - collection whose length sets the column count of `C` and the inner-loop cutoff
#   ArLog - iterable of file paths to memory-map and scan
# NOTE(review): `Mmap`, `SaveRes` and `arLine` are not defined in this snippet.
function bigfunc(ArPub,ArLog)

	# Scan every file in ArLog for 64-element slices equal to a 64-element slice of C.
	searchHash = function(n::BigInt,C::BitArray)
		for line in ArLog
			s1 = open(line,"r+")
			# Memory-map the open file as a 64 x 10^9 BitArray.
			A2 = Mmap.mmap(s1, BitArray, (64,10^9));
			# NOTE(review): eachindex runs over every element (linear indices), while `i` and `k`
			# are used as column indices; the `break`s cap them at 10^9 and length(ArPub).
			for i in eachindex(A2)
				for k in eachindex(C)
					# Both range-indexing expressions copy a 64-element slice on every iteration.
					if(A2[1:64,i] == C[1:64,k] )
						SaveRes(arLine[2],string(i),string(n))
					end
					k == length(ArPub) && break
				end
				i == 10^9 && break
			end
			# Finalize the mapped array, close the file, and force a GC pass before the next file.
			finalize(A2)
			close(s1)
			GC.gc();
		end	
	end
	# Build a 64 x length(ArPub) all-false BitArray, fill it (elided), and pass it to searchHash.
	GenC = function(n::BigInt)
		C = falses(64,length(ArPub))
		...
		searchHash(n,C)	
		...
	end
	GenC(big(10)^20)
end	

# Load points from "Points.txt": for each line whose first two characters are "04",
# characters 3-66 are parsed as the hexadecimal x value and characters 67-end as y.
# NOTE(review): `line[1:2]` presumably assumes every line has at least two characters — confirm.
ArPub = Point{BigInt}[]
open("Points.txt") do f
     for line in eachline(f)
		if line[1:2] == "04"
			point = Point{BigInt}(parse(BigInt, line[3:66], base=16), parse(BigInt, line[67:end], base=16))
			push!(ArPub, point)
		end
    end	
end

# Build the list of files to scan: the second space-separated field of each line
# in "Drop/BC/log.txt" is stored as a String.
ArLog = Array{String,1}()
open("Drop/BC/log.txt") do f
     for line in eachline(f)
		# `arLine` is local to this loop body.
		arLine = split(line," ")
		push!(ArLog, string(arLine[2]))
    end
end
# Run the search with both collections passed as arguments.
bigfunc(ArPub,ArLog)

#10

You haven’t made any of the changes that were suggested?

This is useful reading: https://docs.julialang.org/en/v1/manual/performance-tips/index.html. Many of the problems with the code are described there.


#11

I made all the changes proposed above. This gave a slight increase in performance.
The bottleneck is still here.

# Revised hot loop: the slice of A2 is taken once per outer iteration instead of once per (i, k) pair.
for i in eachindex(A2)
	# This range-indexing expression copies a 64-element slice; the copy is reused for every k.
	chone = A2[1:64,i]
	for k in eachindex(C)
		# C[1:64,k] still copies a 64-element slice on every inner iteration.
		if chone == C[1:64,k]
			SaveRes(arLine[2],string(i),string(n))
		end
		k == length(ArPub) && break
	end
	i == 10^9 && break
end

I hoisted “A2[1:64, i]” out of the inner loop into the variable chone; this gave a noticeable performance boost.
But still the function works very slowly.


#12

I don’t mean to be rude, but if you’re expecting us to help you speed up your code, you could at least provide a Minimal Working Example that we can use to benchmark on our local machines. Otherwise, all we can do is point out anything obvious that we see in the code you’ve posted, which likely won’t get you nearly the results you desire.


#13

What you have posted is not a function.

Please provide a self-contained MWE.