I made a script to detect piracy, and I’d like feedback. Eventually I hope to put the functionality in Aqua.jl. There is already an old thread with a similar script here: Pirate Hunter. However, that script no longer works for me: it produces quite a few false positives, and it also fails to detect some pretty blatant cases of piracy, so I rewrote it. Thanks to @oxinabox for the original implementation.
How it works
Load a package `MyPkg`, then do `hunt(MyPkg)`.
- First, it traverses all objects reachable from the given module which are of type `Union{Function, Type}`, and calls `methods` on them, filtering away all methods originating in `Core` or `Base`, as these are assumed not to be piracy.
- It filters to keep methods that 1) originate from `MyPkg`, and 2) are pirates.
- A pirate method is defined as a method defined in package `X` where neither the function nor any of its arguments are from package `X`. The definition can get a little tricky with parametric types, varargs and unions, but I think my definition makes sense.
Current problems
- It’s suuuper slow, since it needs to traverse and examine every reachable object. It takes about 2 minutes. Not sure how to improve it, but feedback is very welcome. Edit: Thanks to a comment from @kristoffer.carlsson, it’s now reasonably fast!
- I’m not 100% sure my definition of piracy is correct for edge cases. I tried it on a few of my own packages, but I’d like feedback.
# Kinds of objects whose methods are collected during the scan: functions and types.
const Callable = Union{Type, Function}
# Package identifiers of Base and Core; methods defined there are skipped by the
# default filtering.
const DEFAULT_PKGS = map(Base.PkgId, (Base, Core))
"""
    all_methods(mod, done_modules, done_callables, result, filter_default) -> Vector{Method}

Recursively walk every name bound in `mod` (including non-exported and imported
names) and append the methods of each function or type found to `result`.

# Arguments
- `mod`: module to scan; it is added to `done_modules` before scanning.
- `done_modules`: modules already visited; submodules found here are not re-entered.
- `done_callables`: functions/types already processed; their methods are not
  collected a second time.
- `result`: accumulator that is mutated in place and also returned.
- `filter_default`: when `true`, methods whose defining module belongs to one of
  `DEFAULT_PKGS` are left out.
"""
function all_methods(
    mod::Module,
    done_modules::Base.IdSet{Module}, # cached to prevent inf loops
    done_callables::Base.IdSet{Callable}, # cached to prevent inf loops
    result::Vector{Method},
    filter_default::Bool
)::Vector{Method}
    push!(done_modules, mod)
    for name in names(mod; all=true, imported=true)
        # names can list undefined symbols which cannot be looked up
        isdefined(mod, name) || continue
        # Skip closures (compiler-generated names start with '#'); `startswith`
        # is also safe for an empty name, unlike `first`.
        startswith(String(name), "#") && continue
        # Plain binding lookup; no need to go through `eval` for a known symbol.
        val = getfield(mod, name)
        if val isa Module
            if !in(val, done_modules)
                all_methods(val, done_modules, done_callables, result, filter_default)
            end
        elseif val isa Callable
            if !in(val, done_callables)
                for method in methods(val)
                    # Default filtering removes all methods defined in DEFAULT_PKGS,
                    # since these may pirate each other.
                    if !(filter_default && in(Base.PkgId(method.module), DEFAULT_PKGS))
                        push!(result, method)
                    end
                end
            end
            push!(done_callables, val)
        end
    end
    return result
end
"""
    all_methods(mod::Module; filter_default::Bool=true) -> Vector{Method}

Collect the methods reachable from `mod`, starting from empty bookkeeping sets and an
empty result vector. `filter_default` is forwarded unchanged to the recursive worker.
"""
function all_methods(mod::Module; filter_default::Bool=true)
    seen_modules = Base.IdSet{Module}()
    seen_callables = Base.IdSet{Callable}()
    collected = Method[]
    return all_methods(mod, seen_modules, seen_callables, collected, filter_default)
end
##################################
# Generic fallback
"""
    is_foreign(x, pkg::Base.PkgId) -> Bool

Return `true` when `x` is considered *not* to originate from the package `pkg`.

Methods exist for modules, `DataType`s, `UnionAll`s, `TypeVar`s, `Vararg` markers and
`Union`s. Any other value (for example an integer or symbol used as a type parameter)
is classified through its `typeof`.
"""
is_foreign(@nospecialize(x), pkg::Base.PkgId) = is_foreign(typeof(x), pkg)

# A module is foreign when it resolves to a different package id than `pkg`.
is_foreign(mod::Module, pkg::Base.PkgId) = Base.PkgId(mod) != pkg

function is_foreign(@nospecialize(T::DataType), pkg::Base.PkgId)
    params = T.parameters
    # For Type{Foo}, we consider it to originate from the same as Foo
    if Base.typename(T).wrapper === Type
        return is_foreign(only(params), pkg)
    end
    # Both the type itself and all of its parameters must be foreign
    return is_foreign(T.name.module, pkg) && all(params) do param
        is_foreign(param, pkg)
    end
end

function is_foreign(@nospecialize(U::UnionAll), pkg::Base.PkgId)
    # We do not consider extending Set{T} to be piracy, if T is not foreign.
    # Extending it goes against Julia style, but it's not piracy IIUC.
    return is_foreign(U.body, pkg) && is_foreign(U.var, pkg)
end

# A type variable is classified by its upper bound.
is_foreign(@nospecialize(T::TypeVar), pkg::Base.PkgId) = is_foreign(T.ub, pkg)

function is_foreign(@nospecialize(T::Core.TypeofVararg), pkg::Base.PkgId)
    # A bare `Vararg` leaves its `T` field undefined and stands for `Vararg{Any}`;
    # reading `T.T` directly would throw an UndefRefError in that case.
    elem = isdefined(T, :T) ? T.T : Any
    return is_foreign(elem, pkg)
end

function is_foreign(@nospecialize(U::Union), pkg::Base.PkgId)
    # Even if Foo is local, overloading f(::Union{Foo, Int}) with foreign f
    # is piracy.
    return any(Base.uniontypes(U)) do T
        is_foreign(T, pkg)
    end
end
"""
    is_pirate(meth::Method) -> Bool

Return `true` when every parameter of the signature of `meth` is foreign to the
package that contains `meth.module`. Any `where` wrappers around the signature are
stripped before its parameters are inspected.
"""
function is_pirate(meth::Method)
    owner = Base.PkgId(meth.module)
    # Peel off all UnionAll layers to reach the underlying tuple type.
    sig = Base.unwrap_unionall(meth.sig)
    return all(param -> is_foreign(param, owner), sig.parameters)
end
#######################################
"""
    hunt(; from::Module=Main) -> Vector{Method}
    hunt(mod::Module; from::Module=Main) -> Vector{Method}
    hunt(pkg::Base.PkgId; from::Module=Main) -> Vector{Method}

Return the methods reachable from `from` for which `is_pirate` is `true`.

Without a positional argument every pirate method found is returned. With a module or
a package id, only pirate methods whose defining module belongs to that package are
kept; a module is first converted to its `Base.PkgId`.
"""
hunt(; from::Module=Main) = filter(is_pirate, all_methods(from))

hunt(mod::Module; from::Module=Main) = hunt(Base.PkgId(mod); from=from)

function hunt(pkg::Base.PkgId; from::Module=Main)
    return filter(all_methods(from)) do method
        # Compare packages first: it is the cheap test, so the signature walk in
        # `is_pirate` only runs for methods that actually belong to `pkg`.
        Base.PkgId(method.module) == pkg && is_pirate(method)
    end
end