I’m excited to announce OhMyArtifacts.jl, a dynamic artifact system that lives in a scratchspace. It has an API similar to Artifacts.jl, but it uses SHA-256 hashes and caches every file. The usage of each cached file is tracked, so unused caches can also be removed automatically.
Here is the iris example with OhMyArtifacts:
julia> using OhMyArtifacts
[ Info: Precompiling OhMyArtifacts [cf8be1f4-309d-442e-839d-29d2a0af6cb7]
# Register and get the Artifacts.toml
julia> myartifacts_toml = @my_artifacts_toml!();
# Query the Artifacts.toml for the hash bound to "iris"
julia> iris_hash = my_artifact_hash("iris", myartifacts_toml)
# If not bound
julia> if isnothing(iris_hash)
iris_hash = create_my_artifact() do working_dir
iris_url_base = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris"
download("$iris_url_base/iris.data", joinpath(working_dir, "iris.csv"))
download("$iris_url_base/bezdekIris.data", joinpath(working_dir, "bezdekIris.csv"))
download("$iris_url_base/iris.names", joinpath(working_dir, "iris.names"))
# explicitly return the path
return working_dir
end
bind_my_artifact!(myartifacts_toml, "iris", iris_hash)
end
julia> iris_hash
SHA256("83c1aca5f0e9d222dee51861b3def4e789e57b17b035099570c54b51182853d4")
julia> my_artifact_exists(iris_hash)
true
# Get the artifact path
julia> iris_dataset_path = my_artifact_path(iris_hash);
julia> readdir(iris_dataset_path)
3-element Vector{String}:
"bezdekIris.csv"
"iris.csv"
"iris.names"
julia> readline(joinpath(iris_dataset_path, "iris.names"))
"1. Title: Iris Plants Database"
# Every subfile is a symlink
julia> all(islink, readdir(iris_dataset_path, join=true))
true
julia> iris_name_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.names";
# Helper function that combines create and bind
julia> iris_name_hash = download_my_artifact!(Base.download, iris_name_url, "iris.names", myartifacts_toml)
SHA256("38043f885d7c8cfb6d2cec61020b9bc6946c5856aadad493772ee212ef5ac891")
# Same value
julia> readline(my_artifact_path(iris_name_hash))
"1. Title: Iris Plants Database"
# Same file
julia> readlink(joinpath(iris_dataset_path, "iris.names")) == my_artifact_path(iris_name_hash)
true
# Unbind iris dataset
julia> unbind_my_artifact!(myartifacts_toml, "iris")
julia> using Dates
# Recycle: "iris/iris.names" is also used by "iris.names", so only
# 2 files ("iris/iris.csv", "iris/bezdekIris.csv") and 1 folder ("iris") are removed
julia> OhMyArtifacts.find_orphanages(; collect_delay=Hour(0))
[ Info: 3 MyArtifacts deleted (24.889 KiB)
# "iris.names" still exists
julia> my_artifact_exists(iris_name_hash)
true
julia> readline(my_artifact_path(iris_name_hash))
"1. Title: Iris Plants Database"
# Iris dataset is removed
julia> my_artifact_exists(iris_hash)
false
julia> isdir(iris_dataset_path)
false
# Unbind and recycle
julia> unbind_my_artifact!(myartifacts_toml, "iris.names")
# On `using OhMyArtifacts`, this function is called automatically if it has not been run
# in the last 7 days, so generally we don't need to call it manually.
julia> OhMyArtifacts.find_orphanages(; collect_delay=Hour(0))
[ Info: 1 MyArtifact deleted (10.928 KiB)