Using DataDeps for multiple files in multiple subdirectories

Here is an example:

# Register the "Pi3" data dependency. The shape of `post_fetch_method`
# mirrors the shape of the remote-path list: one entry per top-level element.
register(DataDep(
    "Pi3",
    "Some message",
    [
        "https://www.angio.net/pi/digits/10.txt",
        "https://www.angio.net/pi/digits/100.txt",
        [
           "https://www.angio.net/pi/digits/1000.txt",
           "https://www.angio.net/pi/digits/10000.txt",
           "https://www.angio.net/pi/digits/100000.txt"
        ]
    ],
    sha2_256,
    post_fetch_method = [
        # Entry 1 is applied to the 1st listed file, i.e. 10.txt.
        function(filename)
            target_dir = mkpath("ten")
            mv(filename, joinpath(target_dir, basename(filename)))
        end,
        # Entry 2 is applied to the 2nd listed file, i.e. 100.txt.
        function(filename)
            target_dir = mkpath("hundred")
            mv(filename, joinpath(target_dir, basename(filename)))
        end,
        # Entry 3 is applied to every file in the 3rd element (the inner
        # vector), i.e. 1000.txt, 10000.txt, and 100000.txt.
        # Alternatively, a vector of 3 functions could be written here to
        # treat those files differently.
        function(filename)
            target_dir = mkpath("lots")
            mv(filename, joinpath(target_dir, basename(filename)))
        end,
    ]
))

# List the top-level entries of the "Pi3" data dependency.
readdir(datadep"Pi3")
# List the contents of the "ten" subdirectory inside "Pi3".
readdir(datadep"Pi3/ten")
# List the contents of the "lots" subdirectory inside "Pi3".
readdir(datadep"Pi3/lots")

The output at the end is:

julia> readdir(datadep"Pi3")
3-element Vector{String}:
 "hundred"
 "lots"
 "ten"

julia> readdir(datadep"Pi3/ten")
1-element Vector{String}:
 "10.txt"

julia> readdir(datadep"Pi3/lots")
3-element Vector{String}:
 "1000.txt"
 "10000.txt"
 "100000.txt"

In post_fetch_method you can run whatever code you like to derive the subfolder name from the filename. But the filename won’t have the subfolder embedded in it – blame RFC 6266 I guess.

1 Like