# Question: How can I make glob search recursively through a nested directory structure so that headers are extracted from the HDF5 files in every subdirectory? So far, glob seems to match only files in the top-level directory.
using HDF5
using CSV
using DataFrames
using Glob
# Export selected HDF5 object attributes ("headers") from every `.h5` file found under
# `directory_path` (searched recursively) into a single CSV table.

# Define the directory containing the HDF5 files and the output CSV file path
#directory_path = raw"/Users/silva/Desktop/H5_data_header_test"
directory_path = raw"/Volumes/data/Backup_data/Z0422-17_VNC_1/raw_h5/Merlin-6049/2022/06/22/09/"
csv_file_path = raw"/Users/silvam2/Desktop/H5_data_header_test/test.csv"
# Names of the objects inside each HDF5 file whose attributes are exported
attributes_to_print = ["0-0-0", "0-0-1", "0-0-2", "0-1-0", "0-1-1", "0-1-2", "0-2-0", "0-2-1", "0-2-2"]

"""
    AttributeRecord(file, object, values)

One output row: the file label, the name of the HDF5 object that was looked up, and the
attributes read from it as `name => string(value)`. `values` is empty when the object
does not exist in the file.
"""
struct AttributeRecord
    file::String
    object::String
    values::Dict{String,String}
end

"""
    find_h5_files(root) -> Vector{String}

Return the sorted paths of all files ending in `.h5` located in `root` or in any of its
subdirectories.

`Glob.glob("*.h5", root)` only matches entries directly inside `root`; `walkdir` visits
the whole directory tree instead. Subdirectories that cannot be read are reported with
a warning and skipped.

# Throws
- `ArgumentError` if `root` is not an existing directory.
"""
function find_h5_files(root::AbstractString)
    isdir(root) || throw(ArgumentError("not a directory: $root"))
    paths = String[]
    skip_dir = err -> @warn("Skipping unreadable directory", exception = err)
    for (dir, _, files) in walkdir(root; onerror = skip_dir)
        for name in files
            # Ignore hidden dot-files (e.g. macOS `._*.h5` sidecar files), which are
            # typically not valid HDF5 data.
            if endswith(name, ".h5") && !startswith(name, ".")
                push!(paths, joinpath(dir, name))
            end
        end
    end
    return sort!(paths)
end

"""
    collect_attributes(file_path, file_label, object_names) -> Vector{AttributeRecord}

Open `file_path` read-only and return one `AttributeRecord` per entry of `object_names`,
labelled with `file_label`. Objects missing from the file still produce a record (with
no attribute values) so that every requested name appears in the output.
"""
function collect_attributes(file_path::AbstractString, file_label::AbstractString, object_names)
    records = AttributeRecord[]
    h5open(file_path, "r") do h5f
        for name in object_names
            values = Dict{String,String}()
            if haskey(h5f, name)
                obj = h5f[name]
                try
                    for (key, value) in attrs(obj)
                        values[key] = string(value)
                    end
                finally
                    # Release the object handle instead of waiting for the finalizer.
                    close(obj)
                end
                println("Collected attribute: ", name)
            else
                println("Attribute not found: ", name)
            end
            push!(records, AttributeRecord(file_label, name, values))
        end
    end
    return records
end

"""
    attribute_table(records) -> DataFrame

Build a table with the columns `FileName` and `Attribute` followed by one `String`
column per attribute name found in `records`, in sorted order. Cells for attributes a
record does not have are empty strings.
"""
function attribute_table(records)
    attribute_names = Set{String}()
    for record in records
        union!(attribute_names, keys(record.values))
    end
    table = DataFrame(
        FileName = String[record.file for record in records],
        Attribute = String[record.object for record in records],
    )
    for key in sort!(collect(attribute_names))
        column = String[get(record.values, key, "") for record in records]
        # `makeunique` keeps an HDF5 attribute that happens to be called "FileName" or
        # "Attribute" from replacing the identifying columns.
        insertcols!(table, key => column; makeunique = true)
    end
    return table
end

# Get the list of all HDF5 files in the directory tree
h5_files = find_h5_files(directory_path)
if isempty(h5_files)
    println("No HDF5 files found in the directory.")
end

# Collect all attribute data
attribute_data = AttributeRecord[]
for file_path in h5_files
    # Path relative to the search root: identical to the bare file name for files at the
    # top level, and unambiguous for files that share a name in different subfolders.
    file_name = relpath(file_path, directory_path)
    println("Processing file: ", file_name)
    try
        append!(attribute_data, collect_attributes(file_path, file_name, attributes_to_print))
    catch err
        # Let Ctrl-C stop the run; any other failure only skips this one file.
        err isa InterruptException && rethrow()
        @warn "Skipping file that could not be read" file_path exception = (err, catch_backtrace())
    end
end

# Fill the DataFrame with the collected attribute data
df = attribute_table(attribute_data)

# Write the DataFrame to a CSV file
mkpath(dirname(csv_file_path))
CSV.write(csv_file_path, df)
println("CSV file written to: ", csv_file_path)