Iterative Filter Loop Help:

I was able to create a single filter quickly such that every station that matched was collected and written to a file for that particular epoch. This is how I wrote it:

# Individual Filter for GPS time series: 

c_glob = glob("*.tenv3", "/path/to/directory/julia/combined_tenv3/");

mathold = [];

for file in c_glob
    c_hold = readdlm(file)
    ffilt = c_hold[:, 3] .=== 2001.0021 # change this to desired epoch
    fdata = c_hold[ffilt, :]
    #println(fdata)

    if !isempty(fdata)
        # println("Match in Dataset")
        push!(mathold, fdata)        
    end
end

writedlm("2001_0021.txt", mathold)

I wrote this that out as an exercise to make sure I was able to create the filter, and that it did what I wanted. The contents of the files look like this, for a better idea:

340Ă—19 Matrix{Any}:
 "AGMT"  "01JAN01"  2001.0  51910  -0.987322   0.811743  …  -0.000411744  0.967775  0.942858  1.52094
 "AHID"  "01JAN01"  2001.0  51910   0.583295   0.639153      0.0016273    1.09953   1.02615   1.37711
 "ALAM"  "01JAN01"  2001.0  51910   0.351062   0.590224     -0.000398521  0.982547  0.991641  1.03577
 "ALBH"  "01JAN01"  2001.0  51910  -0.013043  -0.002192     -0.00154369   1.123     1.17158   1.02575
 "ALGO"  "01JAN01"  2001.0  51910   0.738829   0.093624     -0.00367479   0.967708  0.985713  1.03082
 "AMC2"  "01JAN01"  2001.0  51910  -0.30581    0.127186  …  -0.00377606   1.10882   1.14695   1.41331
 "AOA1"  "01JAN01"  2001.0  51910  -0.415567   0.564857      0.000589432  0.958284  1.0022    1.03322
 "APEX"  "01JAN01"  2001.0  51910  -0.52982    0.759745      0.000119521  0.947128  0.933431  0.960929
 "ARGU"  "01JAN01"  2001.0  51910  -0.172066   0.855207      0.000294808  1.19319   1.05862   1.28072
 ⋮                                             ⋮         ⋱   ⋮                                
 "WINN"  "01JAN01"  2001.0  51910   0.319715   0.184243     -0.0111245    1.97467   1.29327   0.990721
 "WIS5"  "01JAN01"  2001.0  51910  -0.348374   0.696203     -0.00400599   0.92948   1.09301   1.37689
 "WKPK"  "01JAN01"  2001.0  51910  -0.593569   0.530484     -0.00214284   2.60192   1.07491   1.18526
 "WLCI"  "01JAN01"  2001.0  51910   0.293657   0.620889     -0.00626683   1.1358    1.00141   0.998941
 "WMAP"  "01JAN01"  2001.0  51910  -1.02753    0.897148  …  -0.000160677  0.853714  0.96139   1.02961
 "WNFL"  "01JAN01"  2001.0  51910   0.160059   0.497349      0.0037802    1.10311   1.0942    1.01522
 "WNRA"  "01JAN01"  2001.0  51910   0.319324   0.376334      0.00509105   0.969372  1.05677   0.972638
 "YBHB"  "01JAN01"  2001.0  51910  -0.103833   0.694075      0.00383063   1.00266   1.50803   1.29484
 "YOU5"  "01JAN01"  2001.0  51910   0.275709   0.207637     -0.00390316   1.02063   1.22445   1.3385                             

And continue down for however many stations meet the criteria. Usually between 340 and 370 rows.

Anyway, my next goal was to try and create a loop that would iterate through every epoch in the time frame I’m working in, which is from 2001.0021 to 2005.9986, with 1826 total time steps.

I feel like I am super close to getting it, but, when I try to write the files, they are not coming out as I would like. Either everything gets written as a single, gigantic, text file, or, I’ll only get the last station in the file at each epoch written to the file. I’m sure I’m missing something simple.

Here is what I’ve implemented:

epochdir0 = "/path/to/write/to/Epochs/";
c_glob = glob("*.tenv3", "/path/to/files/julia/combined_tenv3/");
mathold = [];

function matchyr(file)

    c_hold = readdlm(file)

    return file_hold = [c_hold[:, 3] c_hold[:, 16] c_hold[:, 10] c_hold[:, 1]]

end


for (i,file) in enumerate(c_glob)

    file_hold = matchyr(file)

    for yr in all_years
        yrfilt = file_hold[:, 1] .===  yr
        yrdata = file_hold[yrfilt, :]

        if !isempty(yrdata)
            push!(mathold, yrdata)
            # writedlm(joinpath(epochdir0, "$yr[1]" *  "_epoch.txt"), mathold)
        end
        # writedlm(joinpath(epochdir0, "$yr" *  "_epoch.txt"), mathold)
    end
    writedlm(joinpath(epochdir0, "$i" *  "_epoch.txt"), mathold)
end

# writedlm(joinpath(epochdir0,  "_epoch.txt"), mathold)

As you can see, I’ve tried to move the writedlm() operation to various places within my loops, but it seems that writing this iteration is fundamentally different than the single loop filter I created.

Oh, and the variable all_years was defined further up in my code in a different process. It contains all the decimal years in the time-frame, as I’m sure y’all assumed anyway. Just wanted to point it out to avoid any potential confusion.

Any help/guidance would be greatly appreciated.

Thank you in advance!

  • Rob

######################################################
Update: I got it to work
######################################################

So, on the last attempt of the night, I decided to throw an extra !isempty() in the loop, and now it’s working?

Can someone explain to me why this is writing it correctly, albeit very slowly. lol

epochdir0 = "/home/rob/Documents/julia/Epochs/";

function matchyr(yr)
    mathold = [];
    c_glob = glob("*.tenv3", "/home/rob/Documents/julia/combined_tenv3/");
    for file in c_glob
        c_hold = readdlm(file)
        ffilt = c_hold[:, 3] .=== yr # change this to desired epoch
        fdata = c_hold[ffilt, :]
        #println(fdata)
    

        if !isempty(fdata)
            println("Match in Dataset")
            push!(mathold, fdata)        
        end
    end
    return mathold
end

for yr in all_years
    mathold = matchyr(yr)
    if !isempty(mathold)
        writedlm(joinpath(epochdir0, "$yr" * "_epoch.txt"), mathold)
    end
end

I 100% got lucky with this, but, at least it works. I forgot to comment out println("Match in Dataset"). Woops.

At the current write speed, this may take an hour or so? If anyone can help me understand why it worked, and ways to improve the efficiency, I would greatly appreciate it!

I thought I had an “ah-ha!” moment after I posted, but, It still didn’t work. But, maybe I’m a step closer. :man_shrugging:

epochdir0 = "/path/to/write/julia/Epochs/";

function matchyr(yr)

    c_glob = glob("*.tenv3", "/path/to/directory/julia/combined_tenv3/");
    mathold = []
    for file in c_glob
        c_hold = readdlm(file)
        ffilt = c_hold[:, 3] .=== yr # change this to desired epoch
        fdata = c_hold[ffilt, :]
        #println(fdata)

        if !isempty(fdata)
            # println("Match in Dataset")
            push!(mathold, fdata)        
        end
    end
    return mathold
end

for yr in all_years
    yrdata = matchyr(yr)
    writedlm(joinpath(epochdir0, "$yr" * "_epoch.txt"), mathold)
end

The idea was that I should just turn the original filter I wrote into a function, and then just used a loop that way, it would work. But this literally does not write anything, get stuck in an infinite loop, and writes all empty files. But the have the correct names, at least. lol

Please consider adding some context to your first post.

What kind of filter are you talking about? And you start the post with

every station that matched

but we don’t know what is a station is in this context.

It seems that you’ve got two problems that you should tackle seperately:

  1. Create a filtered data set from some file
  2. Write out that filtered data set to a new file

You’ve already broken up your problem into these two steps above, so you can test them separately: does

matchyr(2001.0)

return what you want? If so, you can do

all_years = [matchyr(y) for y in all_years]

to make sure it works for all of your years.

If you’ve satisfied yourself that that works, you can write out a single file:

writedlm(joinpath(epochdir0, "2001.0_epoch.txt"), matchyr(2001.0))

and make sure that that works. If it does, you should be able to put it all together!

1 Like