Function works correctly for single file, but not in a loop?

Let’s just start with generating some sample data.

# Directory that will hold the sample files. `homedir()` is used rather than
# `ENV["HOME"]` so the script does not throw a KeyError on systems where the
# HOME environment variable is not set (e.g. Windows).
DIR0 = joinpath(homedir(), "Documents", "twosigma_demo")
# Create directory and parent directories
mkpath(DIR0)

# Create alpha_2sigma.txt with two rows
# (the triple-quoted literal is dedented, so the file holds "0 0 0\n1 1 1\n")
write(
    joinpath(DIR0, "alpha_2sigma.txt"),
    """
    0 0 0
    1 1 1
    """
)

# Create beta_2sigma.txt with no rows (an empty file)
write(
    joinpath(DIR0, "beta_2sigma.txt"),
    ""
)

# Create gamma_2sigma.txt with 150 rows and 3 columns
# Each row is three random digits joined by single spaces; rows end in "\n".
write(
    joinpath(DIR0, "gamma_2sigma.txt"),
    join(join.(eachrow(rand(0:9, 150, 3)), " "), "\n") * "\n"
)

# Create delta_2sigma.txt with 160 rows and 4 columns
write(
    joinpath(DIR0, "delta_2sigma.txt"),
    join(join.(eachrow(rand(0:9, 160, 4)), " "), "\n") * "\n"
)

Next let’s inspect the results.

julia> DIR0="$(ENV["HOME"])/Documents/twosigma_demo"
"/home/mkitti/Documents/twosigma_demo"

julia> readdir(DIR0)
4-element Vector{String}:
 "alpha_2sigma.txt"
 "beta_2sigma.txt"
 "delta_2sigma.txt"
 "gamma_2sigma.txt"

julia> println(read(joinpath(DIR0, "alpha_2sigma.txt"), String))
0 0 0
1 1 1

julia> println(read(joinpath(DIR0, "beta_2sigma.txt"), String))

julia> println(read(joinpath(DIR0, "gamma_2sigma.txt"), String))
7 4 0
1 8 1
8 4 0
0 1 6
9 5 0
0 0 5
...
# abbreviated

julia> println(read(joinpath(DIR0, "delta_2sigma.txt"), String))
2 4 1 1
4 1 7 2
1 3 2 0
3 6 2 7
2 4 6 4
7 5 5 6
1 3 0 7
5 8 2 8
7 5 1 4
2 1 5 9
...
# abbreviated

Now let’s try some simple for loops.

julia> for file in readdir(DIR0)
           println(file)
       end
alpha_2sigma.txt
beta_2sigma.txt
delta_2sigma.txt
gamma_2sigma.txt

julia> for file in readdir(DIR0)
           println(joinpath(DIR0, file))
       end
/home/mkitti/Documents/twosigma_demo/alpha_2sigma.txt
/home/mkitti/Documents/twosigma_demo/beta_2sigma.txt
/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt
/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           lines = readlines(file)
           println(length(lines))
       end
2
0
160
150

Instead of printing the number of lines in each file, let’s try to collect that into a Vector by pushing.

julia> nrows = Int[]
Int64[]

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           lines = readlines(file)
           push!(nrows, length(lines))
       end

julia> nrows
4-element Vector{Int64}:
   2
   0
 160
 150

There’s a simpler way to do the above via map:

julia> map(readdir(DIR0)) do file
           file = joinpath(DIR0, file)
           lines = readlines(file)
           return length(lines)
       end
4-element Vector{Int64}:
   2
   0
 160
 150

Now, let’s say I wanted to print the names of the files that have 150 or more lines.

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           lines = readlines(file)
           if length(lines) >= 150
               println(file)
           end
       end
/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt
/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt

We could then try to examine the number of columns.

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           lines = readlines(file)
           if length(lines) > 0
               first_line = first(lines)
               columns_in_first_line = split(first_line, " ")
               println(length(columns_in_first_line))
           else
               println("No rows!")
           end
       end
3
No rows!
4
3

Next let’s print out the files that have at least 150 rows and at least 3 columns.

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           lines = readlines(file)
           NR = length(lines)
           if NR >= 150
               first_line = first(lines)
               columns_in_first_line = split(first_line, " ")
               NC = length(columns_in_first_line)
               if NC >= 3
                   println(file, " has ", NR, " rows and ", NC, " columns")
               end
           end
       end
/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt has 160 rows and 4 columns
/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt has 150 rows and 3 columns

Instead of printing this, let’s put it into a Vector.

julia> large_files = String[]
String[]

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           lines = readlines(file)
           NR = length(lines)
           if NR >= 150
               first_line = first(lines)
               columns_in_first_line = split(first_line, " ")
               NC = length(columns_in_first_line)
               if NC >= 3
                   push!(large_files, file)
               end
           end
       end

julia> large_files
2-element Vector{String}:
 "/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt"
 "/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt"

Now let’s make this a function.

julia> function get_large_files(DIR0)
           # Return the full paths of the files in DIR0 that have at least 150
           # lines and at least 3 space-separated fields on their first line.
           large_files = String[]
           for file in readdir(DIR0)
               # readdir yields bare file names; rebuild the full path
               file = joinpath(DIR0, file)
               lines = readlines(file)
               NR = length(lines)
               if NR >= 150
                   # the column count is taken from the first line only
                   first_line = first(lines)
                   columns_in_first_line = split(first_line, " ")
                   NC = length(columns_in_first_line)
                   if NC >= 3
                       push!(large_files, file)
                   end
               end
           end
           return large_files
       end

get_large_files (generic function with 1 method)

julia> get_large_files(DIR0)
2-element Vector{String}:
 "/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt"
 "/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt"

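We can also return the number of rows and columns by collecting them in two additional vectors.

julia> function get_large_files_nrows_ncols(DIR0)
           large_files = String[]
           nrows = Int[]
           ncols = Int[]
           for file in readdir(DIR0)
               file = joinpath(DIR0, file)
               lines = readlines(file)
               NR = length(lines)
               if NR >= 150
                   first_line = first(lines)
                   columns_in_first_line = split(first_line, " ")
                   NC = length(columns_in_first_line)
                   if NC >= 3
                       push!(large_files, file)
                       push!(nrows, NR)
                       push!(ncols, NC)
                   end
               end
           end
           return large_files, nrows, ncols
       end
get_large_files_nrows_ncols (generic function with 1 method)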

julia> large_files, nrows, ncols = get_large_files_nrows_ncols(DIR0)
(["/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt", "/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt"], [160, 150], [4, 3])

julia> large_files
2-element Vector{String}:
 "/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt"
 "/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt"

julia> nrows
2-element Vector{Int64}:
 160
 150

julia> ncols
2-element Vector{Int64}:
 4
 3

Alternatively, we could return the rows and columns as a tuple.

julia> function get_large_files_and_sizes(DIR0)
           # Return two parallel vectors: the full paths of the files in DIR0
           # with at least 150 lines and at least 3 space-separated fields on
           # the first line, and the matching (rows, columns) tuples.
           large_files = String[]
           large_file_sizes = Tuple{Int,Int}[]
           for file in readdir(DIR0)
               # readdir yields bare file names; rebuild the full path
               file = joinpath(DIR0, file)
               lines = readlines(file)
               NR = length(lines)
               if NR >= 150
                   # the column count is taken from the first line only
                   first_line = first(lines)
                   columns_in_first_line = split(first_line, " ")
                   NC = length(columns_in_first_line)
                   if NC >= 3
                       # push to both vectors together so indices stay aligned
                       push!(large_files, file)
                       push!(large_file_sizes, (NR, NC))
                   end
               end
           end
           return large_files, large_file_sizes
       end
get_large_files_and_sizes (generic function with 1 method)

julia> large_files, sizes = get_large_files_and_sizes(DIR0)
(["/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt", "/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt"], [(160, 4), (150, 3)])

julia> large_files
2-element Vector{String}:
 "/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt"
 "/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt"

julia> sizes
2-element Vector{Tuple{Int64, Int64}}:
 (160, 4)
 (150, 3)

Before continuing I want to comment on comparing tuples.

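The following may be surprising. Tuples are compared lexicographically: the first pair of elements that differ decides the result, so here 160 > 150 settles the comparison and the second elements are never considered.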

julia> (160, 4) >= (150, 5)
true

I think you might want to do the following to explicitly compare each pair of numbers and determine whether every number on the left is greater than or equal to the corresponding number on the right.

julia> (160, 4) .>= (150, 5)
(true, false)

julia> all((160, 4) .>= (150, 5))
false

Using the above, let’s rewrite the function

julia> function get_large_files_and_sizes_2(DIR0)
           # Same result as filtering on rows >= 150 and columns >= 3, but the
           # size is built as a (rows, columns) tuple first and then compared
           # element-wise against the (150, 3) threshold.
           large_files = String[]
           large_file_sizes = Tuple{Int,Int}[]
           for file in readdir(DIR0)
               # readdir yields bare file names; rebuild the full path
               file = joinpath(DIR0, file)
               lines = readlines(file)
               NR = length(lines)
               # default for files with no lines, which have no first line to split
               NC = 0
               if NR > 0
                   first_line = first(lines)
                   columns_in_first_line = split(first_line, " ")
                   NC = length(columns_in_first_line)
               end
               _size = (NR, NC)
               # broadcasted comparison: both rows and columns must meet the threshold
               if all(_size .>= (150, 3))
                   push!(large_files, file)
                   push!(large_file_sizes, _size)
               end
           end
           return large_files, large_file_sizes
       end
get_large_files_and_sizes_2 (generic function with 1 method)

julia> large_files, large_file_sizes = get_large_files_and_sizes_2(DIR0)
(["/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt", "/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt"], [(160, 4), (150, 3)])

julia> large_files
2-element Vector{String}:
 "/home/mkitti/Documents/twosigma_demo/delta_2sigma.txt"
 "/home/mkitti/Documents/twosigma_demo/gamma_2sigma.txt"

julia> large_file_sizes
2-element Vector{Tuple{Int64, Int64}}:
 (160, 4)
 (150, 3)

Now let’s try this with GMT.jl.

julia> using GMT

julia> gmtread(joinpath(DIR0, "alpha_2sigma.txt"))
BoundingBox: [0.0, 1.0, 0.0, 1.0, 0.0, 1.0]

2×3 GMTdataset{Float64, 2}
 Row │ col.1  col.2  col.3 
─────┼─────────────────────
   1 │   0.0    0.0    0.0
   2 │   1.0    1.0    1.0

julia> gmtread(joinpath(DIR0, "beta_2sigma.txt"))
gmtread [WARNING]: File /home/mkitti/Documents/twosigma_demo/beta_2sigma.txt is empty!
┌ Warning: 	file "/home/mkitti/Documents/twosigma_demo/beta_2sigma.txt" is empty or has no data after the header.
└ @ GMT ~/.julia/packages/GMT/SI3aF/src/gmtreadwrite.jl:189
String[]

julia> gmtread(joinpath(DIR0, "gamma_2sigma.txt"))
BoundingBox: [0.0, 9.0, 0.0, 9.0, 0.0, 9.0]

150×3 GMTdataset{Float64, 2}
 Row │ col.1  col.2  col.3 
─────┼─────────────────────
   1 │   7.0    4.0    0.0
   2 │   1.0    8.0    1.0
   3 │   8.0    4.0    0.0
   4 │   0.0    1.0    6.0
   5 │   9.0    5.0    0.0

julia> gmtread(joinpath(DIR0, "delta_2sigma.txt"))
BoundingBox: [0.0, 9.0, 0.0, 9.0, 0.0, 9.0, 0.0, 9.0]

160×4 GMTdataset{Float64, 2}
 Row │ col.1  col.2  col.3  col.4 
─────┼────────────────────────────
   1 │   2.0    4.0    1.0    1.0
   2 │   4.0    1.0    7.0    2.0
   3 │   1.0    3.0    2.0    0.0
   4 │   3.0    6.0    2.0    7.0

Let’s make sure we can examine the sizes as we would expect.

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           gmt_hold = gmtread(file)
           println(size(gmt_hold))
       end
(2, 3)
gmtread [WARNING]: File /home/mkitti/Documents/twosigma_demo/beta_2sigma.txt is empty!
┌ Warning: 	file "/home/mkitti/Documents/twosigma_demo/beta_2sigma.txt" is empty or has no data after the header.
└ @ GMT ~/.julia/packages/GMT/SI3aF/src/gmtreadwrite.jl:189
(0, 0)
(160, 4)
(150, 3)

Let’s write a function to retrieve large datasets.

julia> function get_large_datasets_gmt(DIR0)
           # Read every file in DIR0 with gmtread and return the datasets whose
           # size is at least (150, 3), compared element-wise.
           large_datasets = GMTdataset{Float64, 2}[]
           for file in readdir(DIR0)
               # readdir yields bare file names; rebuild the full path
               file = joinpath(DIR0, file)
               dataset = gmtread(file)
               # every dimension of size(dataset) must meet its threshold
               if all(size(dataset) .>= (150,3))
                   push!(large_datasets, dataset)
               end
           end
           return large_datasets
       end
get_large_datasets_gmt (generic function with 1 method)

julia> large_datasets = get_large_datasets_gmt(DIR0);
gmtread [WARNING]: File /home/mkitti/Documents/twosigma_demo/beta_2sigma.txt is empty!
┌ Warning: 	file "/home/mkitti/Documents/twosigma_demo/beta_2sigma.txt" is empty or has no data after the header.
└ @ GMT ~/.julia/packages/GMT/SI3aF/src/gmtreadwrite.jl:189

julia> length(large_datasets)
2

julia> large_datasets[1]
BoundingBox: [0.0, 9.0, 0.0, 9.0, 0.0, 9.0, 0.0, 9.0]

160×4 GMTdataset{Float64, 2}
 Row │ col.1  col.2  col.3  col.4 
─────┼────────────────────────────
   1 │   2.0    4.0    1.0    1.0
   2 │   4.0    1.0    7.0    2.0
   3 │   1.0    3.0    2.0    0.0
   4 │   3.0    6.0    2.0    7.0
   5 │   2.0    4.0    6.0    4.0
# abbreviated

julia> large_datasets[2]
BoundingBox: [0.0, 9.0, 0.0, 9.0, 0.0, 9.0]

150×3 GMTdataset{Float64, 2}
 Row │ col.1  col.2  col.3 
─────┼─────────────────────
   1 │   7.0    4.0    0.0
   2 │   1.0    8.0    1.0
   3 │   8.0    4.0    0.0
   4 │   0.0    1.0    6.0
   5 │   9.0    5.0    0.0
   6 │   0.0    0.0    5.0
   7 │   7.0    9.0    3.0

I will also note that there is a much more compact way of writing this.

julia> function get_large_datasets_gmt_2(DIR0)
           # Broadcast joinpath over the directory listing, broadcast gmtread
           # over the resulting paths, then keep only the results whose size is
           # at least (150, 3) element-wise. The value of `filter` is returned.
           filter(gmtread.(joinpath.(DIR0, readdir(DIR0)))) do dataset
                  all(size(dataset) .>= (150,3))
           end
       end
get_large_datasets_gmt_2 (generic function with 1 method)

julia> datasets = get_large_datasets_gmt_2(DIR0);
gmtread [WARNING]: File /home/mkitti/Documents/twosigma_demo/beta_2sigma.txt is empty!
┌ Warning: 	file "/home/mkitti/Documents/twosigma_demo/beta_2sigma.txt" is empty or has no data after the header.
└ @ GMT ~/.julia/packages/GMT/SI3aF/src/gmtreadwrite.jl:189

julia> datasets[1]
BoundingBox: [0.0, 9.0, 0.0, 9.0, 0.0, 9.0, 0.0, 9.0]

160×4 GMTdataset{Float64, 2}
 Row │ col.1  col.2  col.3  col.4 
─────┼────────────────────────────
   1 │   2.0    4.0    1.0    1.0
   2 │   4.0    1.0    7.0    2.0
   3 │   1.0    3.0    2.0    0.0
   4 │   3.0    6.0    2.0    7.0
   5 │   2.0    4.0    6.0    4.0
# abbreviated

julia> datasets[2]
BoundingBox: [0.0, 9.0, 0.0, 9.0, 0.0, 9.0]

150×3 GMTdataset{Float64, 2}
 Row │ col.1  col.2  col.3 
─────┼─────────────────────
   1 │   7.0    4.0    0.0
   2 │   1.0    8.0    1.0
   3 │   8.0    4.0    0.0
   4 │   0.0    1.0    6.0
   5 │   9.0    5.0    0.0

# abbreviated

As for the error that you are getting, the issue arises if you attempt to do the following.

julia> epoch_hold = nothing

julia> epoch_hold[:,1]
ERROR: MethodError: no method matching getindex(::Nothing, ::Colon, ::Int64)
Stacktrace:
 [1] top-level scope
   @ REPL[201]:1

We could avoid the above error by testing for nothing:

julia> epoch_hold = gmtread(joinpath(DIR0, "alpha_2sigma.txt"))
BoundingBox: [0.0, 1.0, 0.0, 1.0, 0.0, 1.0]

2×3 GMTdataset{Float64, 2}
 Row │ col.1  col.2  col.3 
─────┼─────────────────────
   1 │   0.0    0.0    0.0
   2 │   1.0    1.0    1.0

julia> if !isnothing(epoch_hold)
           epoch_hold[:,1]
       end
2-element Vector{Float64}:
 0.0
 1.0

julia> epoch_hold = nothing

julia> if !isnothing(epoch_hold)
           epoch_hold[:,1]
       end

Benny pointed out that there is a flaw in your function as written. We can demonstrate the flaw as follows.

julia> function gmt_read(file)
           # Read `file` with gmtread and return the result only when its
           # length exceeds 100.
           epoch_hold = gmtread(file, incols = "0,1,2");
           if length(epoch_hold) > 100
               return epoch_hold
           end
           # No `return` is reached when the condition is false, so the call
           # evaluates to `nothing` in that case.
       end
gmt_read (generic function with 1 method)

julia> gmt_read(joinpath(DIR0, "gamma_2sigma.txt"))[:,1]
150-element Vector{Float64}:
 7.0
 1.0
 8.0
# abbreviated

julia> gmt_read(joinpath(DIR0, "delta_2sigma.txt"))[:,1]
160-element Vector{Float64}:
 2.0
 4.0
# abbreviated 

julia> gmt_read(joinpath(DIR0, "alpha_2sigma.txt"))[:,1]
ERROR: MethodError: no method matching getindex(::Nothing, ::Colon, ::Int64)
Stacktrace:
 [1] top-level scope
   @ REPL[229]:1

julia> gmt_read(joinpath(DIR0, "beta_2sigma.txt"))[:,1]
gmtread [WARNING]: File /home/mkitti/Documents/twosigma_demo/beta_2sigma.txt is empty!
┌ Warning: 	file "/home/mkitti/Documents/twosigma_demo/beta_2sigma.txt" is empty or has no data after the header.
└ @ GMT ~/.julia/packages/GMT/SI3aF/src/gmtreadwrite.jl:189
ERROR: MethodError: no method matching getindex(::Nothing, ::Colon, ::Int64)
Stacktrace:
 [1] top-level scope
   @ REPL[230]:1

The flaw here is that for small datasets your function returns nothing and then you try to index the result.

julia> alpha = gmt_read(joinpath(DIR0, "alpha_2sigma.txt"))

julia> isnothing(alpha)
true

julia> alpha[:, 1]
ERROR: MethodError: no method matching getindex(::Nothing, ::Colon, ::Int64)
Stacktrace:
 [1] top-level scope
   @ REPL[234]:1

The reason this happens is that your function is equivalent to either of the following definitions, in which the implicit `return nothing` is written out explicitly.

julia> function gmt_read(file)
           # Variant with the fall-through result written out explicitly.
           epoch_hold = gmtread(file, incols = "0,1,2");
           if length(epoch_hold) > 100
               return epoch_hold
           end
           # reached only when the length check above fails
           return nothing
       end
gmt_read (generic function with 1 method)

julia> function gmt_read(file)
           # Variant with the `nothing` result written as an explicit else branch.
           epoch_hold = gmtread(file, incols = "0,1,2");
           if length(epoch_hold) > 100
               return epoch_hold
           else
               # callers that index the result must handle this case
               return nothing
           end
       end
gmt_read (generic function with 1 method)

We can reproduce the error as follows.

julia> function gmt_read(file)
           # Returns the gmtread result when its length exceeds 100, otherwise
           # `nothing`; indexing that `nothing` is what raises the MethodError.
           epoch_hold = gmtread(file, incols = "0,1,2");
           if length(epoch_hold) > 100
               return epoch_hold
           else
               return nothing
           end
       end
gmt_read (generic function with 1 method)

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           gmt_hold = gmt_read(file)
           gmt_hold[:,1]
       end
ERROR: MethodError: no method matching getindex(::Nothing, ::Colon, ::Int64)
Stacktrace:
 [1] top-level scope
   @ ./REPL[243]:4

We can correct it as follows.

julia> function gmt_read(file)
           # Always return whatever gmtread produced; the length check is left
           # to the caller, so this function no longer returns `nothing` itself.
           epoch_hold = gmtread(file, incols = "0,1,2");
           return epoch_hold
       end
gmt_read (generic function with 1 method)

julia> for file in readdir(DIR0)
           file = joinpath(DIR0, file)
           gmt_hold = gmt_read(file)
           if length(gmt_hold) > 100
               println(gmt_hold[:,1])
           end
       end
gmtread [WARNING]: File /home/mkitti/Documents/twosigma_demo/beta_2sigma.txt is empty!
┌ Warning: 	file "/home/mkitti/Documents/twosigma_demo/beta_2sigma.txt" is empty or has no data after the header.
└ @ GMT ~/.julia/packages/GMT/SI3aF/src/gmtreadwrite.jl:189
[2.0, 4.0, 1.0, 3.0, 2.0, 7.0, 1.0, 5.0, 7.0, 2.0, 5.0, 8.0, 6.0, 1.0, 8.0, 5.0, 5.0, 3.0, 9.0, 7.0, 7.0, 6.0, 2.0, 6.0, 4.0, 4.0, 7.0, 8.0, 9.0, 0.0, 8.0, 6.0, 7.0, 6.0, 6.0, 7.0, 4.0, 4.0, 1.0, 9.0, 2.0, 4.0, 0.0, 2.0, 6.0, 2.0, 8.0, 5.0, 8.0, 4.0, 8.0, 0.0, 2.0, 3.0, 1.0, 4.0, 6.0, 1.0, 5.0, 1.0, 0.0, 5.0, 6.0, 4.0, 6.0, 6.0, 4.0, 0.0, 0.0, 0.0, 4.0, 8.0, 5.0, 6.0, 3.0, 4.0, 9.0, 7.0, 5.0, 8.0, 0.0, 7.0, 8.0, 0.0, 2.0, 0.0, 7.0, 5.0, 3.0, 8.0, 5.0, 0.0, 9.0, 2.0, 0.0, 1.0, 8.0, 2.0, 3.0, 9.0, 4.0, 9.0, 9.0, 9.0, 3.0, 5.0, 7.0, 1.0, 6.0, 8.0, 3.0, 9.0, 3.0, 7.0, 7.0, 9.0, 1.0, 9.0, 6.0, 0.0, 0.0, 8.0, 3.0, 0.0, 8.0, 2.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 7.0, 8.0, 6.0, 9.0, 3.0, 4.0, 1.0, 5.0, 3.0, 3.0, 8.0, 1.0, 1.0, 3.0, 1.0, 5.0, 8.0, 7.0, 9.0, 0.0, 0.0, 3.0, 9.0, 7.0, 2.0, 6.0, 1.0, 5.0]
[7.0, 1.0, 8.0, 0.0, 9.0, 0.0, 7.0, 0.0, 8.0, 7.0, 9.0, 6.0, 9.0, 3.0, 8.0, 3.0, 6.0, 1.0, 8.0, 0.0, 5.0, 9.0, 0.0, 7.0, 5.0, 9.0, 3.0, 7.0, 6.0, 9.0, 5.0, 0.0, 1.0, 4.0, 6.0, 6.0, 4.0, 5.0, 5.0, 0.0, 1.0, 9.0, 3.0, 9.0, 6.0, 8.0, 4.0, 7.0, 3.0, 6.0, 8.0, 4.0, 5.0, 0.0, 5.0, 8.0, 5.0, 9.0, 0.0, 7.0, 1.0, 5.0, 6.0, 4.0, 6.0, 7.0, 2.0, 2.0, 2.0, 1.0, 7.0, 9.0, 7.0, 7.0, 9.0, 5.0, 7.0, 0.0, 9.0, 3.0, 4.0, 3.0, 8.0, 6.0, 9.0, 6.0, 4.0, 3.0, 4.0, 7.0, 4.0, 3.0, 5.0, 7.0, 4.0, 4.0, 4.0, 6.0, 9.0, 3.0, 0.0, 2.0, 6.0, 4.0, 0.0, 4.0, 0.0, 6.0, 1.0, 5.0, 4.0, 8.0, 5.0, 9.0, 8.0, 1.0, 4.0, 0.0, 6.0, 9.0, 1.0, 6.0, 8.0, 4.0, 1.0, 0.0, 6.0, 2.0, 6.0, 4.0, 6.0, 4.0, 2.0, 9.0, 5.0, 1.0, 5.0, 0.0, 1.0, 1.0, 9.0, 8.0, 1.0, 9.0, 2.0, 7.0, 8.0, 0.0, 5.0, 0.0]
3 Likes