ERROR: BoundsError

question
statistics

#1

Hi,

I was trying to use HybridNB so that I can save the model for further computation. Below is the code I’m trying to execute:

# Reproduction script: load a CSV dataset, split it into continuous and discrete
# feature sets, and train a HybridNB model on them.
using NaiveBayes
using DataFrames
using Requests

# Fetch the Pima Indians diabetes data over HTTP and parse it into a table.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
data = readtable(Requests.get_streaming(url))

# size(data)[2] is the column count: every column except the last becomes a
# feature, and the last column becomes the label.
features = data[1:size(data)[2]-1]
labels = data[size(data)[2]]

# Per-column flag: true where the column's type is DataArray{Int64,1}.
col_types = colwise(typeof, features)
int_types = [x == DataType[DataArrays.DataArray{Int64,1}] for x in col_types]

labels = convert(Vector, labels)
# Columns not flagged as Int64 go to the continuous set, flagged ones to the discrete set.
# NOTE(review): the matrices are passed to restructure_matrix without a transpose;
# reply #2 below reports that transposing them makes the BoundsError go away.
training_features_continuous = restructure_matrix(convert(Matrix, features[!int_types]))
training_features_discrete = restructure_matrix(convert(Matrix, features[int_types]))

# Train the hybrid model from both feature sets and the label vector.
hy_model = train(HybridNB, training_features_continuous, training_features_discrete, labels)

Below is the error I was getting:

ERROR: BoundsError: attempt to access 2-element Array{Float64,1} at index [[1,3,5,7,10,12,18,20,21,27,28,29,30,32,33,34,35,36,40,41,42,44,46,47,49,50,51,52,54,55,57,58,59,60,62,63,65,67,68,69,71,73,74,75,76,77,79,80,81,82,83,85,86,87,89,90,91,92,94,95,96,97,98,101,102,103,104,105,106,107,108,112,113,117,118,119,121,122,123,126,127,133,134,135,136,137,138,139,140,141,142,144,145,146,147,148,149,150,151,153,156,157,158,160,161,162,163,166,167,168,169,172,173,174,176,178,180,181,182,183,184,190,191,194,196,200,201,202,203,204,205,208,210,211,212,217,222,223,224,225,226,228,229,232,233,234,239,240,241,244,246,247,248,249,250,251,252,253,256,257,258,260,262,263,265,267,268,271,272,273,274,275,277,278,279,281,282,285,286,288,289,290,294,295,297,299,302,304,305,307,310,311,313,315,316,318,320,324,325,327,329,330,331,333,334,335,336,340,341,342,343,344,345,346,347,348,350,351,352,353,354,358,361,362,364,365,367,368,371,372,373,374,376,377,379,380,381,382,383,384,385,389,390,392,393,395,396,398,401,403,405,407,410,411,412,413,416,418,420,421,422,423,426,428,430,431,432,433,434,436,437,438,439,441,442,446,447,449,450,452,453,454,456,457,459,460,461,462,463,464,465,466,467,469,470,471,472,473,474,475,477,478,479,481,482,483,486,487,488,489,490,491,492,494,495,496,497,499,500,501,503,504,505,507,508,509,511,512,513,514,517,518,519,520,521,522,524,525,526,527,528,529,530,531,532,533,534,536,537,538,543,544,547,548,549,550,551,552,553,554,555,556,557,558,559,562,563,564,565,566,567,568,570,571,572,573,574,575,576,578,581,582,583,585,587,589,591,593,594,596,597,599,600,601,602,605,607,608,609,610,613,615,616,617,620,621,622,623,624,625,626,627,628,629,631,632,633,634,636,637,639,640,641,643,644,645,649,650,651,652,653,654,656,657,658,660,665,668,669,670,671,672,673,674,677,679,680,682,684,685,686,687,688,690,692,694,697,698,699,700,703,704,705,707,710,711,713,714,717,718,720,721,723,724,725,726,727,728,729,733,734,735,736,737,738,741,742,744,745,747,751,752,756,758,760,762,763,764
,765,767]]
 in throw_boundserror(::Array{Float64,1}, ::Tuple{Array{Int64,1}}) at ./abstractarray.jl:355
 in checkbounds at ./abstractarray.jl:284 [inlined]
 in _getindex at ./multidimensional.jl:270 [inlined]
 in getindex at ./abstractarray.jl:752 [inlined]
 in fit(::NaiveBayes.HybridNB{Int64,Symbol}, ::Dict{Symbol,Array{Float64,1}}, ::Dict{Symbol,Array{Int64,1}}, ::Array{Int64,1}) at /home/ravi/.julia/v0.5/NaiveBayes/src/hybrid.jl:12
 in train(::Type{NaiveBayes.HybridNB}, ::Dict{Symbol,Array{Float64,1}}, ::Dict{Symbol,Array{Int64,1}}, ::Array{Int64,1}) at /home/ravi/.julia/v0.5/NaiveBayes/src/hybrid.jl:27

Am I doing something wrong?


#2

Changing the code as shown below worked:

# Same two calls as in the question, except each converted matrix is transposed
# (note the trailing ') before it is handed to restructure_matrix.
training_features_continuous = restructure_matrix(convert(Matrix, features[!int_types])')
training_features_discrete = restructure_matrix(convert(Matrix, features[int_types])')

The transpose (the trailing ') is needed because arrays and matrices in Julia are column-major.