I cannot use my GPU despite trying everything

I get this error (a more detailed error is at the bottom):
WARNING: Error while freeing DeviceBuffer(1024 bytes at 0x0000000205003000): CUDA.CuError(code=CUDA.cudaError_enum(0x000002bc), details=CUDA.Optional{String}(data=nothing))

and I have tried everything, such as reinstalling all packages, updating them, checking the CUDA toolkit version, and trying to run the script directly in the PC terminal / Julia REPL.

Side note: I am using VS Code.
I will add my code as a comment because I get an error when trying to create the topic.

more detailed error: WARNING: Error while freeing DeviceBuffer(1024 bytes at 0x0000000205003400):
CUDA.CuError(code=CUDA.cudaError_enum(0x000002bc), details=CUDA.Optional{String}(data=nothing))

Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA C:\Users\User\.julia\packages\CUDA\YIj5X\lib\cudadrv\libcuda.jl:27
[2] check
@ C:\Users\User\.julia\packages\CUDA\YIj5X\lib\cudadrv\libcuda.jl:34 [inlined]
[3] cuMemFreeAsync
@ C:\Users\User\.julia\packages\CUDA\YIj5X\lib\utils\call.jl:26 [inlined]
[4] free(buf::CUDA.Mem.DeviceBuffer; stream::Nothing)
@ CUDA.Mem C:\Users\User\.julia\packages\CUDA\YIj5X\lib\cudadrv\memory.jl:97 [inlined]
[5] free
@ C:\Users\User\.julia\packages\CUDA\YIj5X\lib\cudadrv\memory.jl:92 [inlined]
[6] #actual_free#1001
@ C:\Users\User\.julia\packages\CUDA\YIj5X\src\pool.jl:78 [inlined]
[7] actual_free
@ C:\Users\User\.julia\packages\CUDA\YIj5X\src\pool.jl:75 [inlined]
[8] #_free#1026
@ C:\Users\User\.julia\packages\CUDA\YIj5X\src\pool.jl:506 [inlined]
[9] _free
@ C:\Users\User\.julia\packages\CUDA\YIj5X\src\pool.jl:493 [inlined]
[10] macro expansion
@ C:\Users\User\.julia\packages\CUDA\YIj5X\src\pool.jl:478 [inlined]
[11] macro expansion
@ .\timing.jl:395 [inlined]
[12] #free#1025
@ C:\Users\User\.julia\packages\CUDA\YIj5X\src\pool.jl:477 [inlined]
[13] free
@ C:\Users\User\.julia\packages\CUDA\YIj5X\src\pool.jl:466 [inlined]
[14] (::CUDA.var"#1032#1033"{CUDA.Mem.DeviceBuffer, Bool})()
@ CUDA C:\Users\User\.julia\packages\CUDA\YIj5X\src\array.jl:101
[15] #context!#915
@ C:\Users\User\.julia\packages\CUDA\YIj5X\lib\cudadrv\state.jl:170 [inlined]
[16] context!(ctx::CuContext)
@ CUDA C:\Users\User\.julia\packages\CUDA\YIj5X\lib\cudadrv\state.jl:165 [inlined]
[17] _free_buffer(buf::CUDA.Mem.DeviceBuffer, early::Bool)
@ CUDA C:\Users\User\.julia\packages\CUDA\YIj5X\src\array.jl:89
[18] release(rc::GPUArrays.RefCounted{CUDA.Mem.DeviceBuffer}, args::Bool)
@ GPUArrays C:\Users\User\.julia\packages\GPUArrays\dAUOE\src\host\abstractarray.jl:42
[19] unsafe_free!
@ GPUArrays C:\Users\User\.julia\packages\GPUArrays\dAUOE\src\host\abstractarray.jl:90 [inlined]
[20] unsafe_finalize!(xs::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ CUDA C:\Users\User\.julia\packages\CUDA\YIj5X\src\array.jl:113
[21] exit
@ .\initdefs.jl:28 [inlined]
[22] exec_options(opts::Base.JLOptions)
@ Base .\client.jl:323
[23] _start()
@ Base .\client.jl:552

using Flux, MLDatasets, NNlib
using CUDA, cuDNN
using Flux: onehotbatch, onecold, Optimiser
using ProgressBars
using MLUtils: DataLoader
using StatsBase
using BSON: @save, @load
using Wandb, Dates, Logging
using Parameters
using Random
using Augmentor


#using CUDA
#CUDA.CuDevice(0)
#CuDevice(0): NVIDIA GeForce RTX 3060


abstract type  LossReg end

@with_kw mutable struct loss_reg <: LossReg
    loc_accuracy = 0.0f0
    n::Int = 0
end


function reset!(acc::loss_reg)
    acc.loc_accuracy = 0
    acc.n = 0
end

function update!(acc::loss_reg, value)
    acc.loc_accuracy += value
    acc.n += 1
end

function get_avg(acc::loss_reg)
    return acc.loc_accuracy/acc.n 
end





@kwdef struct training_args
    Wandb_Name::String = "Final_deneme1"
    project_name::String = "kocak"
    in_channel::Int = 3
    η::Float64 = 3e-4
    patch_size::Int = 2
    kernel_size::Int = 7
    embedding_dim::Int = 256
    depth::Int = 32
    use_cuda::Bool = true
    CudaDevice::Int = 1
    n_epochs::Int = 100
    num_classes::Int = 10
    seed::Int = 0
    batch_size::Int = 64
end
 

function ConvMixer(in_channels::Int64, kernel_size::Int64, patch_size::Int64, dim::Int64, depth::Int64, N_classes::Int64; activation::Function)
    model = Chain(
            Conv((patch_size, patch_size), in_channels=>dim, activation; stride=patch_size),
            BatchNorm(dim),
            [
                Chain(
                    SkipConnection(Chain(Conv((kernel_size,kernel_size), dim=>dim,  activation; pad=SamePad(), groups=dim), BatchNorm(dim)), +),
                    Chain(Conv((1,1), dim=>dim, activation), BatchNorm(dim))
                ) 
                for i in 1:depth
            ]...,
            AdaptiveMeanPool((1,1)),
            Flux.flatten,
            Dense(dim,N_classes)
        )
    return model
end

function get_statistics(dataset::DataType)
    data_set = dataset(:train)[:][1]
    return mean(data_set, dims = [1, 2, 4]), std(data_set, dims = [1,2, 4])
end

  

function get_data(batchsize::Int64; dataset = MLDatasets.CIFAR10)
    ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" 

    xtrain, ytrain = dataset(:train)[:]
    xtest, ytest = dataset(:test)[:]
    

    # Normalize (these statistics are recomputed for each run)
    m, s = dataset |> get_statistics
    xtrain = @. (xtrain - m)/s
    xtest = @. (xtest - m)/s

    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)

    train_loader = DataLoader((xtrain, ytrain), batchsize=batchsize, shuffle=true, parallel = true, buffer= true)
    test_loader = DataLoader((xtest, ytest), batchsize=batchsize, parallel = true, buffer= true)
    @info "Dataset preprocessing is done!!!"
    return train_loader, test_loader
end


function train(args::training_args)

    ## Extract params from args
    η = args.η
    in_channel = args.in_channel
    patch_size = args.patch_size
    kernel_size = args.kernel_size
    embedding_dim = args.embedding_dim
    depth = args.depth
    use_cuda = args.use_cuda
    cuda_device = args.CudaDevice
    num_classes = args.num_classes
    rng_seed = args.seed
    n_epochs = args.n_epochs
    batch_size = args.batch_size
    project_name = args.project_name
    Wandb_Name = args.Wandb_Name


    train_loader, test_loader = get_data(batch_size)

    if use_cuda
        device = gpu
        device!(CUDA.CuDevice(0))
        #CUDA.device!(cuda_device)
        #@info "Training on GPU:$cuda_device"
        @info "Training on GPU"
    else
        device = cpu
        @info "Training on CPU"
    end
    
    
    model = begin
        Random.seed!(rng_seed)
        ConvMixer(in_channel, kernel_size, patch_size, embedding_dim, depth, num_classes, activation = gelu) |> device    
    end
    

    opt = Optimiser(
            WeightDecay(1f-3), 
            ClipNorm(1.0),
            ADAM(η)
            )
    opt_state = Flux.setup(opt, model)

    # Start a new run, tracking hyperparameters in config
    lg = WandbLogger(project = project_name, name = Wandb_Name*"-$(now())", config = Dict("architecture" => "CNN", "dataset" => "CIFAR-10"))
    # Use LoggingExtras.jl to log to multiple loggers together
    global_logger(lg)
    # -- #
    train_loss = loss_reg()
    val_loss = loss_reg()
    
    for epoch in 1:n_epochs

        for (x,y) in  ProgressBar(train_loader)
            x,y = map(device, [x,y])
            y = Flux.label_smoothing(y, 0.1f0)
            loss, grads = Flux.withgradient(model) do model
                Flux.logitcrossentropy(model(x), y)                
            end
            update!(train_loss, loss |> cpu)
            Flux.update!(opt_state, model, grads[1])
        end

        acc = 0.0f0
        m = 0
        for (x,y) in test_loader
            x,y = map(device, [x,y])
            z = model(x)
            temp_validation_loss = Flux.logitcrossentropy(model(x), y) 
            update!(val_loss, temp_validation_loss |> cpu)
            acc += sum(onecold(z).==onecold(y)) |> cpu
            m += size(x)[end]
        end

        
        
        #logging
        Wandb.log(lg, Dict("loss" => get_avg(train_loss), "acc"=>acc/m, "validation_loss" => get_avg(val_loss)))
        map(reset!, [train_loss, val_loss])
    end
    close(lg)
end

args = training_args()
train(args)


#=
if abspath(PROGRAM_FILE) == @__FILE__
    args = training_args()
    train(args)
end
=#

If you enclose your code in triple back-ticks, you will not have this problem. Like ```.
Or put a short statement in single back-ticks, like `@time sin(4)`.
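
For example, the fenced form looks like this (the code between the fences is just an illustration):

```julia
using CUDA
CUDA.versioninfo()
```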

What is the output of:

versioninfo()

What is the output of

using Pkg
Pkg.status()

Which graphics card do you have?
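
Also, as a quick sanity check (a minimal sketch using only standard CUDA.jl calls), you could run this in the REPL and tell us whether it errors:

using CUDA
CUDA.functional()            # should return true if the driver and runtime are usable
a = CUDA.rand(Float32, 1024) # allocate an array on the GPU
Array(a .+ 1f0)              # forces a kernel launch and a copy back to the host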

First of all, thank you so much for taking the time to help me out.
1)
versioninfo()
outputs:

Julia Version 1.10.0
Commit 3120989f39 (2023-12-25 18:01 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: Windows (x86_64-w64-mingw32)
CPU: 20 × 12th Gen Intel(R) Core™ i7-12700K
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-15.0.7 (ORCJIT, alderlake)
Threads: 1 on 20 virtual cores
Environment:
JULIA_EDITOR = code
JULIA_NUM_THREADS =

using Pkg
Pkg.status()
outputs:

Status C:\Users\User\Desktop\julia\Project.toml
[02898b10] Augmentor v0.6.6
[fbb218c0] BSON v0.3.7
[052768ef] CUDA v5.1.1
[587475ba] Flux v0.14.8
[eb30cadb] MLDatasets v0.7.14
[f1d291b0] MLUtils v0.4.4
[872c559c] NNlib v0.9.10
[d96e819e] Parameters v0.12.3
[49802e3a] ProgressBars v1.5.1
⌅ [2913bbd2] StatsBase v0.33.21
[ad70616a] Wandb v0.5.1
[02a925ec] cuDNN v1.2.1
Info Packages marked with ⌅ have new versions available but compatibility constraints restrict them from upgrading. To see why use status --outdated

My GPU is an RTX 3060 12 GB; I got it recently and it has a valid warranty.

I couldn't do the ``` thing yet, but I am going to look online and figure out how to do it.

You could even copy and paste my triple back-ticks if you cannot find them on your keyboard… three on a separate line before the code and three after…

See also: Typing the Backtick key on non-US Keyboards | spaghettidba

I figured it out after some online research; the code is properly formatted now. I have also provided answers to your questions in the previous reply.

I will test my NVIDIA GPU when I am at home… Perhaps someone else has access to such a GPU right now and can provide some advice on how to test it?

What is the output if you type in a terminal:

mkdir test
cd test
julia --project="."

and then in Julia

using Pkg
Pkg.add("CUDA")
using CUDA

CUDA.versioninfo()

?

First one:

julia> mkdir test
ERROR: ParseError:
# Error @ REPL[2]:1:6
mkdir test
#    └───┘ ── extra tokens after end of expression
Stacktrace:
 [1] top-level scope
   @ none:1

julia> cd test
ERROR: ParseError:
# Error @ REPL[3]:1:3
cd test
# └───┘ ── extra tokens after end of expression
Stacktrace:
 [1] top-level scope
   @ none:1

julia> julia --project="."
ERROR: ParseError:
# Error @ REPL[4]:1:7
julia --project="."
#     └┘ ── invalid operator
Stacktrace:
 [1] top-level scope
   @ none:1

Second one:

  Resolving package versions...
  No Changes to `C:\Users\User\Desktop\julia\Project.toml`
  No Changes to `C:\Users\User\Desktop\julia\Manifest.toml`
CUDA runtime 12.3, artifact installation
CUDA driver 12.3
NVIDIA driver 546.33.0

CUDA libraries:
- CUBLAS: 12.3.4
- CURAND: 10.3.4
- CUFFT: 11.0.12
- CUSOLVER: 11.5.4
- CUSPARSE: 12.2.0
- CUPTI: 21.0.0
- NVML: 12.0.0+546.33

Julia packages:
- CUDA: 5.1.1
- CUDA_Driver_jll: 0.7.0+1
- CUDA_Runtime_jll: 0.10.1+0

Toolchain:
- Julia: 1.10.0
- LLVM: 15.0.7

1 device:
  0: NVIDIA GeForce RTX 3060 (sm_86, 4.090 GiB / 12.000 GiB available)

First one in the Windows terminal: [screenshot]

Well, please enter the first set of commands in a terminal window, not in the Julia REPL…

In VS Code you can choose the menu entry Terminal -> New Terminal to get a terminal window. But you can also get one in Windows directly; probably just type terminal somewhere?
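
As an aside, if you prefer to stay inside Julia, the same setup can be done from the REPL itself; a minimal sketch (mkpath avoids an error if the folder already exists):

using Pkg
mkpath("test")      # Julia's own function, not the shell command
cd("test")
Pkg.activate(".")   # equivalent of starting julia with --project="."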

I just understood my mistake and corrected it; the results are in the picture at the bottom.

Good. Now just enter the second set of commands in this window.

The idea is just to install CUDA in a clean environment, so that we can be sure there are no old packages holding CUDA back at an old version.
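
A throw-away environment works just as well; here is a minimal sketch using Pkg's temporary-environment option:

using Pkg
Pkg.activate(temp=true)   # fresh, empty environment in a temporary directory
Pkg.add("CUDA")
using CUDA
CUDA.versioninfo()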

julia> using Pkg

julia> Pkg.add("CUDA")
   Resolving package versions...
    Updating `C:\Users\User\test\Project.toml`
  [052768ef] + CUDA v5.1.1
    Updating `C:\Users\User\test\Manifest.toml`
  [621f4979] + AbstractFFTs v1.5.0
⌅ [79e6a3ab] + Adapt v3.7.2
  [a9b6321e] + Atomix v0.1.0
⌅ [ab4f0b2a] + BFloat16s v0.4.2
  [fa961155] + CEnum v0.5.0
  [052768ef] + CUDA v5.1.1
  [1af6417a] + CUDA_Runtime_Discovery v0.2.2
  [3da002f7] + ColorTypes v0.11.4
  [5ae59095] + Colors v0.12.10
  [34da2185] + Compat v4.10.1
  [a8cc5b0e] + Crayons v4.1.1
  [9a962f9c] + DataAPI v1.15.0
  [a93c6f00] + DataFrames v1.6.1
  [864edb3b] + DataStructures v0.18.15
  [e2d170a0] + DataValueInterfaces v1.0.0
  [e2ba6199] + ExprTools v0.1.10
  [53c48c17] + FixedPointNumbers v0.8.4
⌅ [0c68f7d7] + GPUArrays v9.1.0
⌅ [46192b85] + GPUArraysCore v0.1.5
  [61eb1bfa] + GPUCompiler v0.25.0
  [842dd82b] + InlineStrings v1.4.0
  [41ab1584] + InvertedIndices v1.3.0
  [82899510] + IteratorInterfaceExtensions v1.0.0
  [692b3bcd] + JLLWrappers v1.5.0
  [63c18a36] + KernelAbstractions v0.9.15
  [929cbde3] + LLVM v6.4.2
  [8b046642] + LLVMLoopInfo v1.0.0
  [b964fa9f] + LaTeXStrings v1.3.1
  [1914dd2f] + MacroTools v0.5.12
  [e1d29d7a] + Missings v1.1.0
  [5da4648a] + NVTX v0.3.3
  [bac558e1] + OrderedCollections v1.6.3
  [69de0a69] + Parsers v2.8.1
  [2dfb63ee] + PooledArrays v1.4.3
  [aea7be01] + PrecompileTools v1.2.0
  [21216c6a] + Preferences v1.4.1
  [08abe8d2] + PrettyTables v2.3.1
  [74087812] + Random123 v1.6.2
  [e6cf234a] + RandomNumbers v1.5.3
  [189a3867] + Reexport v1.2.2
  [ae029012] + Requires v1.3.0
  [6c6a2e73] + Scratch v1.2.1
  [91c51154] + SentinelArrays v1.4.1
  [a2af1166] + SortingAlgorithms v1.2.1
  [90137ffa] + StaticArrays v1.9.0
  [1e83bf80] + StaticArraysCore v1.4.2
  [892a3eda] + StringManipulation v0.3.4
  [3783bdb8] + TableTraits v1.0.1
  [bd369af6] + Tables v1.11.1
  [a759f4b9] + TimerOutputs v0.5.23
  [013be700] + UnsafeAtomics v0.2.1
  [d80eeb9a] + UnsafeAtomicsLLVM v0.1.3
  [4ee394cb] + CUDA_Driver_jll v0.7.0+1
⌅ [76a88914] + CUDA_Runtime_jll v0.10.1+0
  [9c1d0b0a] + JuliaNVTXCallbacks_jll v0.2.1+0
  [dad2f222] + LLVMExtra_jll v0.0.27+1
  [e98f9f5b] + NVTX_jll v3.1.0+2
  [0dad84c5] + ArgTools v1.1.1
  [56f22d72] + Artifacts
  [2a0f44e3] + Base64
  [ade2ca70] + Dates
  [f43a241f] + Downloads v1.6.0
  [7b1f6079] + FileWatching
  [9fa8497b] + Future
  [b77e0a4c] + InteractiveUtils
  [4af54fe1] + LazyArtifacts
  [b27032c2] + LibCURL v0.6.3
  [76f85450] + LibGit2
  [8f399da3] + Libdl
  [37e2e46d] + LinearAlgebra
  [56ddb016] + Logging
  [d6f4376e] + Markdown
  [ca575930] + NetworkOptions v1.2.0
  [44cfe95a] + Pkg v1.9.2
  [de0858da] + Printf
  [3fa0cd96] + REPL
  [9a3f8284] + Random
  [ea8e919c] + SHA v0.7.0
  [9e88b42a] + Serialization
  [6462fe0b] + Sockets
  [2f01184e] + SparseArrays
  [10745b16] + Statistics v1.9.0
  [fa267f1f] + TOML v1.0.3
  [a4e569a6] + Tar v1.10.0
  [8dfed614] + Test
  [cf7118a7] + UUIDs
  [4ec0a83e] + Unicode
  [e66e0078] + CompilerSupportLibraries_jll v1.0.5+0
  [deac9b47] + LibCURL_jll v7.84.0+0
  [29816b5a] + LibSSH2_jll v1.10.2+0
  [c8ffd9c3] + MbedTLS_jll v2.28.2+0
  [14a3606d] + MozillaCACerts_jll v2022.10.11
  [4536629a] + OpenBLAS_jll v0.3.21+4
  [bea87d4a] + SuiteSparse_jll v5.10.1+6
  [83775a58] + Zlib_jll v1.2.13+0
  [8e850b90] + libblastrampoline_jll v5.8.0+0
  [8e850ede] + nghttp2_jll v1.48.0+0
  [3f19e933] + p7zip_jll v17.4.0+0
        Info Packages marked with ⌅ have new versions available but compatibility constraints restrict them from upgrading. To see why use `status --outdated -m`
Precompiling project...
  3 dependencies successfully precompiled in 35 seconds. 65 already precompiled.

julia> using CUDA

julia>

julia> CUDA.versioninfo()
CUDA runtime 12.3, artifact installation
CUDA driver 12.3
NVIDIA driver 546.33.0

CUDA libraries:
- CUBLAS: 12.3.4
- CURAND: 10.3.4
- CUFFT: 11.0.12
- CUSOLVER: 11.5.4
- CUSPARSE: 12.2.0
- CUPTI: 21.0.0
- NVML: 12.0.0+546.33

Julia packages:
- CUDA: 5.1.1
- CUDA_Driver_jll: 0.7.0+1
- CUDA_Runtime_jll: 0.10.1+0

Toolchain:
- Julia: 1.9.3
- LLVM: 14.0.6

1 device:
  0: NVIDIA GeForce RTX 3060 (sm_86, 4.001 GiB / 12.000 GiB available)

This looks good. As a next step you can run the tests:

using Pkg
Pkg.test("CUDA")

as explained here: Home · CUDA.jl

I will share the result as a text file because it is much larger than this site's limit. Here is a link for accessing it from my Google Drive: https://drive.google.com/file/d/1Bw1CIzcAFqJnwEPazvhJnms9QWyLkI_v/view?usp=drive_link

If you prefer another method, just let me know and I will provide the test results that way.

I requested access…

I have provided you access, and I just want to thank you so much for your continued support in this. I was really stuck for days and I really appreciate your efforts.

At some point you said you are using Julia 1.10.0, but in the test results that you shared I see a lot of lines like:

jfptr_exit_39510.clone_1 at C:\Users\User\AppData\Local\Programs\Julia-1.9.3\lib\julia\sys.dll (unknown line)

Which relate to Julia 1.9.3.

Perhaps you should decide which version you want to use?
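
A quick way to check which Julia actually runs your script is to print, from the same REPL or run configuration (just a sketch; in VS Code the executable should be controlled by the Julia extension's julia.executablePath setting):

println(VERSION)      # version of the running Julia process
println(Sys.BINDIR)   # folder of the julia executable that is actually in use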

Perhaps also look at this issue: Testsuite could be more careful about parallel testing · Issue #2192 · JuliaGPU/CUDA.jl · GitHub

Could you try to run:

using Pkg
Pkg.("CUDA",test_args=`--jobs=2`)

?

I have done some checks based on my suspicion, and here is the reason:

  1. The file I am working on is opened through Anaconda Navigator -> Julia environment -> VS Code.
  2. Since I had VS Code installed a few months prior to that, when I run versioninfo() in a random file in my original VS Code I get Julia Version 1.9.3.

But I am currently working on Julia Version 1.10.0 (I double-checked).