AMDGPU.jl issues

Hello, good evening.
I'm having an issue getting AMDGPU.jl to work with ROCm HIP. The issue happens when running:

	# This is the snippet that fails: moving the model with the
	# MLDataDevices device object triggers the MethodError quoted below.
	device = MLDataDevices.AMDGPUDevice()  # get device.
	
    # Multilayer Perceptron:
    model = Chain(
        Dense(2 => 3, tanh),      
        BatchNorm(3),
        Dense(3 => 2)) |> device  # Move data to device.

This worked with CUDA, but for AMD I can't get it to work. Can someone help, please? Gemini couldn't help either.

MethodError: no method matching amdgpu_array_adapt(::AMDGPUDevice{Nothing, Missing}, ::Matrix{Float32}) The function `amdgpu_array_adapt` exists, but no method is defined for this combination of argument types. Closest candidates are: amdgpu_array_adapt(::Type{T}, ::Any) where T @ AMDGPUExt [~/.julia/packages/MLDataDevices/4qHOT/ext/AMDGPUExt.jl:81](http://localhost:8888/home/jgardona/.julia/packages/MLDataDevices/4qHOT/ext/AMDGPUExt.jl#line=80)

This worked for me. If someone has a solution that uses the methods above, please show me.

	# Move manually: walk the model tree and convert every array leaf
	# to a ROCArray, bypassing the broken AMDGPUDevice adapt path.
	model = fmap(x -> x isa AbstractArray ? AMDGPU.ROCArray(x) : x, model)

Here is the solution I could build. I think the issue with the code above is a bug in get_device()/AMDGPUDevice(). Using low-level functions for AMD ROCm worked as expected, and I have migrated the Flux XOR example to AMD ROCm.

let
	# Flux "quickstart" XOR example ported to AMD ROCm.
	# Data: 1000 random 2-D points; label is xor(x > 0.5, y > 0.5).
	noisy = rand(Float32, 2, 1000)
	truth = [xor(col[1]>0.5, col[2]>0.5) for col in eachcol(noisy)]
	target = Flux.onehotbatch(truth, [true, false])
	
	# Manual device movement: `fmap` converts every array leaf, working
	# around the `amdgpu_array_adapt` MethodError from AMDGPUDevice().
	gpu(x) = fmap(AMDGPU.roc, x) 
	cpu(x) = fmap(Array, x)
	
	model = Chain(
	    Dense(2 => 3, tanh),
	    BatchNorm(3),
	    Dense(3 => 2)) |> gpu
	
	# Untrained predictions, kept for the "before" plot.
	out1 = model(noisy |> gpu)
	probs1 = softmax(out1) |> cpu
	
	loader = Flux.DataLoader((noisy, target), batchsize=64, shuffle=true);
	opt_state = Flux.setup(Flux.Adam(0.01), model)
	
	# Typed accumulator: `[]` would be a Vector{Any} (slow, untyped).
	losses = Float32[]
	for epoch in 1:1_000
	    for (x_cpu, y_cpu) in loader
	        # Each batch is moved to the GPU before the gradient pass.
	        x, y = x_cpu |> gpu, y_cpu |> gpu
	        
	        loss, grads = Flux.withgradient(model) do m
	            y_hat = m(x)
	            Flux.logitcrossentropy(y_hat, y)
	        end
	        Flux.update!(opt_state, model, grads[1])
	        push!(losses, loss)
	    end
	end
	
	out2 = model(noisy |> gpu)
	probs2 = softmax(out2) |> cpu
	# Row 1 is the probability of the `true` class, because the
	# onehotbatch label order above is [true, false].
	acc = mean((probs2[1,:] .> 0.5) .== truth)
	@info "Acurácia Final: $acc"

	# Plots — 4 panels, so use a 2x2 layout (the original (2,3) left
	# two blank cells) with a height that fits two rows.
	p_true = scatter(noisy[1,:], noisy[2,:], zcolor=truth, title="True")
	p_raw =  scatter(noisy[1,:], noisy[2,:], zcolor=probs1[1,:], title="Untrained", clims=(0,1))
	p_done = scatter(noisy[1,:], noisy[2,:], zcolor=probs2[1,:], title="Trained")
	plosses = plot(losses; xaxis=(:log10, "iteration"), yaxis="loss", label="per batch")
	n = length(loader)
	plot!(n:n:length(losses), mean.(Iterators.partition(losses, n)),
	    label="epoch mean", dpi=200)
	plot(p_true, p_raw, p_done, plosses, layout=(2,2), size=(1000,660))
end