Why is this raytracer slow?

Oscar_Smith · April 14, 2021, 3:26pm

No, this still happens. in this case. There is some black magic in the parser that causes x^-3 to get lowered to Base.literal_pow(^,x,Val(-3) automatically.

linwaytin · April 14, 2021, 3:29pm

I added @inline to most functions for a test.
The result is about 83ms, versus 113ms when there is no @inline. The 83ms is similar to the speed of the C code on my machine.
It seems @inline does speed up the code, but I don’t really know why. Does that mean the function call overhead is not small?

linwaytin · April 14, 2021, 3:36pm

Yes, but other programs also use float64, so I think keeping this is good for the comparison.

Oscar_Smith · April 14, 2021, 3:45pm

Inlining is important for 2 reasons. Firstly, function call overhead is not large, but isn’t tiny. It’s somewhere around the cost of a single complex instruction (like a div instruction). More importantly, however inlining allows other optimizations such as vectorization which can be a major win.

greatpet · April 14, 2021, 3:57pm

Does that mean Julia (as currently implemented) is less good at automatic inlining than e.g. GCC?

Oscar_Smith · April 14, 2021, 3:58pm

Generally Julia inlines very aggressively compared to C. It’s not perfect though. Cost modeling is hard.

lmiq · April 19, 2021, 12:20am

One followup from this thread. The Fortran version of the code uses a single type for the “Things”, because they have the same fields:

    type TThing
        integer type ! 1 for plane, 2 for sphere
        integer surfaceType
        type(TVector) :: centerOrNormal
        real(8)       :: radiusOrOffset
    end type

of course that facilitates avoiding run-time dispatch. Using the same structure in the Julia code makes it much less allocating and a little faster. Here the Fortran version runs in 90ms and the Julia one in 115ms.

This is the Julia version (I have played a little bit with it, but from the point of view of performance that one is the most important difference - the type-unstable version runs in 130ms with 1.4m allocs):

Code

using StaticArrays
using LinearAlgebra

@enum Surface begin
    Shiny        = 1
    CheckerBoard = 2
end

struct Vec3{T} <: FieldVector{3,T}
    x::T
    y::T
    z::T
end
# dot from LinearAlgebra is much slower
dot(a::Vec3, b::Vec3) =  a.x * b.x + a.y * b.y + a.z * b.z

struct Color{T} <: FieldVector{3,T}
    r::T
    g::T
    b::T
end
dot(a::Color, b::Color) =  a.x * b.x + a.y * b.y + a.z * b.z

struct RGBColor <: FieldVector{4,UInt8}
    b::UInt8
    g::UInt8
    r::UInt8
    a::UInt8
end

struct Image
    width::Int
    height::Int
    data::Vector{RGBColor}
end
function Image(w::Int, h::Int)
  data = Vector{RGBColor}(undef, w * h);
  Image(w, h, data)
end

struct Camera{T}
    pos::Vec3{T}
    forward::Vec3{T}
    right::Vec3{T}
    up::Vec3{T}
end
function Camera{T}(pos::Vec3{T}, lookAt::Vec3{T}) where T
    _down = Vec3{T}(0.0, -1.0, 0.0);
    _forward = lookAt - pos
    forward = normalize(_forward)
    right = normalize(cross(forward, _down)) * 1.5
    up = normalize(cross(forward, right)) * 1.5   
    Camera{T}(pos, forward, right, up)
end

struct Ray{T}
    start::Vec3{T}
    dir::Vec3{T}
end

struct Thing{T}
  type::Int # 1 plane, 2 sphere
  vec::Vec3{T} # normal or center
  r::T # offset or radius
  surface::Surface
end
  
struct Intersection{T}
    thing::Thing{T}
    ray::Ray{T}
    dist::T
end

struct Light{T}
    pos::Vec3{T}
    color::Color{T}
end

struct SurfaceProperties{T}
    diffuse::Color{T}
    specular::Color{T}
    reflect::T
    roughness::T
end

const WORD  = UInt16
const DWORD = UInt32
const LONG  = Int32

struct BITMAPINFOHEADER
    biSize::DWORD
    biWidth::LONG
    biHeight::LONG
    biPlanes::WORD
    biBitCount::WORD
    biCompression::DWORD
    biSizeImage::DWORD
    biXPelsPerMeter::LONG
    biYPelsPerMeter::LONG
    biClrUsed::DWORD
    biClrImportant::DWORD
end
function BITMAPINFOHEADER(w::LONG, h::LONG)
    size = sizeof(BITMAPINFOHEADER)
    sizeImage = w * h * 4
    BITMAPINFOHEADER(size, w, -h, 1, 32, 0, sizeImage, 0, 0, 0, 0)
end

struct BITMAPFILEHEADER
    bfType::WORD
    bfSize::DWORD
    bfReserved::DWORD
    bfOffBits::DWORD
end
function BITMAPFILEHEADER(imageSize::DWORD)
    bfType =  0x4D42 # 'B' + ('M' << 8)
    offBits = 54
    size = convert(DWORD, offBits + imageSize);
    BITMAPFILEHEADER(bfType, size, 0, offBits)
end

struct Scene{T}
    things::Vector{Thing{T}}
    lights::Vector{Light{T}}
    camera::Camera{T}
    function Scene(T)
        things = [ Thing{T}(1,Vec3{T}(0.0, 1.0, 0.0), 0.0,  CheckerBoard), # plane
                   Thing{T}(2,Vec3{T}(0.0, 1.0, -0.25), 1.0, Shiny),       # sphere
                   Thing{T}(2,Vec3{T}(-1.0, 0.5, 1.5), 0.5*0.5, Shiny) ]   # sphere
        lights = [
            Light{T}(Vec3{T}(-2.0, 2.5, 0.0), Color{T}(0.49, 0.07, 0.07))
            Light{T}(Vec3{T}(1.5, 2.5, 1.5), Color{T}(0.07, 0.07, 0.49))
            Light{T}(Vec3{T}(1.5, 2.5, -1.5), Color{T}(0.07, 0.49, 0.071))
            Light{T}(Vec3{T}(0.0, 3.5, 0.0), Color{T}(0.21, 0.21, 0.35))
        ]
        camera = Camera{T}(Vec3{T}(3.0, 2.0, 4.0), Vec3{T}(-1.0, 0.5, 0.0));
        new{T}(things, lights, camera)
    end
end

const maxDepth = 5
White(T) = Color{T}(1,1,1)
Grey(T) = Color{T}(0.5,0.5,0.5)
Black(T) = Color{T}(0,0,0)
Background(T) = Black(T)
Defaultcolor(T) = Black(T)

function GetNormal(t::Thing, pos::Vec3) 
  t.type == 1 && return t.vec
  t.type == 2 && return normalize(pos-t.vec)
end

function ToRgbColor(c::Color)
    b = floor(UInt8, clamp(c.b, 0.0, 1.0) * 255.0)
    g = floor(UInt8, clamp(c.g, 0.0, 1.0) * 255.0)
    r = floor(UInt8, clamp(c.r, 0.0, 1.0) * 255.0)
    a = convert(UInt8, 255)
    return RGBColor(b, g, r, a)
end

function GetPoint(camera::Camera, x::Int, y::Int, screenWidth::Int, screenHeight::Int)
    recenterX = (x - (screenWidth / 2.0)) / 2.0 / screenWidth
    recenterY = -(y - (screenHeight / 2.0)) / 2.0 / screenHeight
    return normalize(camera.forward + ((camera.right * recenterX) + (camera.up * recenterY)))
end

@inline function GetInteresection(t::Thing{T}, ray::Ray{T}) where T
  if t.type == 1
    denom = dot(t.vec, ray.dir)
    if denom <= 0
      dist = (dot(t.vec, ray.start) + t.r) / (-denom)
      return Intersection(t, ray, dist)
    end
  elseif t.type == 2
    eo = (t.vec - ray.start)
    v = dot(eo, ray.dir)
    dist = zero(T)
    if (v >= 0)
        disc = t.r - (dot(eo, eo) - v * v)
        dist = (disc >= 0) ? v - sqrt(disc) : dist
    end
    if dist > 0
      return Intersection(t, ray, dist)
    end
  end
  nothing
end

function GetSurfaceProperties(s::Surface, pos::Vec3{T}) where T
    if s == Shiny
        return SurfaceProperties(White(T), Grey(T), T(0.7), T(250.0))
    else
        xz = floor(pos.x) + floor(pos.z)
        condition = mod(xz, 2) != 0
        color = condition ? White(T) : Black(T)
        reflect = condition ? 0.1 : 0.7
        SurfaceProperties(color, White(T), T(reflect), T(250.0))
    end
end

function Save(image::Image, fileName::String)

    w = convert(LONG, image.width)
    h = convert(LONG, image.height)

    infoHeader = BITMAPINFOHEADER(w, h)

    offBits = convert(DWORD, 54)
    size  = convert(DWORD, offBits + infoHeader.biSizeImage)

    fileHeader = BITMAPFILEHEADER(infoHeader.biSizeImage) 

    f = open(fileName, "w")
    write(f, fileHeader.bfType)
    write(f, fileHeader.bfSize)
    write(f, fileHeader.bfReserved)
    write(f, fileHeader.bfOffBits)
  
    write(f, infoHeader.biSize)
    write(f, infoHeader.biWidth)
    write(f, infoHeader.biHeight)
    write(f, infoHeader.biPlanes)
    write(f, infoHeader.biBitCount)
    write(f, infoHeader.biCompression)
    write(f, infoHeader.biSizeImage)
    write(f, infoHeader.biXPelsPerMeter)
    write(f, infoHeader.biYPelsPerMeter)
    write(f, infoHeader.biClrUsed)
    write(f, infoHeader.biClrImportant)

    for c in image.data
        write(f, c.b)
        write(f, c.g)
        write(f, c.r)
        write(f, c.a)
    end
    close(f)
end

function GetClosestIntersection(scene::Scene{T}, ray::Ray{T}) where T
    closest = floatmax(T)
    closestInter = nothing
    for i in eachindex(scene.things)
       inter = GetInteresection(scene.things[i], ray)
       if (inter != nothing && inter.dist < closest) 
           closestInter = inter
           closest = inter.dist
       end
    end
    return closestInter;
end

function TraceRay(scene::Scene{T}, ray::Ray, depth::Int) where T
    isect = GetClosestIntersection(scene, ray);
    isect != nothing ? Shade(scene, isect, depth) : Background(T)
end 

function Shade(scene::Scene{T}, isect::Intersection, depth::Int) where T
    d = isect.ray.dir;
    pos = (d * isect.dist) + isect.ray.start;
    normal = GetNormal(isect.thing, pos);

    reflectDir = normalize(d .- (2*(normal * dot(normal,d))))

    surface =  GetSurfaceProperties(isect.thing.surface, pos);

    naturalColor = Background(T) + GetNaturalColor(scene, surface, pos, normal, reflectDir);
    reflectedColor = (depth >= maxDepth) ? Grey(T) : GetReflectionColor(scene, surface, pos, reflectDir, depth);

    return naturalColor + reflectedColor;
end 

function GetReflectionColor(scene::Scene, surface::SurfaceProperties, pos::Vec3, reflectDir::Vec3, depth::Int)
    ray = Ray(pos, reflectDir)
    color = TraceRay(scene, ray, depth + 1)
    return color*surface.reflect
end 

function GetNaturalColor(scene::Scene{T}, surface::SurfaceProperties, pos::Vec3, normv::Vec3, reflectDir::Vec3) where T
    result = Black(T)

    for light in scene.lights
        ldist = light.pos - pos
        livec = normalize(ldist)
        ray = Ray(pos, livec)

        neatIsect = GetClosestIntersection(scene, ray)
        isInShadow = neatIsect != nothing && neatIsect.dist < norm(ldist)

        if (!isInShadow) 
            illum    = dot(livec,normv)
            specular = dot(livec,reflectDir)

            lcolor = (illum > 0) ? light.color*illum : Defaultcolor(T)
            scolor = (specular > 0) ? light.color*(specular^surface.roughness) : Defaultcolor(T)
            result = result + (lcolor .* surface.diffuse) + (scolor .* surface.specular)            
        end
    end
    return result
end 

function Render(scene::Scene{T}) where T  
    camera = scene.camera
    image = Image(500, 500)

    w = image.width - 1
    h = image.height - 1

    for y in 0:h, x in 0:w
        pt    = GetPoint(camera, x, y, w, h)
        ray   = Ray{T}(scene.camera.pos, pt)
        color = TraceRay(scene, ray, 0)
        image.data[y * image.width + x + 1] = ToRgbColor(color)
    end
    return image
end

scene = Scene(Float64)
image = Render(scene)
Save(image, "julia-ray64.bmp")

using BenchmarkTools
@btime begin
    scene = Scene(Float64)
    image = Render($scene)
end

If anyone is interested, what I call the type-unstable version is this one. It is somewhat edited from the original one from the site, but the performance is the same.

Code

using StaticArrays
using LinearAlgebra

@enum Surface begin
    Shiny        = 1
    CheckerBoard = 2
end

struct Vec3{T} <: FieldVector{3,T}
    x::T
    y::T
    z::T
end
dot(a::Vec3, b::Vec3) =  a.x * b.x + a.y * b.y + a.z * b.z

struct Color{T} <: FieldVector{3,T}
    r::T
    g::T
    b::T
end
dot(a::Color, b::Color) =  a.x * b.x + a.y * b.y + a.z * b.z

struct RGBColor <: FieldVector{4,UInt8}
    b::UInt8
    g::UInt8
    r::UInt8
    a::UInt8
end

struct Image
    width::Int
    height::Int
    data::Vector{RGBColor}
end
function Image(w::Int, h::Int)
  data = Vector{RGBColor}(undef, w * h);
  Image(w, h, data)
end

struct Camera{T}
    pos::Vec3{T}
    forward::Vec3{T}
    right::Vec3{T}
    up::Vec3{T}
end
function Camera{T}(pos::Vec3{T}, lookAt::Vec3{T}) where T
    _down = Vec3{T}(0.0, -1.0, 0.0);
    _forward = lookAt - pos
    forward = normalize(_forward)
    right = normalize(cross(forward, _down)) * 1.5
    up = normalize(cross(forward, right)) * 1.5   
    Camera{T}(pos, forward, right, up)
end

struct Ray{T}
    start::Vec3{T}
    dir::Vec3{T}
end

abstract type Thing{T} end

struct Plane{T} <: Thing{T}
    normal::Vec3{T}
    offset::T
    surface::Surface
end

struct Sphere{T} <: Thing{T}
    center::Vec3{T}
    radius2::T
    surface::Surface
end

struct Intersection{T}
    thing::Thing{T}
    ray::Ray{T}
    dist::T
end

struct Light{T}
    pos::Vec3{T}
    color::Color{T}
end

struct SurfaceProperties{T}
    diffuse::Color{T}
    specular::Color{T}
    reflect::T
    roughness::T
end

const WORD  = UInt16
const DWORD = UInt32
const LONG  = Int32

struct BITMAPINFOHEADER
    biSize::DWORD
    biWidth::LONG
    biHeight::LONG
    biPlanes::WORD
    biBitCount::WORD
    biCompression::DWORD
    biSizeImage::DWORD
    biXPelsPerMeter::LONG
    biYPelsPerMeter::LONG
    biClrUsed::DWORD
    biClrImportant::DWORD
end
function BITMAPINFOHEADER(w::LONG, h::LONG)
    size = sizeof(BITMAPINFOHEADER)
    sizeImage = w * h * 4
    BITMAPINFOHEADER(size, w, -h, 1, 32, 0, sizeImage, 0, 0, 0, 0)
end

struct BITMAPFILEHEADER
    bfType::WORD
    bfSize::DWORD
    bfReserved::DWORD
    bfOffBits::DWORD
end
function BITMAPFILEHEADER(imageSize::DWORD)
    bfType =  0x4D42 # 'B' + ('M' << 8)
    offBits = 54
    size = convert(DWORD, offBits + imageSize);
    BITMAPFILEHEADER(bfType, size, 0, offBits)
end

struct Scene{T}
    things::Vector{Vector{Thing{T}}}
    lights::Vector{Light{T}}
    camera::Camera{T}
    function Scene(T)
        things = [
            Plane{T}[ Plane{T}(Vec3{T}(0.0, 1.0, 0.0), 0.0,  CheckerBoard) ],
            Sphere{T}[ Sphere{T}(Vec3{T}(0.0, 1.0, -0.25), 1.0, Shiny)
                       Sphere{T}(Vec3{T}(-1.0, 0.5, 1.5), 0.5*0.5, Shiny) ]
        ]
        lights = [
            Light{T}(Vec3{T}(-2.0, 2.5, 0.0), Color{T}(0.49, 0.07, 0.07))
            Light{T}(Vec3{T}(1.5, 2.5, 1.5), Color{T}(0.07, 0.07, 0.49))
            Light{T}(Vec3{T}(1.5, 2.5, -1.5), Color{T}(0.07, 0.49, 0.071))
            Light{T}(Vec3{T}(0.0, 3.5, 0.0), Color{T}(0.21, 0.21, 0.35))
        ]
        camera = Camera{T}(Vec3{T}(3.0, 2.0, 4.0), Vec3{T}(-1.0, 0.5, 0.0));
        new{T}(things, lights, camera)
    end
end

const maxDepth = 5
White(T) = Color{T}(1,1,1)
Grey(T) = Color{T}(0.5,0.5,0.5)
Black(T) = Color{T}(0,0,0)
Background(T) = Black(T)
Defaultcolor(T) = Black(T)

GetNormal(p::Plane, pos::Vec3) = p.normal
GetNormal(s::Sphere, pos::Vec3) = normalize(pos - s.center)

function ToRgbColor(c::Color)
    b = floor(UInt8, clamp(c.b, 0.0, 1.0) * 255.0)
    g = floor(UInt8, clamp(c.g, 0.0, 1.0) * 255.0)
    r = floor(UInt8, clamp(c.r, 0.0, 1.0) * 255.0)
    a = convert(UInt8, 255)
    return RGBColor(b, g, r, a)
end

function GetPoint(camera::Camera, x::Int, y::Int, screenWidth::Int, screenHeight::Int)
    recenterX = (x - (screenWidth / 2.0)) / 2.0 / screenWidth
    recenterY = -(y - (screenHeight / 2.0)) / 2.0 / screenHeight
    return normalize(camera.forward + ((camera.right * recenterX) + (camera.up * recenterY)))
end

function GetInteresection(p::Plane{T}, ray::Ray{T}) where T
    denom = dot(p.normal, ray.dir)
    denom > 0.0 && return nothing
    dist = (dot(p.normal, ray.start) + p.offset) / (-denom)
    Intersection(p, ray, dist)
end

function GetInteresection(s::Sphere{T}, ray::Ray{T}) where T
    eo = (s.center - ray.start)
    v = dot(eo, ray.dir)
    dist = 0.0
    if (v >= 0.0)
        disc = s.radius2 - (dot(eo, eo) - v * v)
        dist = (disc >= 0.0) ? v - sqrt(disc) : dist
    end
    (dist == 0.0) ? nothing : Intersection(s, ray, dist)
end

function GetSurfaceProperties(s::Surface, pos::Vec3{T}) where T
    if s == Shiny
        return SurfaceProperties(White(T), Grey(T), T(0.7), T(250.0))
    else
        xz = floor(pos.x) + floor(pos.z)
        condition = mod(xz, 2) != 0
        color = condition ? White(T) : Black(T)
        reflect = condition ? 0.1 : 0.7
        SurfaceProperties(color, White(T), T(reflect), T(250.0))
    end
end

function Save(image::Image, fileName::String)

    w = convert(LONG, image.width)
    h = convert(LONG, image.height)

    infoHeader = BITMAPINFOHEADER(w, h)

    offBits = convert(DWORD, 54)
    size  = convert(DWORD, offBits + infoHeader.biSizeImage)

    fileHeader = BITMAPFILEHEADER(infoHeader.biSizeImage) 

    f = open(fileName, "w")
    write(f, fileHeader.bfType)
    write(f, fileHeader.bfSize)
    write(f, fileHeader.bfReserved)
    write(f, fileHeader.bfOffBits)
  
    write(f, infoHeader.biSize)
    write(f, infoHeader.biWidth)
    write(f, infoHeader.biHeight)
    write(f, infoHeader.biPlanes)
    write(f, infoHeader.biBitCount)
    write(f, infoHeader.biCompression)
    write(f, infoHeader.biSizeImage)
    write(f, infoHeader.biXPelsPerMeter)
    write(f, infoHeader.biYPelsPerMeter)
    write(f, infoHeader.biClrUsed)
    write(f, infoHeader.biClrImportant)

    for c in image.data
        write(f, c.b)
        write(f, c.g)
        write(f, c.r)
        write(f, c.a)
    end
    close(f)
end

function GetClosestIntersection(scene::Scene{T}, ray::Ray{T}) where T
    closest = floatmax(T)
    closestInter = nothing
    for thing_vec in scene.things
       for thing in thing_vec
          inter = GetInteresection(thing, ray)
          if (inter != nothing && inter.dist < closest) 
              closestInter = inter
              closest = inter.dist
          end
       end
    end
    return closestInter;
end

function TraceRay(scene::Scene{T}, ray::Ray, depth::Int) where T
    isect = GetClosestIntersection(scene, ray);
    isect != nothing ? Shade(scene, isect, depth) : Background(T)
end 

function Shade(scene::Scene{T}, isect::Intersection, depth::Int) where T
    d = isect.ray.dir;
    pos = (d * isect.dist) + isect.ray.start;
    normal = GetNormal(isect.thing, pos);

    reflectDir = normalize(d .- (2*(normal * dot(normal,d))))

    surface =  GetSurfaceProperties(isect.thing.surface, pos);

    naturalColor = Background(T) + GetNaturalColor(scene, surface, pos, normal, reflectDir);
    reflectedColor = (depth >= maxDepth) ? Grey(T) : GetReflectionColor(scene, surface, pos, reflectDir, depth);

    return naturalColor + reflectedColor;
end 

function GetReflectionColor(scene::Scene, surface::SurfaceProperties, pos::Vec3, reflectDir::Vec3, depth::Int)
    ray = Ray(pos, reflectDir)
    color = TraceRay(scene, ray, depth + 1)
    return color*surface.reflect
end 

function GetNaturalColor(scene::Scene{T}, surface::SurfaceProperties, pos::Vec3, normv::Vec3, reflectDir::Vec3) where T
    result = Black(T)

    for light in scene.lights
        ldist = light.pos - pos
        livec = normalize(ldist)
        ray = Ray(pos, livec)

        neatIsect = GetClosestIntersection(scene, ray)
        isInShadow = neatIsect != nothing && neatIsect.dist < norm(ldist)

        if (!isInShadow) 
            illum    = dot(livec,normv)
            specular = dot(livec,reflectDir)

            lcolor = (illum > 0) ? light.color*illum : Defaultcolor(T)
            scolor = (specular > 0) ? light.color*(specular^surface.roughness) : Defaultcolor(T)
            result = result + (lcolor .* surface.diffuse) + (scolor .* surface.specular)            
        end
    end
    return result
end 

function Render(scene::Scene{T}) where T  
    camera = scene.camera
    image = Image(500, 500)

    w = image.width - 1
    h = image.height - 1

    for y in 0:h, x in 0:w
        pt    = GetPoint(camera, x, y, w, h)
        ray   = Ray{T}(scene.camera.pos, pt)
        color = TraceRay(scene, ray, 0)
        image.data[y * image.width + x + 1] = ToRgbColor(color)
    end
    return image
end

scene = Scene(Float64)
image = Render(scene)
Save(image, "julia-ray64.bmp")

using BenchmarkTools
@btime begin
    scene = Scene(Float64)
    image = Render($scene)
end

sdanisch · May 5, 2021, 10:13am

Btw, I got the Julia version to run in ~50ms, while the nim version runs at 25ms…
I checked the nim version, and there are quite a few algorithmic differences which I tried to port, but I either misunderstand the Nim code or have a bug in my code, since my version only shows a black picture.
I’m fairly optimistic, that Julia will be just as fast, when correctly porting the optimizations from the Nim version
I will try to find some time this week to update the code and post it…

lmiq · May 5, 2021, 1:04pm

One thing to take into account here is something I learned recently in another of these compiled vs. julia comparisons:

In general, in the compiled codes we are comparing, the “data” is hard-coded. Thus, the sizes of the arrays and their types are known at compile time. Some parameters happen to be constant at compile time as well. In this particular case, for example, the compiler can do optimizations in the compiled codes taking into account the fact that the scene has only three “things”.

This is not a realistic scenario, as any other scene would have to be hard-coded as well.

In the Julia code the same thing does not happen: the scene can be changed and the same tracer functions will be called. This is comparable, probably, to a compiled version running on a scene defined by user-input data. It would be interesting to test what is the performance of all these codes if the Scene is defined at run time, not compile time.

In Julia it is possible (yet artificial) to inform the compiler of the number of elements of the scene, by putting the number in the type-signature of the “Scene”. Then the compilation of the tracer functions would be possible with the same level of optimization that occurs in hard-coded cases. New tracer functions would be compiled for every new scene, but that can even be an interesting option in some applications if the scenes do not change much.

lmiq · May 5, 2021, 3:11pm

I tried reading the data in the Fortran code and it did not make a perceivable difference. But note that the comparisons are dependent on the number of things in the scene. The original julia version runs faster than the original Fortran version if the number of “things” is 10.

The above version that I posted (the one that mimicked the Fortran version by using only one type of thing with a label, instead of different types), and is thus faster, is almost as fast as the nim version with 10 objects:

Nim:

nim c -d:danger -d:lto -d:intpow -d:quake --passC:"-march=nativ
e" -r nim10.nim 
Hint: used config file '/etc/nim/nim.cfg' [Conf]
Hint: operation successful (300 lines compiled; 0.043 sec total; 5.129MiB peakmem; Dangerous Release B
uild) [SuccessX]
Hint: /home/leandro/Drive/Work/JuliaPlay/nim10  [Exec]
Completed in 154.819779 ms

(and the times becomes similar if the intpow and quake compiler options are removed.

Julia:

leandro@pitico:~/Drive/Work/JuliaPlay% julia RayTracer_LMSingle10.jl 
  178.229 ms (40 allocations: 979.20 KiB)

Fortran:

leandro@pitico:~/Drive/Work/JuliaPlay% ./ray10.exe 
Render time:   253.07 ms.

The original Julia version from the site runs in 229ms for 10 objects here. Thus, faster than the Fortran version and 50% slower than the optimized nim version.

Stressing that the only thing I did there was to increse the number of objects in the nim and Fortran versions, like this:

diff RayTracer.nim nim10.nim 
153,154c153,161
<       initSphere(Vector(x: -1.0, y: 0.5, z: 1.5), 0.5, ShinySurface),
<       initSphere(Vector(x: 0.0, y: 1.0, z: -0.25), 1.0, ShinySurface)
---
>       initPlane(Vector(x: 1.0, y: 1.0, z: 0.0), 0.0, CheckerBoardSurface),
>       initPlane(Vector(x: 2.0, y: 1.0, z: 0.0), 0.0, CheckerBoardSurface),
>       initPlane(Vector(x: 3.0, y: 1.0, z: 0.0), 0.0, CheckerBoardSurface),
>       initPlane(Vector(x: 4.0, y: 1.0, z: 0.0), 0.0, CheckerBoardSurface),
>       initSphere(Vector(x: 0.0, y: 0.5, z: 1.5), 0.5, ShinySurface),
>       initSphere(Vector(x: -1.0, y: 1.0, z: -0.25), 1.0, ShinySurface),
>       initSphere(Vector(x: -2.0, y: 1.0, z: -0.25), 1.0, ShinySurface),
>       initSphere(Vector(x: -3.0, y: 1.0, z: -0.25), 1.0, ShinySurface),
>       initSphere(Vector(x: -4.0, y: 1.0, z: -0.25), 1.0, ShinySurface)

diff main.f95 main_10.f95 
538c538
<     allocate(scene%things(3))
---
>     allocate(scene%things(10))
543,544c543,551
<     scene%things(2) = CreateSphere(TVector( 0.0, 1.0, -0.25), 1.0d+0, SHINY_SURFACE)
<     scene%things(3) = CreateSphere(TVector(-1.0, 0.5,  1.5 ), 0.5d+0, SHINY_SURFACE)
---
>     scene%things(2) = CreatePlane (TVector( 1.0, 1.0,  0.0 ), 0.0d+0, CHECKERBOARD_SURFACE)
>     scene%things(3) = CreatePlane (TVector( 2.0, 1.0,  0.0 ), 0.0d+0, CHECKERBOARD_SURFACE)
>     scene%things(4) = CreatePlane (TVector( 3.0, 1.0,  0.0 ), 0.0d+0, CHECKERBOARD_SURFACE)
>     scene%things(5) = CreatePlane (TVector( 4.0, 1.0,  0.0 ), 0.0d+0, CHECKERBOARD_SURFACE)
>     scene%things(6) = CreateSphere(TVector( 0.0, 1.0, -0.25), 1.0d+0, SHINY_SURFACE)
>     scene%things(7) = CreateSphere(TVector(-1.0, 0.5,  1.5 ), 0.5d+0, SHINY_SURFACE)
>     scene%things(8) = CreateSphere(TVector(-2.0, 0.5,  1.5 ), 0.5d+0, SHINY_SURFACE)
>     scene%things(9) = CreateSphere(TVector(-3.0, 0.5,  1.5 ), 0.5d+0, SHINY_SURFACE)
>     scene%things(10) = CreateSphere(TVector(-4.0, 0.5,  1.5 ), 0.5d+0, SHINY_SURFACE)
565c572

The Julia version I am running here is:

Code

using StaticArrays
using LinearAlgebra

@enum Surface begin
    Shiny        = 1
    CheckerBoard = 2
end

struct Vec3{T} <: FieldVector{3,T}
    x::T
    y::T
    z::T
end
dot(a::Vec3, b::Vec3) =  a.x * b.x + a.y * b.y + a.z * b.z

struct Color{T} <: FieldVector{3,T}
    r::T
    g::T
    b::T
end
dot(a::Color, b::Color) =  a.x * b.x + a.y * b.y + a.z * b.z

struct RGBColor <: FieldVector{4,UInt8}
    b::UInt8
    g::UInt8
    r::UInt8
    a::UInt8
end

struct Image
    width::Int
    height::Int
    data::Vector{RGBColor}
end
function Image(w::Int, h::Int)
  data = Vector{RGBColor}(undef, w * h);
  Image(w, h, data)
end

struct Camera{T}
    pos::Vec3{T}
    forward::Vec3{T}
    right::Vec3{T}
    up::Vec3{T}
end
function Camera{T}(pos::Vec3{T}, lookAt::Vec3{T}) where T
    _down = Vec3{T}(0.0, -1.0, 0.0);
    _forward = lookAt - pos
    forward = normalize(_forward)
    right = normalize(cross(forward, _down)) * 1.5
    up = normalize(cross(forward, right)) * 1.5   
    Camera{T}(pos, forward, right, up)
end

struct Ray{T}
    start::Vec3{T}
    dir::Vec3{T}
end

struct Thing{T}
  type::Int # 1 plane, 2 sphere
  vec::Vec3{T} # normal or center
  r::T # offset or radius
  surface::Surface
end
  
struct Intersection{T}
    thing::Thing{T}
    ray::Ray{T}
    dist::T
end

struct Light{T}
    pos::Vec3{T}
    color::Color{T}
end

struct SurfaceProperties{T}
    diffuse::Color{T}
    specular::Color{T}
    reflect::T
    roughness::T
end

const WORD  = UInt16
const DWORD = UInt32
const LONG  = Int32

struct BITMAPINFOHEADER
    biSize::DWORD
    biWidth::LONG
    biHeight::LONG
    biPlanes::WORD
    biBitCount::WORD
    biCompression::DWORD
    biSizeImage::DWORD
    biXPelsPerMeter::LONG
    biYPelsPerMeter::LONG
    biClrUsed::DWORD
    biClrImportant::DWORD
end
function BITMAPINFOHEADER(w::LONG, h::LONG)
    size = sizeof(BITMAPINFOHEADER)
    sizeImage = w * h * 4
    BITMAPINFOHEADER(size, w, -h, 1, 32, 0, sizeImage, 0, 0, 0, 0)
end

struct BITMAPFILEHEADER
    bfType::WORD
    bfSize::DWORD
    bfReserved::DWORD
    bfOffBits::DWORD
end
function BITMAPFILEHEADER(imageSize::DWORD)
    bfType =  0x4D42 # 'B' + ('M' << 8)
    offBits = 54
    size = convert(DWORD, offBits + imageSize);
    BITMAPFILEHEADER(bfType, size, 0, offBits)
end

struct Scene{T}
    things::Vector{Thing{T}}
    lights::Vector{Light{T}}
    camera::Camera{T}
    function Scene(T)
        things = [ Thing{T}(1,Vec3{T}(0.0, 1.0, 0.0), 0.0,  CheckerBoard), # plane
                   Thing{T}(1,Vec3{T}(1.0, 1.0, 0.0), 0.0,  CheckerBoard), # plane
                   Thing{T}(1,Vec3{T}(2.0, 1.0, 0.0), 0.0,  CheckerBoard), # plane
                   Thing{T}(1,Vec3{T}(3.0, 1.0, 0.0), 0.0,  CheckerBoard), # plane
                   Thing{T}(1,Vec3{T}(4.0, 1.0, 0.0), 0.0,  CheckerBoard), # plane
                   Thing{T}(2,Vec3{T}(0.0, 1.0, -0.25), 1.0, Shiny),       # sphere
                   Thing{T}(2,Vec3{T}(-1.0, 0.5, 1.5), 0.5*0.5, Shiny),
                   Thing{T}(2,Vec3{T}(-2.0, 0.5, 1.5), 0.5*0.5, Shiny), 
                   Thing{T}(2,Vec3{T}(-3.0, 0.5, 1.5), 0.5*0.5, Shiny), 
                   Thing{T}(2,Vec3{T}(-4.0, 0.5, 1.5), 0.5*0.5, Shiny) 
             ] 
        lights = [
            Light{T}(Vec3{T}(-2.0, 2.5, 0.0), Color{T}(0.49, 0.07, 0.07))
            Light{T}(Vec3{T}(1.5, 2.5, 1.5), Color{T}(0.07, 0.07, 0.49))
            Light{T}(Vec3{T}(1.5, 2.5, -1.5), Color{T}(0.07, 0.49, 0.071))
            Light{T}(Vec3{T}(0.0, 3.5, 0.0), Color{T}(0.21, 0.21, 0.35))
        ]
        camera = Camera{T}(Vec3{T}(3.0, 2.0, 4.0), Vec3{T}(-1.0, 0.5, 0.0));
        new{T}(things, lights, camera)
    end
end

const maxDepth = 5
White(T) = Color{T}(1,1,1)
Grey(T) = Color{T}(0.5,0.5,0.5)
Black(T) = Color{T}(0,0,0)
Background(T) = Black(T)
Defaultcolor(T) = Black(T)

function GetNormal(t::Thing, pos::Vec3) 
  t.type == 1 && return t.vec
  t.type == 2 && return normalize(pos-t.vec)
end

function ToRgbColor(c::Color)
    b = floor(UInt8, clamp(c.b, 0.0, 1.0) * 255.0)
    g = floor(UInt8, clamp(c.g, 0.0, 1.0) * 255.0)
    r = floor(UInt8, clamp(c.r, 0.0, 1.0) * 255.0)
    a = convert(UInt8, 255)
    return RGBColor(b, g, r, a)
end

function GetPoint(camera::Camera, x::Int, y::Int, screenWidth::Int, screenHeight::Int)
    recenterX = (x - (screenWidth / 2.0)) / 2.0 / screenWidth
    recenterY = -(y - (screenHeight / 2.0)) / 2.0 / screenHeight
    return normalize(camera.forward + ((camera.right * recenterX) + (camera.up * recenterY)))
end

@inline function GetInteresection(t::Thing{T}, ray::Ray{T}) where T
  if t.type == 1
    denom = dot(t.vec, ray.dir)
    if denom <= 0
      dist = (dot(t.vec, ray.start) + t.r) / (-denom)
      return Intersection(t, ray, dist)
    end
  elseif t.type == 2
    eo = (t.vec - ray.start)
    v = dot(eo, ray.dir)
    dist = zero(T)
    if (v >= 0)
        disc = t.r - (dot(eo, eo) - v * v)
        dist = (disc >= 0) ? v - sqrt(disc) : dist
    end
    if dist > 0
      return Intersection(t, ray, dist)
    end
  end
  nothing
end

function GetSurfaceProperties(s::Surface, pos::Vec3{T}) where T
    if s == Shiny
        return SurfaceProperties(White(T), Grey(T), T(0.7), T(250.0))
    else
        xz = floor(pos.x) + floor(pos.z)
        condition = mod(xz, 2) != 0
        color = condition ? White(T) : Black(T)
        reflect = condition ? 0.1 : 0.7
        SurfaceProperties(color, White(T), T(reflect), T(250.0))
    end
end

function Save(image::Image, fileName::String)

    w = convert(LONG, image.width)
    h = convert(LONG, image.height)

    infoHeader = BITMAPINFOHEADER(w, h)

    offBits = convert(DWORD, 54)
    size  = convert(DWORD, offBits + infoHeader.biSizeImage)

    fileHeader = BITMAPFILEHEADER(infoHeader.biSizeImage) 

    f = open(fileName, "w")
    write(f, fileHeader.bfType)
    write(f, fileHeader.bfSize)
    write(f, fileHeader.bfReserved)
    write(f, fileHeader.bfOffBits)
  
    write(f, infoHeader.biSize)
    write(f, infoHeader.biWidth)
    write(f, infoHeader.biHeight)
    write(f, infoHeader.biPlanes)
    write(f, infoHeader.biBitCount)
    write(f, infoHeader.biCompression)
    write(f, infoHeader.biSizeImage)
    write(f, infoHeader.biXPelsPerMeter)
    write(f, infoHeader.biYPelsPerMeter)
    write(f, infoHeader.biClrUsed)
    write(f, infoHeader.biClrImportant)

    for c in image.data
        write(f, c.b)
        write(f, c.g)
        write(f, c.r)
        write(f, c.a)
    end
    close(f)
end

function GetClosestIntersection(scene::Scene{T}, ray::Ray{T}) where T
    closest = floatmax(T)
    closestInter = nothing
    for i in eachindex(scene.things)
       inter = GetInteresection(scene.things[i], ray)
       if (inter != nothing && inter.dist < closest) 
           closestInter = inter
           closest = inter.dist
       end
    end
    return closestInter;
end

function TraceRay(scene::Scene{T}, ray::Ray, depth::Int) where T
    isect = GetClosestIntersection(scene, ray);
    isect != nothing ? Shade(scene, isect, depth) : Background(T)
end 

function Shade(scene::Scene{T}, isect::Intersection, depth::Int) where T
    d = isect.ray.dir;
    pos = (d * isect.dist) + isect.ray.start;
    normal = GetNormal(isect.thing, pos);

    reflectDir = normalize(d .- (2*(normal * dot(normal,d))))

    surface =  GetSurfaceProperties(isect.thing.surface, pos);

    naturalColor = Background(T) + GetNaturalColor(scene, surface, pos, normal, reflectDir);
    reflectedColor = (depth >= maxDepth) ? Grey(T) : GetReflectionColor(scene, surface, pos, reflectDir, depth);

    return naturalColor + reflectedColor;
end 

function GetReflectionColor(scene::Scene, surface::SurfaceProperties, pos::Vec3, reflectDir::Vec3, depth::Int)
    ray = Ray(pos, reflectDir)
    color = TraceRay(scene, ray, depth + 1)
    return color*surface.reflect
end 

function GetNaturalColor(scene::Scene{T}, surface::SurfaceProperties, pos::Vec3, normv::Vec3, reflectDir::Vec3) where T
    result = Black(T)

    for light in scene.lights
        ldist = light.pos - pos
        livec = normalize(ldist)
        ray = Ray(pos, livec)

        neatIsect = GetClosestIntersection(scene, ray)
        isInShadow = neatIsect != nothing && neatIsect.dist < norm(ldist)

        if (!isInShadow) 
            illum    = dot(livec,normv)
            specular = dot(livec,reflectDir)

            lcolor = (illum > 0) ? light.color*illum : Defaultcolor(T)
            scolor = (specular > 0) ? light.color*(specular^surface.roughness) : Defaultcolor(T)
            result = result + (lcolor .* surface.diffuse) + (scolor .* surface.specular)            
        end
    end
    return result
end 

function Render(scene::Scene{T}) where T  
    camera = scene.camera
    image = Image(500, 500)

    w = image.width - 1
    h = image.height - 1

    for y in 0:h, x in 0:w
        pt    = GetPoint(camera, x, y, w, h)
        ray   = Ray{T}(scene.camera.pos, pt)
        color = TraceRay(scene, ray, 0)
        image.data[y * image.width + x + 1] = ToRgbColor(color)
    end
    return image
end

scene = Scene(Float64)
image = Render(scene)
Save(image, "julia-ray64.bmp")

using BenchmarkTools
@btime begin
    scene = Scene(Float64)
    image = Render($scene)
end

Topic		Replies	Views
Ray Tracing in a week-end - Julia vs SIMD-optimized C++ Performance gpu , graphics , benchmark , pluto , staticarrays	81	14021	August 9, 2024
I translated Peter Shirley's Raytracer to Julia. C++ 1min -> Julia 2m30s -> 15s (@threads) Visualization images , graphics	27	3189	February 21, 2022
Raytracing weekend: debugging unexpected allocations New to Julia performance	3	453	June 22, 2022
Slower execution and more GC in command prompt than VSCode Performance	5	210	May 14, 2023
Julia 1.11 beta high latency Internals & Design	25	2824	September 17, 2024

Why is this raytracer slow?

Related topics