Even more bare baremodule

Is there a way to create a baremodule that doesn’t import Core? Currently, baremodule does using Core, so its namespace really isn’t as bare as one might like, because Core exports 97 names:

julia> names(Core)
97-element Vector{Symbol}:
 :<:
 :(===)
 :AbstractArray
 :AbstractChar
 :AbstractFloat
 :AbstractString
 :Any
 :ArgumentError
 :Array
 :AssertionError
 :Bool
 :BoundsError
 :Char
 :Core
 :Cvoid
 :DataType
 :DenseArray
 :DivideError
 :DomainError
 :ErrorException
 :Exception
 :Expr
 :Float16
 :Float32
 :Float64
 :Function
 :GlobalRef
 :IO
 :InexactError
 ⋮
 :UInt32
 :UInt64
 :UInt8
 :UndefInitializer
 :UndefKeywordError
 :UndefRefError
 :UndefVarError
 :Union
 :UnionAll
 :Unsigned
 :Vararg
 :VecElement
 :WeakRef
 :applicable
 :eval
 :fieldtype
 :getfield
 :ifelse
 :invoke
 :isa
 :isdefined
 :nfields
 :nothing
 :setfield!
 :throw
 :tuple
 :typeassert
 :typeof
 :undef

which are all visible in the module’s namespace:

julia> baremodule M end
Main.M

julia> M.Expr
Expr

julia> M.undef
UndefInitializer(): array initializer with undefined values

Is there some way to create a module with a truly empty namespace? The idea would be that the only way to put anything in this module would be to eval from the outside.

This came up on Zulip when @ColinCaine was trying to build a sandboxed environment: Zulip

4 Likes

Sandboxes are useful for defining smaller teaching languages (see Racket and the HTDP2e book) and, if they can be made secure, for evaluating untrusted code in a capability programming paradigm.

Here’s my attempt at building a sandbox with the currently available tools. Big thanks to Mason (who wrote the first version) and Thautwarm (who wrote MLStyle.jl).

Executing this code should dump you in a Sandbox REPL where the only literals available are true and false and the only function is nand(a, b). You are invited to build your own computer from there :wink:

It’s just a proof of concept, so don’t mind the slightly strange design decisions.

# With thanks to Mason Protter and Thautwarm :)

using ReplMaker, MLStyle

# The module that sandboxed code is evaluated in. `nand` is defined from the outside,
# with the host's `!` and `&` interpolated as values rather than referenced by name.
baremodule Sandbox end
Core.eval(Sandbox, :(nand(x, y) = $(!)($(&)(x, y))))


# Maps every forbidden name to a freshly generated symbol.
name_shadow_dict = let
    core_names = names(Core; all=true, imported=true)
    sandbox_names = names(Sandbox; all=true, imported=true)
    # ccall isn't in Core, it's a special-cased thing that appears in the AST
    # as a regular function call. I think it's the only one but idk.
    forbidden = setdiff(pushfirst!(core_names, :ccall), sandbox_names)
    shadows = Dict{Symbol,Symbol}()
    for name in forbidden
        shadows[name] = gensym()
    end
    shadows
end
# Reverse lookup: generated symbol => original name.
inverse_name_shadow_dict = Dict(shadow => name for (name, shadow) in name_shadow_dict)


"""
    sandbox_eval_expr(ex)

Return an expression that, when evaluated, evaluates a filtered copy of `ex` inside
the `Sandbox` module.

The filter walks `ex` and

- passes `Bool` literals and `LineNumberNode`s through unchanged,
- replaces every symbol found in `name_shadow_dict` with its generated stand-in,
- throws (via `error`) on `using`/`import` and on the literal and container syntax
  listed in the body,
- recurses into every other `Expr`, and
- throws on any node it does not recognise.

The filter runs when this function is called, so a forbidden construct raises here
rather than when the returned expression is evaluated.
"""
function sandbox_eval_expr(ex)
    filterer(node) = @match node begin
        # `true` and `false` are the only literals the sandbox accepts.
        x::Bool => x

        x::LineNumberNode => x

        # Ban everything.
        # Edit anything you want to allow.

        # It's a bit complicated to work out when a symbol is being used as a
        # reference and what scope it is in. I think the JuliaVariables people
        # have maybe done something with that?
        #
        # Anyway, we can keep it simple and just shadow all symbols accessible
        # in a baremodule.
        s::Symbol => get(name_shadow_dict, s, s)

        # Not allowed to import stuff
        Expr(:using, _...) ||
        Expr(:import, _...) => error("Imports are not permitted in the sandbox: $node")

        # Julia literals, these are mostly or entirely harmless (I think)
        # but you might want to omit them or rewrite them to something else or whatever.
        # (`Bool` is deliberately absent: it is accepted by the first arm above.)
        ::Number ||
        ::String ||
        Expr(:string, _...) || # string interpolation
        ::QuoteNode ||
        Expr(:quote, _...) ||
        Expr(:ref, _...) || # a[i], but also Int[]
        Expr(:typed_vcat, _...) ||
        Expr(:typed_hcat, _...) ||
        Expr(:vect, _...) ||
        Expr(:vcat, _...) ||
        Expr(:hcat, _...) ||
        Expr(:tuple, _...) || # also covers named tuples
        Expr(:comprehension, _...) ||
        Expr(:typed_comprehension, _...) ||
        Expr(:generator, _...) => error("This literal is not permitted in the sandbox: $node")

        # Recurse
        Expr(head, args...) => Expr(head, filterer.(args)...)

        x => error("Unknown node type: $x")
    end
    # The try-catch is just fixing up error messages, you can ignore it:
    # an `UndefVarError` for a generated stand-in is re-raised under the original name.
    :(
        try
            (@eval Sandbox $(filterer(ex)))
        catch e
            if e isa UndefVarError && haskey(inverse_name_shadow_dict, e.var)
                rethrow(UndefVarError(inverse_name_shadow_dict[e.var]))
            else
                rethrow()
            end
        end
    )
end


# REPL hook: parse the entered text and wrap it for evaluation via `sandbox_eval_expr`.
sandbox_parser(s::String) = sandbox_eval_expr(Meta.parse(s))


# Input checker for the REPL mode: the text currently in the line-edit buffer counts
# as valid once it parses to anything other than an `:incomplete` expression.
function valid_julia(s)
    # Work on a copy so that `take!` does not drain the live edit buffer.
    snapshot = copy(ReplMaker.LineEdit.buffer(s))
    parsed = Meta.parse(String(take!(snapshot)))
    return !Meta.isexpr(parsed, :incomplete)
end


# Register a REPL mode that routes every entered line through `sandbox_parser`.
sandbox_mode = initrepl(
    sandbox_parser;
    prompt_text = "Sandbox> ",
    prompt_color = :yellow,
    startup_text = false,
    mode_name = :sandbox,
    valid_input_checker = valid_julia,
)


# Switch the running REPL into the mode registered above.
enter_mode!(sandbox_mode)

Edit: you can also escape this with module X end; X.eval(...), but that’s trivial to ban.

6 Likes

Edit: v3 is basically the same but with better tests, and available as a package here: GitHub - cmcaine/Sandboxes.jl

A v2 with slightly clearer code:

module Sandboxes

using MLStyle: @match
using ReplMaker: ReplMaker, enter_mode!, initrepl

"""
    baremod()

Create a new `baremodule` under a freshly generated name and return it.
"""
function baremod()
    modname = gensym()
    return Core.eval(@__MODULE__, :(baremodule $modname end))
end

# TODO: check you don't need `names(Core; all=true)` here.
# A fresh generated symbol for `ccall` and for every name listed by `names(Core)`.
const core_name_remaps = Dict(
    name => gensym() for name in pushfirst!(names(Core), :ccall)
)
# Reverse lookup: generated symbol => original name.
const inverse_core_name_remaps = Dict(alias => name for (name, alias) in core_name_remaps)

"""
    sandboxify(ex)

Return an expression that, when evaluated in a baremodule, cannot:

- access identifiers in `Core`
- import other modules
- define new modules (this would allow escape by `eval`)

`ex` is usually an `Expr`, but any value is accepted: symbols are remapped and
everything else that is not an `Expr` is returned unchanged.

Throws (via `error`) if `ex` contains a module definition, `using` or `import`.

To punch holes in the sandbox, interpolate values into the expression, like this:

```
sandboxify( :( nand(a, b) = \$(~)(\$(&)(a, b)) ) )
```

The above example will return an expression for a function that will internally
call `~` and `&` as defined in the scope you created the expression in, rather
than as defined in the sandbox.
"""
function sandboxify(ex)
    filterer(node) = @match node begin
        # Map names that are in Core to generated symbols so that they cannot be accessed.
        s::Symbol => get(core_name_remaps, s, s)

        # This is to prevent `baremodule X end; X.Core`.
        # QuoteNode remapping will unfortunately make it impossible to access a
        # property with a reserved name on a struct you've imported into the
        # sandbox and will make code that deals with literal QuoteNodes pretty
        # weird and error prone.
        # Maybe this can be done with less breakage.
        # QuoteNode(s) => QuoteNode(get(core_name_remaps, s, s))

        # For now let's ban all modules so we don't need to rewrite QuoteNodes.
        # This single arm covers bare and non-bare modules alike; the latter would
        # additionally allow trivial escape via `eval` and `Base`.
        Expr(:module, _...) => error("You cannot define modules in this sandbox.")

        # Not allowed to import stuff
        Expr(:using, _...) ||
        Expr(:import, _...) => error("Imports are not permitted in this sandbox: $node")

        # Recurse
        Expr(head, args...) => Expr(head, filterer.(args)...)

        # Fallback: literals, QuoteNodes, interpolated values, line numbers, ...
        x => x
    end
    return filterer(ex)
end

"""
    sandboxed_eval_expr(m::Module, ex::Expr)

Build an expression which, once evaluated, runs the sandboxified form of `ex` inside `m`.

`ex` is sandboxified while the expression is being built. If evaluation raises an
`UndefVarError` for one of the generated replacement symbols, the error is re-raised
under the original name.
"""
function sandboxed_eval_expr(m::Module, ex)
    safe_ex = sandboxify(ex)
    return quote
        try
            @eval $m $safe_ex
        catch err
            # Anything that is not a remapped-name lookup failure propagates untouched.
            (err isa UndefVarError && haskey($inverse_core_name_remaps, err.var)) || rethrow()
            rethrow(UndefVarError($inverse_core_name_remaps[err.var]))
        end
    end
end

# REPL inside sandbox

"""
    repl_in(m::Module)

Set up a REPL mode that sandboxes each input and evaluates it inside the baremodule `m`.

Returns the value produced by `initrepl`.
"""
function repl_in(m::Module)
    # The buffer contents are accepted once they parse to something other than an
    # `:incomplete` expression. A copy is taken so the live buffer is not drained.
    function valid_julia(s)
        text = String(take!(copy(ReplMaker.LineEdit.buffer(s))))
        return !Meta.isexpr(Meta.parse(text), :incomplete)
    end

    # Parse the entered text and wrap it for sandboxed evaluation in `m`.
    sandbox_parser(s::String) = sandboxed_eval_expr(m, Meta.parse(s))

    return initrepl(
        sandbox_parser;
        prompt_text = "Sandbox> ",
        prompt_color = :yellow,
        startup_text = false,
        mode_name = :sandbox,
        valid_input_checker = valid_julia,
    )
end

"""
    enter_repl(m::Module)

Build the sandboxed REPL mode for baremodule `m` with `repl_in` and switch into it.
"""
enter_repl(m::Module) = enter_mode!(repl_in(m))

end

module TestSandboxes

using ..Sandboxes: baremod, sandboxify, sandboxed_eval_expr

using Test

"Test `ex` evaluates without throwing"
macro test_nothrow(ex)
    :(@test ($ex; true))
end

# One sandbox module shared by all tests below.
s = baremod()

# Definitions made through the sandbox land in `s`; interpolated host functions are usable.
@test eval(sandboxed_eval_expr(s, :( nand(a, b) = $(~)($(&)(a, b)) ) )) == s.nand
@test eval(sandboxed_eval_expr(s, :( not(a) = nand(a, true) ) )) == s.not
@test eval(sandboxed_eval_expr(s, :( not(false) ) )) == true

# We can access properties and create symbols with names in Core
@test_nothrow sandboxify( :( X.write ) )
@test_nothrow sandboxify( :( :write ) )
@test_nothrow sandboxify( :( write = "foo" ) )
# Same for a name that Core exports (and that is therefore remapped when used as an
# identifier): a property name is left untouched.
@test sandboxify( :( X.typeof ) ) == :( X.typeof )

# Module definitions of any kind are rejected.
@test_throws Exception sandboxify( :( baremodule X end ))
@test_throws Exception sandboxify( :( baremodule X end; X.Core ))
@test_throws Exception sandboxify( :( module X end; X.eval ))
# Access to properties of `Core` is denied
@test_throws UndefVarError eval(sandboxed_eval_expr(s, :( write ) ))
# `write` is not among the names exported by Core, so the check above would pass in
# any baremodule. `typeof` is exported by Core, so this one exercises the remapping.
@test_throws UndefVarError eval(sandboxed_eval_expr(s, :( typeof ) ))
# The reported error names the original identifier, not its generated replacement.
@test try
    eval(sandboxed_eval_expr(s, :( typeof ) ))
    false
catch err
    err isa UndefVarError && err.var === :typeof
end

end

# Demo

# Short alias for the module under demonstration.
S = Sandboxes

# Create an empty sandbox, give it a single primitive (`nand`, built from the host's
# `~` and `&`), then start a REPL that evaluates inside it.
sandbox = S.baremod()
# `sandboxed_eval_expr` is not exported from `Sandboxes`, so it must be qualified here.
eval(S.sandboxed_eval_expr(sandbox, :( nand(a, b) = $(~)($(&)(a, b)) )))
S.enter_repl(sandbox)

The only hacky bit is the symbol rewriting. It would be nice if Base supported defining modules with absolutely no imports; then that hack wouldn’t be required.

7 Likes

This is a cool experiment! Yeah I agree there should be a way to get an even barer module. I suppose we have to macro it, e.g. @bare baremodule ... end? (any other ideas?) Then there is a boolean argument in the module expression that says whether to add standard imports. We could allow nothing there to make it totally empty, then just thread that through to jl_new_module.

3 Likes

I think that since you can always just eval new definitions in from the outside, it’d make sense to have this functionality just create a totally empty module with no imports and people can then do what they want.

How about @emptymodule Foo ?

Out of curiosity, is there a way to do this from the user side? I’ve tried

# NOTE: calls the C function `jl_new_module` directly to create a module named `Foo`.
# As reported in this thread, the call itself succeeds, but displaying the resulting
# module segfaults because its parent module is left as NULL.
ccall(:jl_new_module, Module, (Symbol,), :Foo)

but this segfaults :sweat_smile:

True, this could be done with an argument to Module(). That probably makes more sense than having syntax.

3 Likes

It just segfaults when trying to show the module, since the parent module is set to NULL. You can use jl_f_new_module, which automatically sets the parent to Main.

It’s actually fairly easy to support even barer baremodules, all you need to change is a couple of lines in module.c:

diff --git a/src/module.c b/src/module.c
index 20c119bedc..e5caa10b28 100644
--- a/src/module.c
+++ b/src/module.c
@@ -11,7 +11,7 @@
 extern "C" {
 #endif
 
-JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name)
+JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, int using_core)
 {
     jl_ptls_t ptls = jl_get_ptls_states();
     const jl_uuid_t uuid_zero = {0, 0};
@@ -36,7 +36,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name)
     htable_new(&m->bindings, 0);
     arraylist_new(&m->usings, 0);
     JL_GC_PUSH1(&m);
-    if (jl_core_module) {
+    if (jl_core_module && using_core) {
         jl_module_using(m, jl_core_module);
     }
     // export own name, so "using Foo" makes "Foo" itself visible
@@ -46,15 +46,20 @@ JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name)
     return m;
 }
 
+JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name)
+{
+    return jl_new_module_(name, 1);
+}
+
 uint32_t jl_module_next_counter(jl_module_t *m)
 {
     return jl_atomic_fetch_add(&m->counter, 1);
 }
 
-JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports)
+JL_DLLEXPORT jl_value_t *jl_f_new_module_(jl_sym_t *name, uint8_t std_imports, int using_core)
 {
     // TODO: should we prohibit this during incremental compilation?
-    jl_module_t *m = jl_new_module(name);
+    jl_module_t *m = jl_new_module_(name, using_core);
     JL_GC_PUSH1(&m);
     m->parent = jl_main_module; // TODO: this is a lie
     jl_gc_wb(m, m->parent);
@@ -65,6 +70,11 @@ JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports)
     return (jl_value_t*)m;
 }
 
+JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports)
+{
+    return jl_f_new_module_(name, std_imports, 1);
+}
+
 JL_DLLEXPORT void jl_set_module_nospecialize(jl_module_t *self, int on)
 {
     self->nospecialize = (on ? -1 : 0);

This allows you to do some fun things like:

julia> Foo = ccall(:jl_f_new_module_, Module, (Symbol, UInt8, Cint), :Foo, 0, 0)
Main.Foo

julia> Foo.Core
ERROR: UndefVarError: Core not defined
Stacktrace:
 [1] getproperty(x::Module, f::Symbol)
   @ Base ./Base.jl:26
 [2] top-level scope
   @ REPL[6]:1

julia> Core.eval(Foo, :(a === b = $(!==)(a, b)))
=== (generic function with 1 method)

julia> Core.eval(Foo, :(1 === 2))
true
8 Likes

Great :slight_smile:

I’m happy to do a PR for this. I would add a new argument to Module() rather than using syntax and would implement it as @simeonschaub has suggested (assuming that I can’t cause any bugs with that!)

I’ll open one within the next week, crediting as appropriate unless someone beats me to it.

2 Likes

PR: Allow modules to optionally import nothing at all by cmcaine · Pull Request #40110 · JuliaLang/julia · GitHub

5 Likes