I am trying to change some functions that insert into my MariaDB database so that they will work in batches instead of row-by-row. I am having somewhat of a hard time because I don’t really understand how to prepare statements correctly with MySQL.jl.
Here are my current functions that only do it row-by-row:
import MySQL
import DataFrames: AbstractDataFrame, eachrow, names
import Iterators: partition
function database_insertstatement(
connection::MySQL.DBInterface.Connection,
tablename::AbstractString,
columns::AbstractVector{<:AbstractString};
primarykeys::AbstractVector{<:AbstractString}=[""]
)
fields = "(" * join(columns, ",") * ")"
values = "(" * join(map(x -> "?", columns), ",") * ")"
value_update = join(map(x -> x * " = VALUE(" * x * ")", setdiff(columns, primarykeys)), ",")
sql = (
"INSERT INTO " * tablename * " " * fields * " VALUES " * values
* " ON DUPLICATE KEY UPDATE " * value_update * ";"
)
return MySQL.DBInterface.prepare(connection, sql)
end
function database_insertintotable(
connection::MySQL.DBInterface.Connection,
tablename::AbstractString,
datatoinsert::AbstractDataFrame;
primarykeys::AbstractVector{<:AbstractString}=[""]
)
statement = database_insertstatement(
connection, tablename, names(datatoinsert);
primarykeys=primarykeys
)
MySQL.transaction(connection) do
for row in eachrow(datatoinsert)
MySQL.DBInterface.execute(statement, row)
end
end
MySQL.DBInterface.close!(statement)
return nothing
end
First, I tried to modify database_insertstatement
to allow a batch::Bool
keyword argument to allow the statement to receive more parameters for the ‘VALUES (?, ?, ?, …)’ segment of the statement to become ‘VALUES (?, ?, ?, …), (?, ?, ?, ?), …’ instead, but I failed to do this.
Then, I tried ignoring the first function and just resort to defining the statement to execute for every batch to be executed, but then I get string formatting errors. Initially I though it was because I was passing a string object, but I tried building a statement with MySQL.jl’s prepare
function and I got the same error. Here is the code for the attempt:
function database_insertintotable(
connection::MySQL.DBInterface.Connection,
tablename::AbstractString,
datatoinsert::AbstractDataFrame;
primarykeys::AbstractVector{<:AbstractString}=[""],
batch_size::Int=1000
)
rows = collect(eachrow(datatoinsert))
table_columns = names(datatoinsert)
fields = "(" * join(table_columns, ",") * ")"
value_update = join(map(x -> x * " = VALUE(" * x * ")", setdiff(table_columns, primarykeys)), ",")
MySQL.transaction(connection) do
for batch in partition(rows, batch_size)
values_list = join(["(" * join(row, ", ") * ")" for row in batch], ", ")
sql = (
"INSERT INTO " * tablename * " " * fields * " VALUES " * values_list
* " ON DUPLICATE KEY UPDATE " * value_update * ";"
)
statement = MySQL.DBInterface.prepare(connection, sql)
MySQL.DBInterface.execute(statement)
MySQL.DBInterface.close!(statement)
end
end
return nothing
end
The error I’m getting is the following:
ERROR: (1064): You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'Gas, 1751.0, PT), (2021-11-04, 1, Fossil Hard coal, 282.0, PT), (2021-11-04, ...' at line 1
Is there a simpler way of achieving this?