Yes! To pass Arrow data back and forth, you first need to serialize it to a byte array on the sending side, and then deserialize that byte array on the receiving side. Here are some functions I wrote to do this with DataFrames:
Python:
import pandas as pd
import pyarrow as pa
def convert_to_arrow_bytes(df: pd.DataFrame) -> bytearray:
    """
    Efficiently convert a dataframe to Arrow bytes in memory,
    for transfer to other processes.

    The dataframe is written as a single record batch in the Arrow IPC
    stream format into an in-memory buffer.

    Modified from https://github.com/JuliaData/Arrow.jl/blob/main/test/pyarrow_roundtrip.jl

    Args:
        df: The dataframe to serialize.

    Returns:
        A bytearray holding the Arrow IPC stream.
    """
    batch = pa.record_batch(df)
    sink = pa.BufferOutputStream()
    # The context manager closes the writer even if write_batch raises.
    with pa.ipc.new_stream(sink, batch.schema) as writer:
        writer.write_batch(batch)
    # pa.Buffer exposes the Python buffer protocol, so the bytearray can be
    # built from it directly without an intermediate bytes copy.
    return bytearray(sink.getvalue())
def receive_arrow_bytes(byte_array: bytearray) -> pd.DataFrame:
    """
    Decode Arrow IPC stream bytes back into a pandas dataframe.

    Args:
        byte_array: Bytes containing an Arrow IPC stream.

    Returns:
        The data read from the stream, converted to a pandas DataFrame.
    """
    # Open the stream, read all of it into one table, then hand it to pandas.
    return pa.ipc.open_stream(byte_array).read_all().to_pandas()
Julia:
"""
    load_df(bytearray::Vector{UInt8})::DataFrame

Decode Arrow-formatted bytes into a `DataFrame`.

The bytes are read with `Arrow.Table`, converted to a `DataFrame`, and the result is
passed through `disallowmissing!` before being returned.
"""
function load_df(bytearray::Vector{UInt8})::DataFrame
    table = Arrow.Table(bytearray)
    frame = DataFrame(table)
    return disallowmissing!(frame)
end
"""
    df_to_arrow_bytes(df)

Serialize `df` to Arrow format in memory and return the resulting bytes.

`df` is written with `Arrow.write` into an `IOBuffer`, and the contents of that buffer
are returned as a byte vector suitable for handing to another process.
"""
function df_to_arrow_bytes(df)
    io = IOBuffer()
    Arrow.write(io, df)
    # `take!` returns the full contents of the buffer regardless of the current
    # position, so no `seekstart` is needed beforehand.
    return take!(io)
end