Open
Description
Describe the enhancement requested
Flight C++ doesn't support dictionary replacement in DoGet calls and support should be added.
A minimal reproduction of this follows:
import pandas as pd
import pyarrow as pa
import pyarrow.flight
import pyarrow.parquet
# Two nullable columns: plain int8 "numbers" and dictionary-encoded
# (int8 indices -> utf8 values) "letters".
_letters_type = pa.dictionary(pa.int8(), pa.utf8())
schema = pa.schema(
    [
        pa.field("numbers", pa.int8(), nullable=True),
        pa.field("letters", _letters_type, nullable=True),
    ]
)

# Two record batches whose "letters" values do not overlap.
# NOTE(review): each batch presumably ends up with its own dictionary,
# which is what exercises dictionary replacement -- confirm.
batches = [
    pa.RecordBatch.from_pandas(pd.DataFrame(columns), schema=schema)
    for columns in (
        {"numbers": [10, 11, 12], "letters": ["a", "b", "c"]},
        {"numbers": [13, 14], "letters": ["d", "e"]},
    )
]

# Dataset name (also used as the ticket payload) -> batches to stream.
flights = {"./table": batches}
class FlightServer(pa.flight.FlightServerBase):
    """Minimal Flight server exposing the module-level ``flights`` mapping.

    A ticket is the UTF-8 encoded key of the dataset in ``flights``.
    """

    def __init__(self, location="grpc://0.0.0.0:8815", **kwargs):
        """Create the server.

        Args:
            location: URI the server binds to.
            **kwargs: Forwarded unchanged to ``FlightServerBase``.
        """
        super().__init__(location, **kwargs)
        # Remembered so list_flights can advertise where the data is served.
        self._location = location

    def list_flights(self, context, criteria):
        """Yield one ``FlightInfo`` per dataset in ``flights``.

        The client in this reproduction obtains its ticket from
        ``list_flights()``, so the server has to implement it for the
        ``do_get`` call to be reached at all.
        """
        for name in flights:
            descriptor = pa.flight.FlightDescriptor.for_path(name)
            endpoint = pa.flight.FlightEndpoint(
                name.encode("utf-8"), [self._location]
            )
            # -1 marks the total record and byte counts as unknown.
            yield pa.flight.FlightInfo(schema, descriptor, [endpoint], -1, -1)

    def do_get(self, context, ticket):
        """Stream the batches of the dataset named by *ticket*.

        Raises:
            KeyError: If the ticket does not name a dataset in ``flights``.
        """
        dataset_name = ticket.ticket.decode("utf-8")
        reader = pa.RecordBatchReader.from_batches(schema, flights[dataset_name])
        return pa.flight.RecordBatchStream(reader)
if __name__ == "__main__":
    # Start the server on its default location when run as a script.
    FlightServer().serve()
And with the following client code:
import pandas as pd
import pyarrow as pa
import pyarrow.flight
# Connect to the address the server snippet binds to by default.
client = pa.flight.connect("grpc://0.0.0.0:8815")

# Fetch every advertised flight through its first endpoint and print it.
for flight_info in client.list_flights():
    ticket = flight_info.endpoints[0].ticket
    stream_reader = client.do_get(ticket)
    table = stream_reader.read_all()
    print(table.to_pandas())
An incorrect result is printed:
numbers letters
0 10 a
1 11 b
2 12 c
3 13 a
4 14 b
(the letters column reads a b c a b
instead of the expected a b c d e
)
Component(s)
C++, FlightRPC