Skip to content

Commit 18802c8

Browse files
amol-gforsyth
andauthored
feat: add substrait.proto convenience module and document it (#50)
Add a ``substrait.proto`` module that gives access to the Substrait protocol classes removing the need to navigate the hierarchy automatically generated by protobuf. It also provides access to the modules without the ``_pb2`` suffix which is an implementation detail of the protobuf version used. Provides examples on how to generate and read back Substrait plans using the substrait-python module itself. --------- Co-authored-by: Gil Forsyth <[email protected]>
1 parent dc42f18 commit 18802c8

File tree

3 files changed

+173
-14
lines changed

3 files changed

+173
-14
lines changed

README.md

+117-14
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,118 @@ This project is not an execution engine for Substrait Plans.
2727
This is an experimental package that is still under development.
2828

2929
# Example
30-
At the moment, this project contains only generated Python classes for the Substrait protobuf messages. Let's use an existing Substrait producer, [Ibis](https://ibis-project.org), to provide an example using Python Substrait as the consumer.
30+
31+
## Produce a Substrait Plan
32+
The ``substrait.proto`` module provides access to the classes
33+
that represent a Substrait plan, allowing you to create new plans.
34+
35+
Here is an example plan equivalent to ``SELECT first_name FROM people``
36+
where the ``people`` table has ``first_name`` and ``surname`` columns of type ``String``.
37+
38+
```
39+
>>> from substrait import proto
40+
>>> plan = proto.Plan(
41+
... relations=[
42+
... proto.PlanRel(
43+
... root=proto.RelRoot(
44+
... names=["first_name"],
45+
... input=proto.Rel(
46+
... read=proto.ReadRel(
47+
... named_table=proto.ReadRel.NamedTable(names=["people"]),
48+
... base_schema=proto.NamedStruct(
49+
... names=["first_name", "surname"],
50+
... struct=proto.Type.Struct(
51+
... types=[
52+
... proto.Type(string=proto.Type.String(nullability=proto.Type.Nullability.NULLABILITY_REQUIRED)),
53+
... proto.Type(string=proto.Type.String(nullability=proto.Type.Nullability.NULLABILITY_REQUIRED))
54+
... ] # /types
55+
... ) # /struct
56+
... ) # /base_schema
57+
... ) # /read
58+
... ) # /input
59+
... ) # /root
60+
... ) # /PlanRel
61+
... ] # /relations
62+
... )
63+
>>> print(plan)
64+
relations {
65+
root {
66+
input {
67+
read {
68+
base_schema {
69+
names: "first_name"
70+
names: "surname"
71+
struct {
72+
types {
73+
string {
74+
nullability: NULLABILITY_REQUIRED
75+
}
76+
}
77+
types {
78+
string {
79+
nullability: NULLABILITY_REQUIRED
80+
}
81+
}
82+
}
83+
}
84+
named_table {
85+
names: "people"
86+
}
87+
}
88+
}
89+
names: "first_name"
90+
}
91+
}
92+
>>> serialized_plan = plan.SerializeToString()
93+
>>> serialized_plan
94+
b'\x1aA\x12?\n1\n/\x12#\n\nfirst_name\n\x07surname\x12\x0c\n\x04b\x02\x10\x02\n\x04b\x02\x10\x02:\x08\n\x06people\x12\nfirst_name'
95+
```
96+
97+
## Consume the Substrait Plan
98+
The same plan we generated in the previous example,
99+
can be loaded back from its binary representation
100+
using the ``Plan.ParseFromString`` method:
101+
102+
```
103+
>>> from substrait.proto import Plan
104+
>>> p = Plan()
105+
>>> p.ParseFromString(serialized_plan)
106+
67
107+
>>> p
108+
relations {
109+
root {
110+
input {
111+
read {
112+
base_schema {
113+
names: "first_name"
114+
names: "surname"
115+
struct {
116+
types {
117+
string {
118+
nullability: NULLABILITY_REQUIRED
119+
}
120+
}
121+
types {
122+
string {
123+
nullability: NULLABILITY_REQUIRED
124+
}
125+
}
126+
}
127+
}
128+
named_table {
129+
names: "people"
130+
}
131+
}
132+
}
133+
names: "first_name"
134+
}
135+
}
136+
```
137+
31138
## Produce a Substrait Plan with Ibis
139+
Let's use an existing Substrait producer, [Ibis](https://ibis-project.org),
140+
to provide an example using Python Substrait as the consumer.
141+
32142
```
33143
In [1]: import ibis
34144
@@ -54,21 +164,14 @@ In [5]: compiler = SubstraitCompiler()
54164
55165
In [6]: protobuf_msg = compiler.compile(query).SerializeToString()
56166
57-
In [7]: type(protobuf_msg)
58-
Out[7]: bytes
59-
```
60-
## Consume the Substrait Plan using Python Substrait
61-
```
62-
In [8]: import substrait
167+
In [7]: from substrait.proto import Plan
63168
64-
In [9]: from substrait.gen.proto.plan_pb2 import Plan
169+
In [8]: my_plan = Plan()
65170
66-
In [10]: my_plan = Plan()
171+
In [9]: my_plan.ParseFromString(protobuf_msg)
172+
Out[9]: 186
67173
68-
In [11]: my_plan.ParseFromString(protobuf_msg)
69-
Out[11]: 186
70-
71-
In [12]: print(my_plan)
174+
In [10]: print(my_plan)
72175
relations {
73176
root {
74177
input {
@@ -177,4 +280,4 @@ version {
177280
minor_number: 24
178281
producer: "ibis-substrait"
179282
}
180-
```
283+
```

src/substrait/proto.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
def _load():
    """Expose the generated Substrait protobuf modules and classes here.

    Instead of forcing users to deal with autogenerated protobuf
    modules and to import individual components of the protocol
    from submodules, this function walks the top-level modules of
    ``substrait.gen.proto`` and, for each one:

    * imports it and binds it on this module under a friendly name,
      i.e. its name with ``_pb2`` removed (``plan_pb2`` -> ``plan``);
    * binds every class found in it directly on this module.

    For example,
    ``substrait.gen.proto.extensions.extensions_pb2.SimpleExtensionDeclaration``
    becomes ``substrait.proto.SimpleExtensionDeclaration``.

    If two modules expose a class under the same name, the one bound
    last replaces the earlier binding.
    """
    import sys
    import inspect
    import pkgutil
    import importlib
    from substrait.gen import proto as _proto

    selfmodule = sys.modules[__name__]
    for submodule_info in pkgutil.iter_modules(_proto.__path__):
        submodule_name = submodule_info.name
        attr_name = submodule_name.replace("_pb2", "")
        if submodule_name == "extensions":
            # The extensions messages live one level deeper, in
            # ``extensions.extensions_pb2``; still expose them as
            # plain ``extensions``.
            submodule_name = "extensions.extensions_pb2"
            attr_name = "extensions"

        submodule = importlib.import_module(f".{submodule_name}", _proto.__name__)
        setattr(selfmodule, attr_name, submodule)

        # getmembers() already yields (name, value) pairs, so let it
        # filter for classes instead of re-fetching each attribute.
        for membername, member in inspect.getmembers(submodule, inspect.isclass):
            setattr(selfmodule, membername, member)
36+
37+
38+
_load()

tests/test_proto.py

+18
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,21 @@ def test_imports():
1212
from substrait.gen.proto.type_expressions_pb2 import DerivationExpression
1313
from substrait.gen.proto.type_pb2 import Type
1414
from substrait.gen.proto.extensions.extensions_pb2 import SimpleExtensionURI
15+
16+
17+
def test_proto_proxy_module():
    """Check that substrait.proto exposes protocol classes and modules."""
    import substrait.proto

    # Snapshot of every name reachable on the convenience module.
    exported = set(dir(substrait.proto))

    # A sample of message classes that must be available at the top level.
    expected_classes = {"Plan", "Type", "NamedStruct", "RelRoot"}
    assert expected_classes.issubset(exported)

    # The protocol modules must be available without the ``_pb2`` suffix.
    expected_modules = {
        "algebra",
        "capabilities",
        "extensions",
        "extended_expression",
        "function",
        "parameterized_types",
        "plan",
        "type_expressions",
        "type",
    }
    assert expected_modules.issubset(exported)

0 commit comments

Comments
 (0)