Skip to content

Commit 1df78e5

Browse files
hdikeman authored and facebook-github-bot committed
feat(parser): Add input/output table extraction to PrestoParser (facebookincubator#804)
Summary: There are use cases for which callers may want to extract some information from a query without needing to resolve all the metadata details required to build a full logical plan. An example could be a client-side check that decides where to send a query based on the tables it accesses, or moving ACL checks earlier in query execution by determining accessed tables immediately. See facebookincubator#789 for the related issue. To enable this, I am adding two APIs to the PrestoParser, one which extracts accessed input tables, and one which extracts output tables, if any exist. There are two parts to this changeset: 1. On the recommendation of Masha, defined a DefaultTraversalVisitor, which performs a DFS traversal over all nodes in the AST. I used this base class for the existing ExprAnalyzer and the new TableVisitor. I can pull this into a separate PR if desired. 2. Add the TableVisitor, which extracts input tables and the output table for the query, and link it into two new PrestoParser APIs for input and output tables respectively. Some things I was unsure about and would like feedback on: 1. I exposed two APIs, but I could easily have exposed one (getInputAndOutputTables) and returned a struct containing the output of both APIs. 2. I implemented the handlers for query types not currently covered by the parser (materialized view statements, some view statements, pure CREATE TABLE), but these cannot be run yet. I can also remove them or leave more comments in PrestoParser.cpp. I am also looking for comments on structuring: PrestoParser.cpp is getting big; I can cut it up into a few source/header files in this diff or a follow-up if others agree (but did not want to do so without discussion). Reviewed By: mbasmanova Differential Revision: D91525572
1 parent ef09feb commit 1df78e5

File tree

7 files changed

+376
-2
lines changed

7 files changed

+376
-2
lines changed

axiom/sql/presto/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ add_subdirectory(grammar)
1616
add_subdirectory(ast)
1717
add_subdirectory(example)
1818

19-
add_library(axiom_sql_presto_parser PrestoParser.cpp SqlStatement.cpp)
19+
add_library(axiom_sql_presto_parser PrestoParser.cpp SqlStatement.cpp TableVisitor.cpp)
2020

2121
target_link_libraries(
2222
axiom_sql_presto_parser

axiom/sql/presto/PrestoParser.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "axiom/connectors/ConnectorMetadata.h"
2121
#include "axiom/logical_plan/PlanBuilder.h"
2222
#include "axiom/sql/presto/PrestoParseError.h"
23+
#include "axiom/sql/presto/TableVisitor.h"
2324
#include "axiom/sql/presto/ast/AstBuilder.h"
2425
#include "axiom/sql/presto/ast/AstPrinter.h"
2526
#include "axiom/sql/presto/ast/DefaultTraversalVisitor.h"
@@ -2201,4 +2202,19 @@ SqlStatementPtr PrestoParser::doParse(
22012202
return doPlan(query, defaultConnectorId_, defaultSchema_, parseSql);
22022203
}
22032204

2205+
// Parses 'sql' into an AST and walks it with a TableVisitor to collect the
// tables the statement reads from and the table it writes to, if any.
// Partially-qualified names are completed by the visitor using this parser's
// default connector ID and default schema.
ReferencedTables PrestoParser::getReferencedTables(std::string_view sql) {
  ParserHelper parserHelper(sql);
  auto* parseTree = parserHelper.parse();

  AstBuilder builder(false);
  const auto ast =
      std::any_cast<std::shared_ptr<Statement>>(builder.visit(parseTree));

  TableVisitor tableVisitor(defaultConnectorId_, defaultSchema_);
  tableVisitor.process(ast.get());

  // The visitor is local, so its results are copied into the returned struct.
  ReferencedTables tables;
  tables.inputTables = tableVisitor.inputTables();
  tables.outputTable = tableVisitor.outputTable();
  return tables;
}
2219+
22042220
} // namespace axiom::sql::presto

axiom/sql/presto/PrestoParser.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,18 @@
1919

2020
namespace axiom::sql::presto {
2121

22+
/// The set of tables that a SQL statement references. Table names are
23+
/// returned as 'catalog.schema.table', with missing catalog and schema parts
/// filled in from the parser's defaults. A single-part name resolved without
/// a default schema is returned as 'catalog.table'.
24+
struct ReferencedTables {
25+
/// The set of tables accessed for reading by the query, or the empty set if
26+
/// the query does not read any tables. References that resolve to a CTE
/// alias are not reported.
27+
std::unordered_set<std::string> inputTables;
28+
29+
/// The table or view which would be created, modified or dropped by the
30+
/// query, or nullopt if the query has no such target.
31+
std::optional<std::string> outputTable;
32+
};
33+
2234
/// SQL Parser compatible with PrestoSQL dialect.
2335
class PrestoParser {
2436
public:
@@ -47,6 +59,13 @@ class PrestoParser {
4759
std::string_view sql,
4860
bool enableTracing = false);
4961

62+
/// Extracts tables referenced in a SQL statement, if any exist. This includes
63+
/// table references which could later be optimized out, if their results
64+
/// do not affect the query output (e.g., an unreferenced CTE).
65+
/// @param sql SQL query statement
66+
/// @return input and output tables which the query references.
67+
ReferencedTables getReferencedTables(std::string_view sql);
68+
5069
private:
5170
SqlStatementPtr doParse(std::string_view sql, bool enableTracing);
5271

axiom/sql/presto/TableVisitor.cpp

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "axiom/sql/presto/TableVisitor.h"
18+
19+
#include <fmt/format.h>
20+
21+
#include "velox/common/base/Exceptions.h"
22+
23+
namespace axiom::sql::presto {
24+
25+
// Records the connector ID and the optional schema that are used to complete
// partially-qualified table names encountered during traversal.
TableVisitor::TableVisitor(
    const std::string& defaultConnectorId,
    const std::optional<std::string>& defaultSchema)
    : defaultConnectorId_(defaultConnectorId),
      defaultSchema_(defaultSchema) {}
29+
30+
// Handles one CTE definition ('WITH <name> AS (<query>)').
void TableVisitor::visitWithQuery(WithQuery* node) {
  // Walk the CTE body before registering its alias. A CTE may shadow a real
  // table of the same name, e.g. 'WITH t AS (SELECT * FROM t)'; the inner 't'
  // must still be reported as an input table.
  DefaultTraversalVisitor::visitWithQuery(node);

  // From here on, single-part references to this name are treated as
  // references to the CTE rather than to a table.
  ctes_.emplace(node->name()->value());
}
37+
38+
// Records a table reference as an input table, unless it is a single-part
// name matching a CTE alias that has already been registered.
void TableVisitor::visitTable(Table* node) {
  const auto& nameParts = node->name()->parts();
  const bool isCteReference =
      nameParts.size() == 1 && ctes_.find(nameParts[0]) != ctes_.end();

  if (!isCteReference) {
    inputTables_.insert(constructTableName(*node->name()));
    DefaultTraversalVisitor::visitTable(node);
  }
}
46+
47+
// INSERT: the insert target is the statement's output table. It is recorded
// first, then the default traversal continues over the statement.
void TableVisitor::visitInsert(Insert* node) {
  const QualifiedName& target = *node->target();
  setOutputTable(target);
  DefaultTraversalVisitor::visitInsert(node);
}
51+
52+
// CREATE TABLE AS SELECT: the table being created is the statement's output
// table. It is recorded first, then the default traversal continues over the
// statement.
void TableVisitor::visitCreateTableAsSelect(CreateTableAsSelect* node) {
  const QualifiedName& created = *node->name();
  setOutputTable(created);
  DefaultTraversalVisitor::visitCreateTableAsSelect(node);
}
56+
57+
// UPDATE: the updated table is the statement's output table. It is recorded
// first, then the default traversal continues over the statement.
void TableVisitor::visitUpdate(Update* node) {
  const QualifiedName& updated = *node->table();
  setOutputTable(updated);
  DefaultTraversalVisitor::visitUpdate(node);
}
61+
62+
// DELETE: the table rows are deleted from is the statement's output table. It
// is recorded first, then the default traversal continues over the statement.
void TableVisitor::visitDelete(Delete* node) {
  const QualifiedName& deletedFrom = *node->table();
  setOutputTable(deletedFrom);
  DefaultTraversalVisitor::visitDelete(node);
}
66+
67+
// CREATE TABLE: the table being created is the statement's output table. It
// is recorded first, then the default traversal continues over the statement.
void TableVisitor::visitCreateTable(CreateTable* node) {
  const QualifiedName& created = *node->name();
  setOutputTable(created);
  DefaultTraversalVisitor::visitCreateTable(node);
}
71+
72+
// CREATE VIEW: the view being created is reported as the statement's output
// table. It is recorded first, then the default traversal continues over the
// statement.
void TableVisitor::visitCreateView(CreateView* node) {
  const QualifiedName& created = *node->name();
  setOutputTable(created);
  DefaultTraversalVisitor::visitCreateView(node);
}
76+
77+
// CREATE MATERIALIZED VIEW: the view being created is reported as the
// statement's output table. It is recorded first, then the default traversal
// continues over the statement.
void TableVisitor::visitCreateMaterializedView(CreateMaterializedView* node) {
  const QualifiedName& created = *node->name();
  setOutputTable(created);
  DefaultTraversalVisitor::visitCreateMaterializedView(node);
}
81+
82+
// DROP TABLE: the table being dropped is reported as the statement's output
// table. It is recorded first, then the default traversal continues over the
// statement.
void TableVisitor::visitDropTable(DropTable* node) {
  const QualifiedName& dropped = *node->tableName();
  setOutputTable(dropped);
  DefaultTraversalVisitor::visitDropTable(node);
}
86+
87+
// DROP VIEW: the view being dropped is reported as the statement's output
// table. It is recorded first, then the default traversal continues over the
// statement.
void TableVisitor::visitDropView(DropView* node) {
  const QualifiedName& dropped = *node->viewName();
  setOutputTable(dropped);
  DefaultTraversalVisitor::visitDropView(node);
}
91+
92+
// DROP MATERIALIZED VIEW: the view being dropped is reported as the
// statement's output table. It is recorded first, then the default traversal
// continues over the statement.
void TableVisitor::visitDropMaterializedView(DropMaterializedView* node) {
  const QualifiedName& dropped = *node->viewName();
  setOutputTable(dropped);
  DefaultTraversalVisitor::visitDropMaterializedView(node);
}
96+
97+
std::string TableVisitor::constructTableName(const QualifiedName& name) const {
98+
const auto& parts = name.parts();
99+
VELOX_CHECK(!parts.empty(), "Table name cannot be empty");
100+
VELOX_CHECK_LE(
101+
parts.size(),
102+
3,
103+
"Table name must have 1-3 components, '{}'",
104+
name.fullyQualifiedName());
105+
switch (parts.size()) {
106+
case 1:
107+
if (defaultSchema_.has_value()) {
108+
return fmt::format(
109+
"{}.{}.{}", defaultConnectorId_, defaultSchema_.value(), parts[0]);
110+
}
111+
return fmt::format("{}.{}", defaultConnectorId_, parts[0]);
112+
case 2:
113+
return fmt::format("{}.{}.{}", defaultConnectorId_, parts[0], parts[1]);
114+
case 3:
115+
return fmt::format("{}.{}.{}", parts[0], parts[1], parts[2]);
116+
default:
117+
VELOX_UNREACHABLE();
118+
}
119+
}
120+
121+
// Records 'name' as the statement's output table. Fails a VELOX_CHECK if an
// output table has already been recorded, since at most one is tracked.
void TableVisitor::setOutputTable(const QualifiedName& name) {
  VELOX_CHECK(!outputTable_.has_value());
  outputTable_.emplace(constructTableName(name));
}
125+
126+
} // namespace axiom::sql::presto

axiom/sql/presto/TableVisitor.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <unordered_set>
19+
20+
#include "axiom/sql/presto/ast/DefaultTraversalVisitor.h"
21+
22+
namespace axiom::sql::presto {
23+
24+
// Analyzes an expression to extract the fully-qualified names of any
25+
// input or output tables or views in the expression. Table accesses
26+
// inside CTEs are included, even if the CTE is never read from.
27+
class TableVisitor : public DefaultTraversalVisitor {
28+
public:
29+
TableVisitor(
30+
const std::string& defaultConnectorId,
31+
const std::optional<std::string>& defaultSchema);
32+
33+
const std::unordered_set<std::string>& inputTables() const {
34+
return inputTables_;
35+
}
36+
37+
const std::optional<std::string>& outputTable() const {
38+
return outputTable_;
39+
}
40+
41+
protected:
42+
void visitWithQuery(WithQuery* node) override;
43+
void visitTable(Table* node) override;
44+
void visitInsert(Insert* node) override;
45+
void visitCreateTableAsSelect(CreateTableAsSelect* node) override;
46+
void visitUpdate(Update* node) override;
47+
void visitDelete(Delete* node) override;
48+
void visitCreateTable(CreateTable* node) override;
49+
void visitCreateView(CreateView* node) override;
50+
void visitCreateMaterializedView(CreateMaterializedView* node) override;
51+
void visitDropTable(DropTable* node) override;
52+
void visitDropView(DropView* node) override;
53+
void visitDropMaterializedView(DropMaterializedView* node) override;
54+
55+
private:
56+
std::string constructTableName(const QualifiedName& name) const;
57+
void setOutputTable(const QualifiedName& name);
58+
59+
const std::string& defaultConnectorId_;
60+
const std::optional<std::string>& defaultSchema_;
61+
std::unordered_set<std::string> ctes_;
62+
std::unordered_set<std::string> inputTables_;
63+
std::optional<std::string> outputTable_;
64+
};
65+
66+
} // namespace axiom::sql::presto

axiom/sql/presto/tests/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
add_executable(axiom_sql_presto_test LogicalPlanMatcher.cpp PrestoParserTest.cpp)
15+
add_executable(
16+
axiom_sql_presto_test
17+
LogicalPlanMatcher.cpp
18+
PrestoParserTest.cpp
19+
TableExtractorTest.cpp
20+
)
1621

1722
add_test(axiom_sql_presto_test axiom_sql_presto_test)
1823

0 commit comments

Comments
 (0)