Skip to content

Add support for TABLESAMPLE pipe operator #1860

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1559,7 +1559,7 @@ impl fmt::Display for TableSampleBucket {
}
impl fmt::Display for TableSample {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, " {}", self.modifier)?;
write!(f, "{}", self.modifier)?;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For better composition of AST building blocks, I removed the leading space from `TableSample`'s `Display` output here and reintroduced it in the structs that contain a `TableSample`, so each containing struct emits its own separator.

if let Some(name) = &self.name {
write!(f, " {}", name)?;
}
Expand Down Expand Up @@ -1862,7 +1862,7 @@ impl fmt::Display for TableFactor {
write!(f, " WITH ORDINALITY")?;
}
if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample {
write!(f, "{sample}")?;
write!(f, " {sample}")?;
}
if let Some(alias) = alias {
write!(f, " AS {alias}")?;
Expand All @@ -1877,7 +1877,7 @@ impl fmt::Display for TableFactor {
write!(f, "{version}")?;
}
if let Some(TableSampleKind::AfterTableAlias(sample)) = sample {
write!(f, "{sample}")?;
write!(f, " {sample}")?;
}
Ok(())
}
Expand Down Expand Up @@ -2680,6 +2680,10 @@ pub enum PipeOperator {
full_table_exprs: Vec<ExprWithAliasAndOrderBy>,
group_by_expr: Vec<ExprWithAliasAndOrderBy>,
},
/// Selects a random sample of rows from the input table.
/// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT)`
/// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#tablesample_pipe_operator>
TableSample { sample: Box<TableSample> },
}

impl fmt::Display for PipeOperator {
Expand Down Expand Up @@ -2731,6 +2735,10 @@ impl fmt::Display for PipeOperator {
PipeOperator::OrderBy { exprs } => {
write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice()))
}

PipeOperator::TableSample { sample } => {
write!(f, "{}", sample)
}
}
}
}
Expand Down
15 changes: 13 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11047,6 +11047,7 @@ impl<'a> Parser<'a> {
Keyword::LIMIT,
Keyword::AGGREGATE,
Keyword::ORDER,
Keyword::TABLESAMPLE,
])?;
match kw {
Keyword::SELECT => {
Expand Down Expand Up @@ -11109,6 +11110,10 @@ impl<'a> Parser<'a> {
let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
pipe_operators.push(PipeOperator::OrderBy { exprs })
}
Keyword::TABLESAMPLE => {
let sample = self.parse_table_sample(TableSampleModifier::TableSample)?;
pipe_operators.push(PipeOperator::TableSample { sample });
}
unhandled => {
return Err(ParserError::ParserError(format!(
"`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}"
Expand Down Expand Up @@ -12753,7 +12758,13 @@ impl<'a> Parser<'a> {
} else {
return Ok(None);
};
self.parse_table_sample(modifier).map(Some)
}

fn parse_table_sample(
&mut self,
modifier: TableSampleModifier,
) -> Result<Box<TableSample>, ParserError> {
let name = match self.parse_one_of_keywords(&[
Keyword::BERNOULLI,
Keyword::ROW,
Expand Down Expand Up @@ -12835,14 +12846,14 @@ impl<'a> Parser<'a> {
None
};

Ok(Some(Box::new(TableSample {
Ok(Box::new(TableSample {
modifier,
name,
quantity,
seed,
bucket,
offset,
})))
}))
}

fn parse_table_sample_seed(
Expand Down
5 changes: 5 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15155,6 +15155,11 @@ fn parse_pipeline_operator() {
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC");
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC");

// tablesample pipe operator
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)");
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)");
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)");

// many pipes
dialects.verified_stmt(
"SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC",
Expand Down