@@ -27,6 +27,7 @@ pub(super) fn postprocess(query: SqlQuery, ctx: &mut Context) -> SqlQuery {
2727fn infer_sorts ( query : SqlQuery , ctx : & mut Context ) -> SqlQuery {
2828 let mut s = SortingInference {
2929 last_sorting : Vec :: new ( ) ,
30+ last_sorting_from_distinct_on : false ,
3031 ctes_sorting : HashMap :: new ( ) ,
3132 main_relation : false ,
3233 ctx,
@@ -37,6 +38,12 @@ fn infer_sorts(query: SqlQuery, ctx: &mut Context) -> SqlQuery {
3738
3839struct SortingInference < ' a > {
3940 last_sorting : Sorting ,
41+ /// True if last_sorting originated from DISTINCT ON (used for row selection).
42+ ///
43+ /// Per the PRQL spec, `group` resets the order - any `sort` inside a group
44+ /// is for internal row selection, not output ordering. This flag tracks such
45+ /// internal sorting so it doesn't propagate past transforms like `join`.
46+ last_sorting_from_distinct_on : bool ,
4047 ctes_sorting : HashMap < TId , CteSorting > ,
4148 main_relation : bool ,
4249 ctx : & ' a mut Context ,
@@ -148,6 +155,12 @@ impl SortingInference<'_> {
148155#[ derive( Debug ) ]
149156struct CteSorting {
150157 sorting : Sorting , // drained from `last_sorting` when the CTE was folded
158+ /// True if the CTE's sorting originated from DISTINCT ON (row selection).
159+ ///
160+ /// Per the PRQL spec, `group` resets the order. DISTINCT ON sorting is
161+ /// internal to the group - it determines which row to keep, not output
162+ /// ordering. Pipelines referencing this CTE copy the flag and drop the sorting at a `join`.
163+ from_distinct_on : bool ,
151164}
152165
153166impl RqFold for SortingInference < ' _ > { } // empty impl: nothing from RqFold is overridden here
@@ -239,7 +252,11 @@ impl PqFold for SortingInference<'_> {
239252 // store sorting to be used later in From references
240253 let sorting = self . last_sorting . drain ( ..) . collect ( ) ;
241254 log:: debug!( "--- sorting {sorting:?}" ) ;
242- let sorting = CteSorting { sorting } ;
255+ let sorting = CteSorting {
256+ sorting,
257+ from_distinct_on : self . last_sorting_from_distinct_on ,
258+ } ;
259+ self . last_sorting_from_distinct_on = false ;
243260 self . ctes_sorting . insert ( cte. tid , sorting) ;
244261
245262 ctes. push ( cte) ;
@@ -305,6 +322,9 @@ impl PqMapper<RelationExpr, RelationExpr, (), ()> for SortingInference<'_> {
305322 transforms : Vec < SqlTransform < RelationExpr , ( ) > > ,
306323 ) -> Result < Vec < SqlTransform < RelationExpr , ( ) > > > {
307324 let mut sorting = Vec :: new ( ) ;
325+ // Track whether sorting originated from DISTINCT ON (internal row selection).
326+ // Per PRQL spec, `group` resets order - internal sorts don't define output order.
327+ let mut sorting_from_distinct_on = false ;
308328
309329 let mut result = Vec :: with_capacity ( transforms. len ( ) + 1 ) ;
310330
@@ -314,17 +334,20 @@ impl PqMapper<RelationExpr, RelationExpr, (), ()> for SortingInference<'_> {
314334 match expr. kind {
315335 RelationExprKind :: Ref ( ref tid) => {
316336 // infer sorting from referenced pipeline
317- if let Some ( cte_sorting) = self . ctes_sorting . get_mut ( tid) {
337+ if let Some ( cte_sorting) = self . ctes_sorting . get ( tid) {
318338 sorting. clone_from ( & cte_sorting. sorting ) ;
339+ sorting_from_distinct_on = cte_sorting. from_distinct_on ;
319340 } else {
320- sorting = Vec :: new ( ) ;
341+ sorting. clear ( ) ;
342+ sorting_from_distinct_on = false ;
321343 } ;
322344 }
323345 RelationExprKind :: SubQuery ( rel) => {
324346 let rel = self . fold_sql_relation ( rel) ?;
325347
326348 // infer sorting from sub-query
327349 sorting = self . last_sorting . drain ( ..) . collect ( ) ;
350+ sorting_from_distinct_on = self . last_sorting_from_distinct_on ;
328351
329352 expr. kind = RelationExprKind :: SubQuery ( rel) ;
330353 }
@@ -337,16 +360,40 @@ impl PqMapper<RelationExpr, RelationExpr, (), ()> for SortingInference<'_> {
337360 // just store sorting and don't emit Sort
338361 SqlTransform :: Sort ( expr) => {
339362 sorting. clone_from ( & expr) ;
363+ // A new explicit Sort clears the DISTINCT ON flag - this is a
364+ // user-requested ordering, not an internal DISTINCT ON sort.
365+ sorting_from_distinct_on = false ;
340366 continue ;
341367 }
342368
343369 // clear sorting
344370 SqlTransform :: Distinct | SqlTransform :: Aggregate { .. } => {
345- sorting = Vec :: new ( ) ;
371+ sorting. clear ( ) ;
372+ sorting_from_distinct_on = false ;
373+ }
374+
375+ // Per PRQL spec: `group` resets order, `join` retains left's order.
376+ // DISTINCT ON sorting is internal to the group (for row selection),
377+ // so it must not propagate past joins. Explicit user sorts are preserved.
378+ // See issue #4633.
379+ SqlTransform :: Join { .. } => {
380+ if sorting_from_distinct_on {
381+ sorting. clear ( ) ;
382+ sorting_from_distinct_on = false ;
383+ }
346384 }
347385
348386 // emit Sort before Take
349- SqlTransform :: Take ( _) | SqlTransform :: DistinctOn ( _) => {
387+ SqlTransform :: Take ( _) => {
388+ result. push ( SqlTransform :: Sort ( sorting. clone ( ) ) ) ;
389+ }
390+
391+ SqlTransform :: DistinctOn ( _) => {
392+ // DISTINCT ON uses sorting for row selection within the group.
393+ // Mark it so this internal sorting doesn't leak to outer queries.
394+ // Note: ROW_NUMBER (used for take > 1 or non-Postgres) doesn't have
395+ // this issue because its sorting is embedded in the window function.
396+ sorting_from_distinct_on = true ;
350397 result. push ( SqlTransform :: Sort ( sorting. clone ( ) ) ) ;
351398 }
352399 _ => { }
@@ -371,6 +418,7 @@ impl PqMapper<RelationExpr, RelationExpr, (), ()> for SortingInference<'_> {
371418
372419 // remember sorting for this pipeline
373420 self . last_sorting = sorting;
421+ self . last_sorting_from_distinct_on = sorting_from_distinct_on;
374422
375423 Ok ( result)
376424 }
0 commit comments