From 099c22bef9ad9d4dc9cb3f679a99c9d320811988 Mon Sep 17 00:00:00 2001
From: Scc_hy <34824890+scchy@users.noreply.github.com>
Date: Fri, 3 Jan 2020 10:40:52 +0800
Subject: [PATCH] ADD NEW SQL SOLUTION
sales_SQL = spark.sql("""SELECT city, year, sum(amount) as amount
,GROUPING_ID(city, year) GFLG
FROM sales_tbl
GROUP BY ROLLUP(city, year)
HAVING 3 != GROUPING_ID(city, year)
ORDER BY city DESC NULLS LAST, year ASC NULLS LAST
""")
---
code/chap07/dataframe_multi_dim_agg_rollup.py | 23 +++++++++++++------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/code/chap07/dataframe_multi_dim_agg_rollup.py b/code/chap07/dataframe_multi_dim_agg_rollup.py
index 7caf003..df86367 100644
--- a/code/chap07/dataframe_multi_dim_agg_rollup.py
+++ b/code/chap07/dataframe_multi_dim_agg_rollup.py
@@ -223,12 +223,11 @@
# Aggregate function: returns the level of grouping, equals to
# (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + … + grouping(cn)
#
- with_rollup = sales\
- .rollup('city', 'year')\
- .agg(sum('amount').alias('amount'), grouping_id().alias('gid'))\
- .filter(col('gid') != 3)\
- .sort('city', 'year')\
- .select('city', 'year', 'amount', 'gid')
+ with_rollup = sales.rollup('city', 'year')\
+ .agg(sum('amount').alias('amount'), grouping_id().alias('gflg'))\
+ .filter(col('gflg') != 3)
+ with_rollup = with_rollup.sort(with_rollup.city.desc_nulls_last(), with_rollup.year.asc_nulls_last())\
+ .select('city', 'year', 'amount', 'gflg')
#
print("# with_rollup:")
with_rollup.show()
@@ -246,9 +245,19 @@
GROUPING SETS ((city, year), (city))
ORDER BY city DESC NULLS LAST, year ASC NULLS LAST
""")
+ # NEW solution
+ sales_SQL = spark.sql("""SELECT city, year, sum(amount) as amount
+ ,GROUPING_ID(city, year) GFLG
+ FROM sales_tbl
+ GROUP BY ROLLUP(city, year)
+ HAVING 3 != GROUPING_ID(city, year)
+ ORDER BY city DESC NULLS LAST, year ASC NULLS LAST
+ """)
#
print("# with_grouping_sets:")
- with_grouping_sets.show()
+ with_grouping_sets.show()
+ print("# sales_SQL:")
+ sales_SQL.show()
# done!
spark.stop()