Skip to content

Commit 4cb9b38

Browse files
committed
aggr round2
1 parent 4381d9b commit 4cb9b38

37 files changed

+55
-21
lines changed

derby.log

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
----------------------------------------------------------------
2-
Tue Apr 02 13:57:11 UTC 2024:
3-
Booting Derby version The Apache Software Foundation - Apache Derby - 10.14.2.0 - (1828579): instance a816c00e-018e-9f19-7b0a-00000609ec60
2+
Wed Apr 03 07:58:40 UTC 2024:
3+
Booting Derby version The Apache Software Foundation - Apache Derby - 10.14.2.0 - (1828579): instance a816c00e-018e-a2f7-9e04-00000157d978
44
on database directory /workspaces/devcontainer-universal/metastore_db with class loader jdk.internal.loader.ClassLoaders$AppClassLoader@5ffd2b27
55
Loaded from file:/usr/local/sdkman/candidates/spark/3.5.0/jars/derby-10.14.2.0.jar
66
java.vendor=Eclipse Adoptium

generate.ipynb

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,12 @@
3333
"text": [
3434
"Setting default log level to \"WARN\".\n",
3535
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
36-
"24/04/02 13:57:05 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
37-
"24/04/02 13:57:10 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist\n",
38-
"24/04/02 13:57:10 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist\n",
39-
"24/04/02 13:57:13 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0\n",
40-
"24/04/02 13:57:13 WARN ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore [email protected]\n"
36+
"24/04/03 07:58:26 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
37+
"24/04/03 07:58:39 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist\n",
38+
"24/04/03 07:58:39 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist\n",
39+
"24/04/03 07:58:44 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0\n",
40+
"24/04/03 07:58:44 WARN ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore [email protected]\n",
41+
"24/04/03 07:58:45 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors\n"
4142
]
4243
},
4344
{
@@ -46,13 +47,6 @@
4647
"text": [
4748
"[Database(name='default', catalog='spark_catalog', description='Default Hive database', locationUri='file:/workspaces/devcontainer-universal/spark-warehouse')]\n"
4849
]
49-
},
50-
{
51-
"name": "stderr",
52-
"output_type": "stream",
53-
"text": [
54-
"24/04/02 13:57:19 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors\n"
55-
]
5650
}
5751
],
5852
"source": [
@@ -77,7 +71,7 @@
7771
},
7872
{
7973
"cell_type": "code",
80-
"execution_count": 39,
74+
"execution_count": 2,
8175
"metadata": {
8276
"tags": [
8377
"UDFs"
@@ -471,9 +465,17 @@
471465
},
472466
{
473467
"cell_type": "code",
474-
"execution_count": 128,
468+
"execution_count": 3,
475469
"metadata": {},
476470
"outputs": [
471+
{
472+
"name": "stderr",
473+
"output_type": "stream",
474+
"text": [
475+
"24/04/03 07:59:13 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n",
476+
"24/04/03 07:59:14 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.\n"
477+
]
478+
},
477479
{
478480
"name": "stdout",
479481
"output_type": "stream",
@@ -523,9 +525,19 @@
523525
},
524526
{
525527
"cell_type": "code",
526-
"execution_count": 120,
528+
"execution_count": 4,
527529
"metadata": {},
528530
"outputs": [
531+
{
532+
"name": "stderr",
533+
"output_type": "stream",
534+
"text": [
535+
"24/04/03 07:59:19 WARN HiveConf: HiveConf of name hive.internal.ss.authz.settings.applied.marker does not exist\n",
536+
"24/04/03 07:59:19 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist\n",
537+
"24/04/03 07:59:19 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist\n",
538+
"[Stage 4:> (0 + 1) / 1]\r"
539+
]
540+
},
529541
{
530542
"name": "stdout",
531543
"output_type": "stream",
@@ -557,6 +569,13 @@
557569
"only showing top 20 rows\n",
558570
"\n"
559571
]
572+
},
573+
{
574+
"name": "stderr",
575+
"output_type": "stream",
576+
"text": [
577+
" \r"
578+
]
560579
}
561580
],
562581
"source": [
@@ -571,7 +590,7 @@
571590
},
572591
{
573592
"cell_type": "code",
574-
"execution_count": 153,
593+
"execution_count": 5,
575594
"metadata": {},
576595
"outputs": [
577596
{
@@ -615,7 +634,7 @@
615634
},
616635
{
617636
"cell_type": "code",
618-
"execution_count": 132,
637+
"execution_count": 6,
619638
"metadata": {},
620639
"outputs": [
621640
{
@@ -730,9 +749,16 @@
730749
},
731750
{
732751
"cell_type": "code",
733-
"execution_count": 157,
752+
"execution_count": 7,
734753
"metadata": {},
735754
"outputs": [
755+
{
756+
"name": "stderr",
757+
"output_type": "stream",
758+
"text": [
759+
"[Stage 17:> (0 + 1) / 1]\r"
760+
]
761+
},
736762
{
737763
"name": "stdout",
738764
"output_type": "stream",
@@ -755,6 +781,13 @@
755781
"+-----------------+---------------+---------------+---------------------------------------+---------------------------------------------+----------------------------------------------+--------------------------------------------+-----------------------------------------+--------------------------------------------------+---------------------------------------------------+---------------------------------------------+--------------------------------------------+\n",
756782
"\n"
757783
]
784+
},
785+
{
786+
"name": "stderr",
787+
"output_type": "stream",
788+
"text": [
789+
" \r"
790+
]
758791
}
759792
],
760793
"source": [
@@ -773,7 +806,8 @@
773806
" consumer_dim.Division_Code_Key, \n",
774807
" consumer_dim.Region_Code_Key,\n",
775808
" # consumer_dim.Affiliate_Code_Key,\n",
776-
" consumer_dim.Market_Code_Key\n",
809+
" consumer_dim.Market_Code_Key,\n",
810+
" consumer_dim.CFDM_Channel_Code_Key\n",
777811
" )\n",
778812
" .agg(\n",
779813
" F.countDistinct(consumer_dim.Consumer_Key).alias('Consumer Aggregate Total Consumer Count'),\n",

metastore_db/db.lck

0 Bytes
Binary file not shown.

metastore_db/log/log.ctrl

0 Bytes
Binary file not shown.

metastore_db/log/log2.dat

0 Bytes
Binary file not shown.

metastore_db/log/logmirror.ctrl

0 Bytes
Binary file not shown.

metastore_db/seg0/c180.dat

0 Bytes
Binary file not shown.

metastore_db/seg0/c191.dat

0 Bytes
Binary file not shown.

metastore_db/seg0/c1a1.dat

0 Bytes
Binary file not shown.

metastore_db/seg0/c1b1.dat

0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)