|
33 | 33 | "text": [ |
34 | 34 | "Setting default log level to \"WARN\".\n", |
35 | 35 | "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", |
36 | | - "24/04/02 13:57:05 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", |
37 | | - "24/04/02 13:57:10 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist\n", |
38 | | - "24/04/02 13:57:10 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist\n", |
39 | | - "24/04/02 13:57:13 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0\n", |
40 | | - "24/04/02 13:57:13 WARN ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore [email protected]\n" |
| 36 | + "24/04/03 07:58:26 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", |
| 37 | + "24/04/03 07:58:39 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist\n", |
| 38 | + "24/04/03 07:58:39 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist\n", |
| 39 | + "24/04/03 07:58:44 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0\n", |
| 40 | + "24/04/03 07:58:44 WARN ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore [email protected]\n", |
| 41 | + "24/04/03 07:58:45 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors\n" |
41 | 42 | ] |
42 | 43 | }, |
43 | 44 | { |
|
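The startup warnings in the hunk above are informational: the banner points at `sc.setLogLevel(newLevel)` for log verbosity, and the `GarbageCollectionMetrics` line names the `spark.eventLog.gcMetrics.*` settings for non-built-in collectors. A minimal sketch of acting on those hints at session creation, assuming PySpark; the config keys are taken verbatim from the log lines, the values and builder chain are illustrative only:

```python
# Sketch only: silencing/configuring the warnings shown in the diff above.
# Keys come from the log messages; values are example choices, not from the notebook.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    # GarbageCollectionMetrics asks for the non-built-in GC to be listed in either
    # the young- or old-generation config; the young-generation key is used here.
    .config(
        "spark.eventLog.gcMetrics.youngGenerationGarbageCollectors",
        "G1 Young Generation,G1 Concurrent GC",
    )
    .enableHiveSupport()
    .getOrCreate()
)

# "To adjust logging level use sc.setLogLevel(newLevel)" from the startup banner.
spark.sparkContext.setLogLevel("ERROR")
```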
46 | 47 | "text": [ |
47 | 48 | "[Database(name='default', catalog='spark_catalog', description='Default Hive database', locationUri='file:/workspaces/devcontainer-universal/spark-warehouse')]\n" |
48 | 49 | ] |
49 | | - }, |
50 | | - { |
51 | | - "name": "stderr", |
52 | | - "output_type": "stream", |
53 | | - "text": [ |
54 | | - "24/04/02 13:57:19 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors\n" |
55 | | - ] |
56 | 50 | } |
57 | 51 | ], |
58 | 52 | "source": [ |
|
77 | 71 | }, |
78 | 72 | { |
79 | 73 | "cell_type": "code", |
80 | | - "execution_count": 39, |
| 74 | + "execution_count": 2, |
81 | 75 | "metadata": { |
82 | 76 | "tags": [ |
83 | 77 | "UDFs" |
|
471 | 465 | }, |
472 | 466 | { |
473 | 467 | "cell_type": "code", |
474 | | - "execution_count": 128, |
| 468 | + "execution_count": 3, |
475 | 469 | "metadata": {}, |
476 | 470 | "outputs": [ |
| 471 | + { |
| 472 | + "name": "stderr", |
| 473 | + "output_type": "stream", |
| 474 | + "text": [ |
| 475 | + "24/04/03 07:59:13 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n", |
| 476 | + "24/04/03 07:59:14 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.\n" |
| 477 | + ] |
| 478 | + }, |
477 | 479 | { |
478 | 480 | "name": "stdout", |
479 | 481 | "output_type": "stream", |
|
523 | 525 | }, |
524 | 526 | { |
525 | 527 | "cell_type": "code", |
526 | | - "execution_count": 120, |
| 528 | + "execution_count": 4, |
527 | 529 | "metadata": {}, |
528 | 530 | "outputs": [ |
| 531 | + { |
| 532 | + "name": "stderr", |
| 533 | + "output_type": "stream", |
| 534 | + "text": [ |
| 535 | + "24/04/03 07:59:19 WARN HiveConf: HiveConf of name hive.internal.ss.authz.settings.applied.marker does not exist\n", |
| 536 | + "24/04/03 07:59:19 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist\n", |
| 537 | + "24/04/03 07:59:19 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist\n", |
| 538 | + "[Stage 4:> (0 + 1) / 1]\r" |
| 539 | + ] |
| 540 | + }, |
529 | 541 | { |
530 | 542 | "name": "stdout", |
531 | 543 | "output_type": "stream", |
|
557 | 569 | "only showing top 20 rows\n", |
558 | 570 | "\n" |
559 | 571 | ] |
| 572 | + }, |
| 573 | + { |
| 574 | + "name": "stderr", |
| 575 | + "output_type": "stream", |
| 576 | + "text": [ |
| 577 | + " \r" |
| 578 | + ] |
560 | 579 | } |
561 | 580 | ], |
562 | 581 | "source": [ |
|
571 | 590 | }, |
572 | 591 | { |
573 | 592 | "cell_type": "code", |
574 | | - "execution_count": 153, |
| 593 | + "execution_count": 5, |
575 | 594 | "metadata": {}, |
576 | 595 | "outputs": [ |
577 | 596 | { |
|
615 | 634 | }, |
616 | 635 | { |
617 | 636 | "cell_type": "code", |
618 | | - "execution_count": 132, |
| 637 | + "execution_count": 6, |
619 | 638 | "metadata": {}, |
620 | 639 | "outputs": [ |
621 | 640 | { |
|
730 | 749 | }, |
731 | 750 | { |
732 | 751 | "cell_type": "code", |
733 | | - "execution_count": 157, |
| 752 | + "execution_count": 7, |
734 | 753 | "metadata": {}, |
735 | 754 | "outputs": [ |
| 755 | + { |
| 756 | + "name": "stderr", |
| 757 | + "output_type": "stream", |
| 758 | + "text": [ |
| 759 | + "[Stage 17:> (0 + 1) / 1]\r" |
| 760 | + ] |
| 761 | + }, |
736 | 762 | { |
737 | 763 | "name": "stdout", |
738 | 764 | "output_type": "stream", |
|
755 | 781 | "+-----------------+---------------+---------------+---------------------------------------+---------------------------------------------+----------------------------------------------+--------------------------------------------+-----------------------------------------+--------------------------------------------------+---------------------------------------------------+---------------------------------------------+--------------------------------------------+\n", |
756 | 782 | "\n" |
757 | 783 | ] |
| 784 | + }, |
| 785 | + { |
| 786 | + "name": "stderr", |
| 787 | + "output_type": "stream", |
| 788 | + "text": [ |
| 789 | + " \r" |
| 790 | + ] |
758 | 791 | } |
759 | 792 | ], |
760 | 793 | "source": [ |
|
773 | 806 | " consumer_dim.Division_Code_Key, \n", |
774 | 807 | " consumer_dim.Region_Code_Key,\n", |
775 | 808 | " # consumer_dim.Affiliate_Code_Key,\n", |
776 | | - " consumer_dim.Market_Code_Key\n", |
| 809 | + " consumer_dim.Market_Code_Key,\n", |
| 810 | + " consumer_dim.CFDM_Channel_Code_Key\n", |
777 | 811 | " )\n", |
778 | 812 | " .agg(\n", |
779 | 813 | " F.countDistinct(consumer_dim.Consumer_Key).alias('Consumer Aggregate Total Consumer Count'),\n", |
|
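The source change in the last hunk adds `consumer_dim.CFDM_Channel_Code_Key` as a grouping key, so the `countDistinct` consumer count is now broken out per channel as well. A self-contained sketch of that grouping, assuming a toy `consumer_dim` DataFrame: only the column names are taken from the diff; the data, session setup, and omitted grouping keys are stand-ins for illustration.

```python
# Sketch of the groupBy/agg change, with made-up rows so it runs standalone.
from pyspark.sql import SparkSession
import pyspark.sql.functions as F

spark = SparkSession.builder.getOrCreate()

consumer_dim = spark.createDataFrame(
    [
        (1, "D1", "R1", "M1", "CH1"),
        (2, "D1", "R1", "M1", "CH1"),
        (3, "D1", "R1", "M1", "CH2"),
    ],
    ["Consumer_Key", "Division_Code_Key", "Region_Code_Key",
     "Market_Code_Key", "CFDM_Channel_Code_Key"],
)

(
    consumer_dim
    .groupBy(
        consumer_dim.Division_Code_Key,
        consumer_dim.Region_Code_Key,
        consumer_dim.Market_Code_Key,
        consumer_dim.CFDM_Channel_Code_Key,  # newly added grouping key
    )
    .agg(
        F.countDistinct(consumer_dim.Consumer_Key)
         .alias("Consumer Aggregate Total Consumer Count")
    )
    .show(truncate=False)
)
```

Note that distinct counts at this finer grain do not simply sum back to the coarser totals when a consumer appears under more than one channel.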