
Commit 083fbde

Merge branch 'datahub-project:master' into master
2 parents a81e14d + 20409fd commit 083fbde

File tree

21 files changed: +358 -34 lines changed

.github/workflows/pr-labeler.yml

+2-1
@@ -51,7 +51,8 @@ jobs:
           "acrylJonny",
           "chakru-r",
           "brock-acryl",
-          "mminichino"
+          "mminichino",
+          "jayacryl"
         ]'),
         github.actor
       )

datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/utils.ts

+5
@@ -69,6 +69,11 @@ export function pathMatchesNewPath(fieldPathA?: string | null, fieldPathB?: stri
     return fieldPathA === fieldPathB || fieldPathA === downgradeV2FieldPath(fieldPathB);
 }
 
+// should use pathMatchesExact when rendering editable info so the user edits the correct field
+export function pathMatchesExact(fieldPathA?: string | null, fieldPathB?: string | null) {
+    return fieldPathA === fieldPathB;
+}
+
 // group schema fields by fieldPath and grouping for hierarchy in schema table
 export function groupByFieldPath(
     schemaRows?: Array<SchemaField>,
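All of the call-site changes below swap the loose matcher for this exact one. To see why that matters, here is a minimal Python sketch of the two matching semantics; it is not the project's code, and the v2 fieldPath format and downgrade rule shown are simplified assumptions. The loose variant also accepts a match after stripping v2 annotations, so an editable-info entry stored under a different but downgrade-equivalent path could be picked up, and the user would end up editing the wrong field.

```python
import re

def downgrade_v2_field_path(field_path):
    """Assumption: a v2 fieldPath carries bracketed annotations that the downgrade strips."""
    if field_path is None:
        return None
    return re.sub(r"\[[^\]]*\]\.?", "", field_path)

def path_matches_new_path(a, b):  # loose: fine for display lookups
    return a == b or a == downgrade_v2_field_path(b)

def path_matches_exact(a, b):  # exact: used when resolving editable info
    return a == b

stored = "user.name"  # editable info keyed by a v1-style path
rendered = "[version=2.0].[type=struct].user.[type=string].name"  # hypothetical v2 path

print(path_matches_new_path(stored, rendered))  # True  -> could resolve the wrong entry
print(path_matches_exact(stored, rendered))     # False -> only an identical path matches
```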

datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/AboutFieldTab.tsx

+2-2
@@ -10,7 +10,7 @@ import {
     UsageQueryResult,
 } from '../../../../../../../../types.generated';
 import { useMutationUrn } from '../../../../../../../entity/shared/EntityContext';
-import { pathMatchesNewPath } from '../../../../../../dataset/profile/schema/utils/utils';
+import { pathMatchesExact } from '../../../../../../dataset/profile/schema/utils/utils';
 import NotesSection from '../../../../../notes/NotesSection';
 import FieldDescription from './FieldDescription';
 import { FieldDetails } from './FieldDetails';
@@ -55,7 +55,7 @@ export function AboutFieldTab({ properties }: AboutFieldTabProps) {
         : undefined;
     const editableFieldInfo = properties.editableSchemaMetadata?.editableSchemaFieldInfo.find(
         (candidateEditableFieldInfo) =>
-            pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, expandedField?.fieldPath),
+            pathMatchesExact(candidateEditableFieldInfo.fieldPath, expandedField?.fieldPath),
     );
 
     const notes = properties.notes?.sort((a, b) => moment(b.lastModified.time).diff(moment(a.lastModified.time))) || [];

datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx

+2-2
@@ -2,7 +2,7 @@ import React, { useState } from 'react';
 
 import { EditableSchemaMetadata, SchemaField, SubResourceType } from '../../../../../../../types.generated';
 import DescriptionField from '../../../../../dataset/profile/schema/components/SchemaDescriptionField';
-import { pathMatchesNewPath } from '../../../../../dataset/profile/schema/utils/utils';
+import { pathMatchesExact } from '../../../../../dataset/profile/schema/utils/utils';
 import { useUpdateDescriptionMutation } from '../../../../../../../graphql/mutations.generated';
 import { useMutationUrn, useRefetch } from '../../../../../../entity/shared/EntityContext';
 import { useSchemaRefetch } from '../SchemaContext';
@@ -31,7 +31,7 @@ export default function useDescriptionRenderer(
 
     return (description: string | undefined, record: SchemaField, index: number): JSX.Element => {
         const editableFieldInfo = editableSchemaMetadata?.editableSchemaFieldInfo.find((candidateEditableFieldInfo) =>
-            pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath),
+            pathMatchesExact(candidateEditableFieldInfo.fieldPath, record.fieldPath),
         );
         const { schemaFieldEntity } = record;
         const { displayedDescription, sanitizedDescription, isPropagated, sourceDetail } = extractFieldDescription(

datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useExtractFieldDescriptionInfo.ts

+2-2
@@ -1,4 +1,4 @@
-import { pathMatchesNewPath } from '@src/app/entityV2/dataset/profile/schema/utils/utils';
+import { pathMatchesExact } from '@src/app/entityV2/dataset/profile/schema/utils/utils';
 import { EditableSchemaMetadata, SchemaField } from '@src/types.generated';
 import { getFieldDescriptionDetails } from './getFieldDescriptionDetails';
 import { sanitizeRichText } from '../../../Documentation/components/editor/utils';
@@ -8,7 +8,7 @@ export default function useExtractFieldDescriptionInfo(
 ) {
     return (record: SchemaField, description: string | undefined | null = null) => {
         const editableFieldInfoB = editableSchemaMetadata?.editableSchemaFieldInfo.find((candidateEditableFieldInfo) =>
-            pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath),
+            pathMatchesExact(candidateEditableFieldInfo.fieldPath, record.fieldPath),
         );
         const { displayedDescription, isPropagated, sourceDetail } = getFieldDescriptionDetails({
             schemaFieldEntity: record.schemaFieldEntity,

datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useExtractFieldGlossaryTermsInfo.ts

+2-2
@@ -1,12 +1,12 @@
-import { pathMatchesNewPath } from '@src/app/entityV2/dataset/profile/schema/utils/utils';
+import { pathMatchesExact } from '@src/app/entityV2/dataset/profile/schema/utils/utils';
 import { EditableSchemaMetadata, GlossaryTerms, SchemaField } from '@src/types.generated';
 
 export default function useExtractFieldGlossaryTermsInfo(
     editableSchemaMetadata: EditableSchemaMetadata | null | undefined,
 ) {
     return (record: SchemaField, defaultUneditableTerms: GlossaryTerms | null = null) => {
         const editableTerms = editableSchemaMetadata?.editableSchemaFieldInfo.find((candidateEditableFieldInfo) =>
-            pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath),
+            pathMatchesExact(candidateEditableFieldInfo.fieldPath, record.fieldPath),
         )?.glossaryTerms;
 
         const uneditableTerms = defaultUneditableTerms || record?.glossaryTerms;

datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useExtractFieldTagsInfo.ts

+2-2
@@ -1,10 +1,10 @@
-import { pathMatchesNewPath } from '@src/app/entityV2/dataset/profile/schema/utils/utils';
+import { pathMatchesExact } from '@src/app/entityV2/dataset/profile/schema/utils/utils';
 import { EditableSchemaMetadata, GlobalTags, SchemaField } from '@src/types.generated';
 
 export default function useExtractFieldTagsInfo(editableSchemaMetadata: EditableSchemaMetadata | null | undefined) {
     return (record: SchemaField, defaultUneditableTags: GlobalTags | null = null) => {
         const editableTags = editableSchemaMetadata?.editableSchemaFieldInfo.find((candidateEditableFieldInfo) =>
-            pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath),
+            pathMatchesExact(candidateEditableFieldInfo.fieldPath, record.fieldPath),
         )?.globalTags;
 
         const uneditableTags = defaultUneditableTags || record?.globalTags;

datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRendererFeatureTable.tsx

+2-2
@@ -1,7 +1,7 @@
 import React from 'react';
 import { EditableSchemaMetadata, EntityType, GlobalTags, SchemaField } from '../../../../../../../types.generated';
 import TagTermGroup from '../../../../../../sharedV2/tags/TagTermGroup';
-import { pathMatchesNewPath } from '../../../../../dataset/profile/schema/utils/utils';
+import { pathMatchesExact } from '../../../../../dataset/profile/schema/utils/utils';
 import { useEntityData, useRefetch } from '../../../../../../entity/shared/EntityContext';
 
 export default function useTagsAndTermsRendererFeatureTable(
@@ -15,7 +15,7 @@ export default function useTagsAndTermsRendererFeatureTable(
 
     const tagAndTermRender = (tags: GlobalTags, record: SchemaField, rowIndex: number | undefined) => {
         const relevantEditableFieldInfo = editableSchemaMetadata?.editableSchemaFieldInfo.find(
-            (candidateEditableFieldInfo) => pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath),
+            (candidateEditableFieldInfo) => pathMatchesExact(candidateEditableFieldInfo.fieldPath, record.fieldPath),
         );
 
         return (

metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md

+8-1
@@ -1,6 +1,6 @@
 ### Prerequisities
 
-Notice of breaking change: in the latest version of the DynamoDB connector, `aws_region` is now a required configuration. The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration.
+Notice of breaking change: Starting v0.13.3, `aws_region` is now a required configuration for DynamoDB Connector. The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration.
 
 In order to execute this source, you need to attach the `AmazonDynamoDBReadOnlyAccess` policy to a user in your AWS account. Then create an API access key and secret for the user.
 
@@ -24,3 +24,10 @@ For a user to be able to create API access key, it needs the following access k
     ]
 }
 ```
+
+### Concept Mapping
+
+| Source Concept | DataHub Concept | Notes |
+| -------------- | --------------------------------------------------------- | ----- |
+| `"dynamodb"` | [Data Platform](../../metamodel/entities/dataPlatform.md) | |
+| DynamoDB Table | [Dataset](../../metamodel/entities/dataset.md) | |
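Because `aws_region` no longer has a fallback, any recipe must pin a region explicitly. A minimal sketch of one way to run such a recipe programmatically, assuming placeholder credentials and a local DataHub instance (the key values and server URL are illustrative only):

```python
# Minimal sketch: aws_region is required, so the connector only scans this region.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "dynamodb",
            "config": {
                "aws_access_key_id": "<access-key-id>",   # placeholder
                "aws_secret_access_key": "<secret-key>",  # placeholder
                "aws_region": "us-west-2",                # required since v0.13.3
            },
        },
        "sink": {"type": "datahub-rest", "config": {"server": "http://localhost:8080"}},
    }
)
pipeline.run()
pipeline.raise_from_status()
```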

metadata-ingestion/src/datahub/cli/iceberg_cli.py

+30-6
@@ -14,6 +14,7 @@
 from datahub.configuration.common import GraphError
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.metadata.schema_classes import SystemMetadataClass
+from datahub.telemetry import telemetry
 
 logger = logging.getLogger(__name__)
 
@@ -161,6 +162,7 @@ def validate_warehouse(data_root: str) -> None:
     type=int,
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
+@telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
 def create(
     warehouse: str,
     description: Optional[str],
@@ -313,6 +315,7 @@ def create(
     type=int,
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
+@telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
 def update(
     warehouse: str,
     data_root: str,
@@ -398,6 +401,7 @@ def update(
 
 
 @iceberg.command()
+@telemetry.with_telemetry()
 def list() -> None:
     """
     List iceberg warehouses
@@ -413,6 +417,7 @@ def list() -> None:
 @click.option(
     "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
 )
+@telemetry.with_telemetry()
 def get(warehouse: str) -> None:
     """Fetches the details of the specified iceberg warehouse"""
     client = get_default_graph()
@@ -442,6 +447,7 @@ def get(warehouse: str) -> None:
     is_flag=True,
     help="force the delete if set without confirmation",
 )
+@telemetry.with_telemetry(capture_kwargs=["dry_run", "force"])
 def delete(warehouse: str, dry_run: bool, force: bool) -> None:
     """
     Delete warehouse
@@ -470,11 +476,19 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
             # Do we really need this double-check?
             if "__typename" in entity and "urn" in entity:
                 if entity["__typename"] in ["Container", "Dataset"]:
+                    # add the Platform Resource URN to also be deleted for each dataset.
+                    # This is not user visible, so no need to show a name to the user and include it in the count. Each
+                    # instance corresponds to a dataset whose name is shown.
+                    if entity["__typename"] == "Dataset":
+                        resource_urn = platform_resource_urn(
+                            entity["properties"]["qualifiedName"]
+                        )
+                        urns_to_delete.append(resource_urn)
+
                     urns_to_delete.append(entity["urn"])
                     resource_names_to_be_deleted.append(
                         entity.get("name", entity.get("urn"))
                     )
-                    # TODO: PlatformResource associated with datasets need to be deleted.
 
     if dry_run:
         click.echo(
@@ -485,25 +499,32 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
     else:
         if not force:
             click.confirm(
-                f"This will delete {warehouse} warehouse, credentials, and {len(urns_to_delete)} datasets and namespaces from DataHub. Do you want to continue?",
+                f"This will delete {warehouse} warehouse, credentials, and {len(resource_names_to_be_deleted)} datasets and namespaces from DataHub. Do you want to continue?",
                 abort=True,
             )
-        client.hard_delete_entity(urn)
-        client.hard_delete_entity(warehouse_aspect.clientId)
-        client.hard_delete_entity(warehouse_aspect.clientSecret)
 
+        # Delete the resources in the warehouse first, so that in case it is interrupted, the warehouse itself is
+        # still available to enumerate the resources in it that are not yet deleted.
         for urn_to_delete in urns_to_delete:
             client.hard_delete_entity(urn_to_delete)
 
+        client.hard_delete_entity(urn)
+        client.hard_delete_entity(warehouse_aspect.clientId)
+        client.hard_delete_entity(warehouse_aspect.clientSecret)
+
         click.echo(
-            f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(urns_to_delete)} datasets and namespaces"
+            f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(resource_names_to_be_deleted)} datasets and namespaces"
        )
 
 
 def iceberg_data_platform_instance_urn(warehouse: str) -> str:
     return f"urn:li:dataPlatformInstance:({iceberg_data_platform()},{warehouse})"
 
 
+def platform_resource_urn(dataset_name: str) -> str:
+    return f"urn:li:platformResource:iceberg.{dataset_name}"
+
+
 def iceberg_data_platform() -> str:
     return "urn:li:dataPlatform:iceberg"
 
@@ -677,6 +698,9 @@ def get_related_entities_for_platform_instance(
                     ... on Dataset {
                         urn
                         name
+                        properties{
+                            qualifiedName
+                        }
                     }
                 }
             }
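The new `platform_resource_urn` helper simply prefixes the dataset's `qualifiedName`, which is how the delete path derives the extra, non-user-visible URNs to hard-delete. For example (the dataset name here is illustrative):

```python
def platform_resource_urn(dataset_name: str) -> str:
    return f"urn:li:platformResource:iceberg.{dataset_name}"

# For a dataset whose qualifiedName is "my_warehouse.my_namespace.my_table",
# the associated platform resource deleted alongside it would be:
print(platform_resource_urn("my_warehouse.my_namespace.my_table"))
# urn:li:platformResource:iceberg.my_warehouse.my_namespace.my_table
```

This also explains the GraphQL change at the bottom of the file: the delete path now needs `properties.qualifiedName` for each dataset to build these URNs.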

metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py

+5
@@ -357,6 +357,11 @@ class DBTCommonConfig(
         default=True,
         description="When enabled, includes the compiled code in the emitted metadata.",
     )
+    include_database_name: bool = Field(
+        default=True,
+        description="Whether to add database name to the table urn. "
+        "Set to False to skip it for engines like AWS Athena where it's not required.",
+    )
 
     @validator("target_platform")
     def validate_target_platform_value(cls, target_platform: str) -> str:
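A rough sketch of the effect on the dataset name that feeds the urn; this is illustrative only (the real assembly lives in the dbt source's node-to-urn logic, and the names below are hypothetical). With the flag off, `database=None` is passed through, so the database segment simply disappears, which matches how Athena-backed tables are usually addressed:

```python
# Illustrative sketch, not the source's actual code path.
from typing import Optional

def dataset_name(database: Optional[str], schema: str, table: str) -> str:
    # segments that are None are dropped before joining
    return ".".join(part for part in (database, schema, table) if part)

# include_database_name=True (default): the database segment is kept
print(dataset_name("awsdatacatalog", "analytics", "orders"))  # awsdatacatalog.analytics.orders
# include_database_name=False: database=None is passed, so the segment is dropped
print(dataset_name(None, "analytics", "orders"))              # analytics.orders
```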

metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py

+11-9
@@ -167,6 +167,7 @@ def extract_dbt_entities(
     use_identifiers: bool,
     tag_prefix: str,
     only_include_if_in_catalog: bool,
+    include_database_name: bool,
     report: DBTSourceReport,
 ) -> List[DBTNode]:
     sources_by_id = {x["unique_id"]: x for x in sources_results}
@@ -267,7 +268,7 @@ def extract_dbt_entities(
             dbt_name=key,
             dbt_adapter=manifest_adapter,
             dbt_package_name=manifest_node.get("package_name"),
-            database=manifest_node["database"],
+            database=manifest_node["database"] if include_database_name else None,
             schema=manifest_node["schema"],
             name=name,
             alias=manifest_node.get("alias"),
@@ -543,14 +544,15 @@ def loadManifestAndCatalog(
         all_catalog_entities = {**catalog_nodes, **catalog_sources}
 
         nodes = extract_dbt_entities(
-            all_manifest_entities,
-            all_catalog_entities,
-            sources_results,
-            manifest_adapter,
-            self.config.use_identifiers,
-            self.config.tag_prefix,
-            self.config.only_include_if_in_catalog,
-            self.report,
+            all_manifest_entities=all_manifest_entities,
+            all_catalog_entities=all_catalog_entities,
+            sources_results=sources_results,
+            manifest_adapter=manifest_adapter,
+            use_identifiers=self.config.use_identifiers,
+            tag_prefix=self.config.tag_prefix,
+            only_include_if_in_catalog=self.config.only_include_if_in_catalog,
+            include_database_name=self.config.include_database_name,
+            report=self.report,
         )
 
         return (

metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py

+5
@@ -165,6 +165,10 @@ def report_dropped(self, name: str) -> None:
     SourceCapability.PLATFORM_INSTANCE,
     "By default, platform_instance will use the AWS account id",
 )
+@capability(
+    SourceCapability.CLASSIFICATION,
+    "Optionally enabled via `classification.enabled`",
+)
 class DynamoDBSource(StatefulIngestionSourceBase):
     """
     This plugin extracts the following:
@@ -244,6 +248,7 @@ def _process_table(
             name=dataset_name,
         )
         dataset_properties = DatasetPropertiesClass(
+            name=table_name,
             tags=[],
             customProperties={
                 "table.arn": table_info["TableArn"],

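The new capability decorator only advertises support; classification still has to be switched on per recipe. A hypothetical source-config fragment, expressed as the dict the config parses (only `classification.enabled` is the point here; the region value is a placeholder):

```python
# Hypothetical DynamoDB source config fragment: classification is opt-in.
source_config = {
    "aws_region": "us-west-2",
    "classification": {
        "enabled": True,  # per the capability note: "Optionally enabled via `classification.enabled`"
    },
}
```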
metadata-ingestion/tests/integration/dynamodb/dynamodb_default_platform_instance_mces_golden.json

+3-1
@@ -36,7 +36,8 @@
     },
     {
       "fieldPath": "city",
-      "nullable": true,
+      "nullable": false,
+      "description": "Sort Key",
       "type": {
         "type": {
           "com.linkedin.schema.StringType": {}
@@ -175,6 +176,7 @@
       "table.arn": "arn:aws:dynamodb:us-west-2:123456789012:table/Location",
       "table.totalItems": "1"
     },
+    "name": "Location",
     "tags": []
   }
 },

metadata-ingestion/tests/integration/dynamodb/dynamodb_platform_instance_mces_golden.json

+3-1
@@ -36,7 +36,8 @@
     },
     {
       "fieldPath": "city",
-      "nullable": true,
+      "nullable": false,
+      "description": "Sort Key",
       "type": {
         "type": {
           "com.linkedin.schema.StringType": {}
@@ -186,6 +187,7 @@
       "table.arn": "arn:aws:dynamodb:us-west-2:123456789012:table/Location",
       "table.totalItems": "1"
     },
+    "name": "Location",
     "tags": []
   }
 },

metadata-ingestion/tests/integration/dynamodb/test_dynamodb.py

+2
@@ -29,9 +29,11 @@ def test_dynamodb(pytestconfig, tmp_path):
         TableName="Location",
         KeySchema=[
             {"AttributeName": "partitionKey", "KeyType": "HASH"},
+            {"AttributeName": "city", "KeyType": "RANGE"},
         ],
         AttributeDefinitions=[
             {"AttributeName": "partitionKey", "AttributeType": "S"},
+            {"AttributeName": "city", "AttributeType": "S"},
         ],
         ProvisionedThroughput={"ReadCapacityUnits": 10, "WriteCapacityUnits": 10},
     )
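This test change and the two golden-file changes above are one story: `city` becomes the table's RANGE (sort) key, so the connector now emits it as non-nullable with a "Sort Key" description. A rough sketch of that mapping, stated as an assumption about the connector's behavior rather than its actual code:

```python
# Assumption-level sketch of how key attributes map to schema-field metadata.
KEY_ROLE_DESCRIPTION = {"HASH": "Partition Key", "RANGE": "Sort Key"}  # assumed labels

def field_metadata(attribute_name, key_schema):
    for key in key_schema:
        if key["AttributeName"] == attribute_name:
            # key attributes must be present on every item, hence non-nullable
            return {"nullable": False, "description": KEY_ROLE_DESCRIPTION[key["KeyType"]]}
    return {"nullable": True, "description": None}

key_schema = [
    {"AttributeName": "partitionKey", "KeyType": "HASH"},
    {"AttributeName": "city", "KeyType": "RANGE"},
]
print(field_metadata("city", key_schema))
# {'nullable': False, 'description': 'Sort Key'}  -> matches the golden-file change
```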
