Skip to content

Commit 49bea6f

Browse files
authored
Merge pull request #3 from adidas/release/5.3.0
Integrate release/5.3.0 changes into master
2 parents f2c0e1b + df4d3b7 commit 49bea6f

File tree

80 files changed

+1401
-904
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

80 files changed

+1401
-904
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
*.yml
12
*.iml
23
*.pyc
34
config/credentials/*
5+
config/keytab/*
6+
config/aws/*
47
.idea/*
58
.cache/*
69
.pytest_cache/*

Dockerfile

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,18 @@ RUN yum update -y && \
1515
which \
1616
gcc-c++ \
1717
python3 \
18-
python3-devel \
19-
unixODBC-devel
20-
21-
# Installing Oracle client
22-
RUN wget -q -O /etc/yum.repos.d/public-yum-ol7.repo http://yum.oracle.com/public-yum-ol7.repo && \
23-
wget -q -O /tmp/RPM-GPG-KEY-oracle-ol7 http://yum.oracle.com/RPM-GPG-KEY-oracle-ol7 && \
24-
rpm --import /tmp/RPM-GPG-KEY-oracle-ol7 && \
25-
rm /tmp/RPM-GPG-KEY-oracle-ol7 && \
26-
yum install -y yum-utils && \
27-
yum-config-manager --enable ol7_oracle_instantclient && \
28-
yum -y install oracle-instantclient18.3-basiclite && \
29-
yum clean all
18+
python3-devel
3019

3120
# Installing Python dependencies
3221
COPY requirements.txt /tmp/m3d-api-requirements.txt
3322
RUN pip3 install -r /tmp/m3d-api-requirements.txt --ignore-installed chardet && \
3423
pip3 install awscli==1.16.96 && \
3524
rm /tmp/m3d-api-requirements.txt
3625

37-
# Setting environment variables
38-
ENV LD_LIBRARY_PATH=/usr/lib/oracle/18.3/client64/lib:$LD_LIBRARY_PATH
26+
RUN groupadd -r m3d && \
27+
useradd -r -g m3d m3d && \
28+
mkdir -p /home/m3d && \
29+
chown m3d:m3d /home/m3d
30+
USER m3d
3931

4032
CMD ["/bin/bash"]

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
M3D API
22
=======
33

4-
![](static/images/m3d_logo.png)
4+
![M3D logo](/static/images/m3d_logo.png)
55

66
**M3D** stands for _Metadata Driven Development_ and is a cloud- and platform-agnostic framework for the automated creation, management and governance of metadata and data flows from multiple source systems to multiple target systems. The main features and design goals of M3D are:
77

@@ -41,7 +41,7 @@ These are the layers defined in the M3D architecture:
4141

4242
Graphically, the architecture of M3D looks like this:
4343

44-
![](static/images/m3d_layers.svg)
44+
![M3D Architecture](/static/images/m3d_layers.png)
4545

4646

4747
### AWS Prerequisites for Out of the Box Usage
@@ -117,7 +117,6 @@ For advanced users, you may use [conda](https://conda.io) for installing M3D by
117117
"upload": "upload",
118118
"pushdown": "pushdown",
119119
"aws": "aws",
120-
"hdfs": "hdfs",
121120
"file": "file"
122121
},
123122
"data_dict_delimiter": "|"
@@ -173,6 +172,7 @@ The steps are the following:
173172
-destination_database emr_database \
174173
-destination_environment test \
175174
-destination_table table_name \
175+
-destination_table_location_prefix table_location_prefix \
176176
-emr_cluster_id id-of-started-cluster
177177
```
178178

common.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,10 @@ function exec_command_within_container() {
8181

8282
if [[ -z "$LOCAL_IS_INTERACTIVE" ]]; then
8383
echo "Executing command within container: $LOCAL_CMD"
84-
docker exec "$LOCAL_CONTAINER_INSTANCE_NAME" bash -c "cd /root/workspace/${LOCAL_PROJECT_NAME} && ${LOCAL_CMD}"
84+
docker exec "$LOCAL_CONTAINER_INSTANCE_NAME" bash -c "cd /m3d/workspace/${LOCAL_PROJECT_NAME} && ${LOCAL_CMD}"
8585
else
8686
echo "Executing command within container in interactive mode: $LOCAL_CMD"
87-
docker exec -it "$LOCAL_CONTAINER_INSTANCE_NAME" bash -c "cd /root/workspace/${LOCAL_PROJECT_NAME} && ${LOCAL_CMD}"
87+
docker exec -it "$LOCAL_CONTAINER_INSTANCE_NAME" bash -c "cd /m3d/workspace/${LOCAL_PROJECT_NAME} && ${LOCAL_CMD}"
8888
fi
8989
}
9090

config/m3d/config.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
"tags": {
1515
"full_load": "full_load",
1616
"delta_load": "delta_load",
17+
"delta_lake_load": "delta_lake_load",
1718
"append_load": "append_load",
19+
"decom_gzip": "gzip_decompressor",
1820
"table_suffix_stage": "_stg1",
1921
"table_suffix_swap": "_swap",
2022
"config": "config",
@@ -25,7 +27,6 @@
2527
"upload": "upload",
2628
"pushdown": "pushdown",
2729
"aws": "aws",
28-
"hdfs": "hdfs",
2930
"file": "file"
3031
},
3132
"data_dict_delimiter": "|"

config/system/scon-bdp-emr_test.json

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,17 @@
7575
"api_long_timeout_seconds": 43200,
7676
"aws_region": "eu-west-1",
7777
"packages_to_deploy": [
78-
"config",
79-
"exceptions",
80-
"hadoop",
81-
"hadoop/core",
82-
"hadoop/emr",
83-
"hadoop/dataset",
84-
"hadoop/algorithm",
85-
"hadoop/load",
86-
"system",
87-
"oracle",
88-
"util"
89-
],
78+
"config",
79+
"exceptions",
80+
"hadoop",
81+
"hadoop/core",
82+
"hadoop/emr",
83+
"hadoop/dataset",
84+
"hadoop/algorithm",
85+
"hadoop/load",
86+
"system",
87+
"util"
88+
],
9089
"configs_to_deploy": [
9190
"api",
9291
"m3d",
@@ -96,6 +95,7 @@
9695
"s3_dir_base": "/bdp/",
9796
"subdir": {
9897
"data": "data/",
98+
"delta_table": "delta_table/",
9999
"error": "error/",
100100
"log": "log/",
101101
"work": "work/",
@@ -107,6 +107,7 @@
107107
"loading": "loading/",
108108
"full_load": "full_load/",
109109
"delta_load": "delta_load/",
110+
"delta_lake_load": "delta_lake_load/",
110111
"append_load": "append_load/",
111112
"black_whole": "black_whole/",
112113
"credentials": "credentials/",

dev-env.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ CONTAINER_IMAGE_NAME="$PROJECT_NAME"
88

99
PARAM_WORKSPACE=( "workspace" "w" "m3d-engine code directory (must be the same within the container life-cycle)")
1010
PARAM_TEST_TYPE=( "test-type" "t" "type of tests to run, possible values are [unit|integration|all]")
11-
PARAM_TEST_MARK=( "test-mark" "m" "pytest mark for filtering tests, possible values are [bdp|emr|algo|oracle]")
11+
PARAM_TEST_MARK=( "test-mark" "m" "pytest mark for filtering tests, possible values are [bdp|emr|algo]")
1212
OPTION_HELP=( "help" "h" "show help message for the command")
1313
OPTION_INTERACTIVE=( "interactive" "i" "use interactive mode and allocate pseudo-TTY when executing a command inside the container")
1414

@@ -152,7 +152,7 @@ elif [[ "$ACTION" == "$ARG_ACTION_CONTAINER_RUN" ]]; then
152152
echo "Running the container $CONTAINER_INSTANCE_NAME ..."
153153
validate_args_are_empty "$HELP_STRING" "${OTHER_ARGS[@]}"
154154

155-
docker run -t -d --name "$CONTAINER_INSTANCE_NAME" -v "${WORKSPACE}:/root/workspace/${PROJECT_NAME}" "$CONTAINER_IMAGE_NAME"
155+
docker run -t -d --name "$CONTAINER_INSTANCE_NAME" -v "${WORKSPACE}:/m3d/workspace/${PROJECT_NAME}" "$CONTAINER_IMAGE_NAME"
156156

157157
# clean pyc-files in the project directory
158158
elif [[ "$ACTION" == "$ARG_ACTION_PROJECT_CLEAN" ]]; then
@@ -199,7 +199,7 @@ elif [[ "$ACTION" == "$ARG_ACTION_PROJECT_TEST" ]]; then
199199
validate_args_are_empty "$HELP_STRING" "${OTHER_ARGS[@]}"
200200

201201
AVAILABLE_TEST_TYPES=("all" "unit" "integration")
202-
AVAILABLE_TEST_MARKS=("bdp" "emr" "algo" "oracle")
202+
AVAILABLE_TEST_MARKS=("bdp" "emr" "algo")
203203

204204
if [[ -z "$TEST_TYPE" ]]; then
205205
RUN_TESTS_CMD="python3 ./test/test_runner.py all"

m3d/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,2 @@
1-
# __init__.py file in a directory indicates to the Python interpreter that the directory should be treated
2-
# like a Python package
31
# flake8: noqa
4-
52
from .m3d import M3D

m3d/config/config_service.py

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,14 @@ class ConfigService(object):
77
class Prefixes(object):
88
ACON = "acon"
99
SCON = "scon"
10-
DDIC = "ddic"
1110
TCONX = "tconx"
1211

1312
class Protocols(object):
14-
S3A = "s3a://"
1513
S3 = "s3://"
1614

1715
class Extensions(object):
1816
JSON = ".json"
19-
SQL = ".sql"
2017
HQL = ".hql"
21-
SH = ".sh"
2218

2319
def __init__(self, config):
2420
# store parameters
@@ -43,7 +39,9 @@ def __init__(self, config):
4339
# prefixes for algorithm configuration files
4440
self.tag_full_load = params["tags"]["full_load"]
4541
self.tag_delta_load = params["tags"]["delta_load"]
42+
self.tag_delta_lake_load = params["tags"]["delta_lake_load"]
4643
self.tag_append_load = params["tags"]["append_load"]
44+
self.tag_decom_gzip = params["tags"]["decom_gzip"]
4745

4846
# suffixes for staging and swap tables
4947
self.tag_table_suffix_stage = params["tags"]["table_suffix_stage"]
@@ -60,7 +58,6 @@ def __init__(self, config):
6058
self.tag_aws = params["tags"]["aws"]
6159

6260
# protocol tags and required constants for them
63-
# TODO: Remove hdfs tag from config
6461
self.tag_file = params["tags"]["file"]
6562

6663
def get_scon_path(self, source_system, database):
@@ -181,25 +178,3 @@ def get_acon_path(self, destination_database, destination_environment, algorithm
181178
)
182179

183180
return base_path
184-
185-
def get_ddic_path(self, source_system, src_database, source_schema, source_table):
186-
"""
187-
Return ddic path for upload system export
188-
189-
:param source_system source system code
190-
:param src_database source database code
191-
:param source_schema upload schema code
192-
:param source_table: upload table code
193-
:return: ddic file for upload system export
194-
"""
195-
filename = "-".join([
196-
ConfigService.Prefixes.DDIC,
197-
source_system,
198-
src_database,
199-
source_schema,
200-
source_table
201-
]) + ConfigService.Extensions.CSV
202-
203-
base_path = os.path.join(self.tag_config, self.tag_table, self.tag_upload, source_system, filename)
204-
205-
return base_path

m3d/exceptions/m3d_exceptions.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def __init__(self, load_type, message=None):
7575
class M3DUnsupportedSystemException(M3DUnsupportedDataTypeException):
7676
"""
7777
This exception is thrown when the requested service does not support the system/technology.
78-
For example, some supported systems/technologies are Hive, Oracle, Exasol, etc.
78+
For example: Hive...
7979
"""
8080

8181
def __init__(self, system, message=None):
@@ -89,8 +89,7 @@ def __init__(self, system, message=None):
8989
class M3DUnsupportedStorageException(M3DUnsupportedDataTypeException):
9090
"""
9191
This exception is thrown when the requested service does not support the storage.
92-
For example, it can be raised while calling the hdfs_table.HDFSTable().drop_tables()
93-
over a non HDFS storage.
92+
For example, it can be raised when calling drop_tables on a storage that is not HDFS-compatible.
9493
"""
9594

9695
def __init__(self, storage, message=None):
@@ -190,17 +189,3 @@ def __init__(self, message=None):
190189
message = "Error integration with EMR API"
191190
self.message = message
192191
super(M3DEMRApiException, self).__init__(message)
193-
194-
195-
class M3DReconciliationDeviationException(M3DIOException):
196-
"""
197-
A general exception to be thrown if at least one reconciliation task for a table
198-
returned a deviation from the expected result
199-
The programmer is responsible to throw an appropriate message.
200-
"""
201-
202-
def __init__(self, message=None):
203-
if message is None:
204-
message = "Reconciliation failed."
205-
self.message = message
206-
super(M3DReconciliationDeviationException, self).__init__(message)

0 commit comments

Comments
 (0)