Skip to content

Commit 66ad38d

Browse files
Project updated.
1 parent 1a112d2 commit 66ad38d

File tree

158 files changed

+1362
-322
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

158 files changed

+1362
-322
lines changed

pom.xml

+76-48
Original file line numberDiff line numberDiff line change
@@ -36,63 +36,91 @@
3636

3737
<cdap.version>6.2.0</cdap.version>
3838
<gson.version>2.3</gson.version>
39+
<http.version>4.5.2</http.version>
3940
<plugin.version>2.4.0</plugin.version>
4041
<spark.version>2.1.3</spark.version>
4142

4243
</properties>
4344

4445
<dependencies>
4546

46-
<dependency>
47-
<groupId>io.cdap.cdap</groupId>
48-
<artifactId>cdap-etl-api</artifactId>
49-
<version>${cdap.version}</version>
50-
<scope>provided</scope>
51-
<exclusions>
52-
<exclusion>
53-
<groupId>com.google.code.gson</groupId>
54-
<artifactId>gson</artifactId>
55-
</exclusion>
56-
</exclusions>
57-
</dependency>
58-
59-
<dependency>
60-
<groupId>io.cdap.cdap</groupId>
61-
<artifactId>cdap-etl-api-spark</artifactId>
62-
<version>${cdap.version}</version>
63-
<scope>provided</scope>
64-
</dependency>
65-
66-
<dependency>
67-
<groupId>io.cdap.cdap</groupId>
68-
<artifactId>cdap-api-spark2_2.11</artifactId>
69-
<version>${cdap.version}</version>
70-
<scope>provided</scope>
71-
</dependency>
47+
<!-- CDAP COMMON
48+
-->
49+
<dependency>
50+
<groupId>io.cdap.cdap</groupId>
51+
<artifactId>cdap-common</artifactId>
52+
<version>${cdap.version}</version>
53+
<scope>provided</scope>
54+
<exclusions>
55+
<exclusion>
56+
<groupId>org.apache.httpcomponents</groupId>
57+
<artifactId>httpclient</artifactId>
58+
</exclusion>
59+
</exclusions>
60+
</dependency>
61+
62+
<!-- CDAP ETL
63+
-->
64+
<dependency>
65+
<groupId>io.cdap.cdap</groupId>
66+
<artifactId>cdap-etl-api</artifactId>
67+
<version>${cdap.version}</version>
68+
<scope>provided</scope>
69+
<exclusions>
70+
<exclusion>
71+
<groupId>com.google.code.gson</groupId>
72+
<artifactId>gson</artifactId>
73+
</exclusion>
74+
</exclusions>
75+
</dependency>
76+
77+
<!-- CDAP SPARK
78+
-->
79+
<dependency>
80+
<groupId>io.cdap.cdap</groupId>
81+
<artifactId>cdap-etl-api-spark</artifactId>
82+
<version>${cdap.version}</version>
83+
<scope>provided</scope>
84+
</dependency>
85+
86+
<dependency>
87+
<groupId>io.cdap.cdap</groupId>
88+
<artifactId>cdap-api-spark2_2.11</artifactId>
89+
<version>${cdap.version}</version>
90+
<scope>provided</scope>
91+
</dependency>
7292

73-
<!-- hydrator dependencies -->
74-
75-
<dependency>
76-
<groupId>io.cdap.plugin</groupId>
77-
<artifactId>hydrator-common</artifactId>
78-
<version>${plugin.version}</version>
79-
</dependency>
80-
81-
<!-- apache spark dependencies -->
93+
<!-- CDAP HYDRATOR
94+
-->
95+
<dependency>
96+
<groupId>io.cdap.plugin</groupId>
97+
<artifactId>hydrator-common</artifactId>
98+
<version>${plugin.version}</version>
99+
</dependency>
82100

83-
<dependency>
84-
<groupId>org.apache.spark</groupId>
85-
<artifactId>spark-sql_2.11</artifactId>
86-
<version>${spark.version}</version>
87-
</dependency>
88-
89-
<!-- google gson -->
90-
<dependency>
91-
<groupId>com.google.code.gson</groupId>
92-
<artifactId>gson</artifactId>
93-
<version>${gson.version}</version>
94-
</dependency>
95-
101+
<!-- SPARK SQL
102+
-->
103+
<dependency>
104+
<groupId>org.apache.spark</groupId>
105+
<artifactId>spark-sql_2.11</artifactId>
106+
<version>${spark.version}</version>
107+
</dependency>
108+
109+
<!-- GOOGLE GSON
110+
-->
111+
<dependency>
112+
<groupId>com.google.code.gson</groupId>
113+
<artifactId>gson</artifactId>
114+
<version>${gson.version}</version>
115+
</dependency>
116+
117+
<!-- HTTP CLIENT
118+
-->
119+
<dependency>
120+
<groupId>org.apache.httpcomponents</groupId>
121+
<artifactId>httpclient</artifactId>
122+
<version>${http.version}</version>
123+
</dependency>
96124
</dependencies>
97125

98126
<build>

works-core/src/main/java/de/kp/works/core/BaseConfig.java

+3-6
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,7 @@ public class BaseConfig extends PluginConfig {
4545
public String referenceName;
4646

4747
public Map<String, Object> getParamsAsMap() {
48-
49-
Map<String, Object> params = new HashMap<>();
50-
return params;
51-
48+
return new HashMap<>();
5249
}
5350

5451
public String getParamsAsJSON() {
@@ -69,13 +66,13 @@ public double[] getDataSplits(String dataSplit) {
6966
splits.add(x);
7067
splits.add(y);
7168

72-
Double[] array = splits.toArray(new Double[splits.size()]);
69+
Double[] array = splits.toArray(new Double[0]);
7370
return Stream.of(array).mapToDouble(Double::doubleValue).toArray();
7471

7572
}
7673

7774
public Boolean toBoolean(String value) {
78-
return value.equals("true") ? true : false;
75+
return value.equals("true");
7976
}
8077

8178
public void validate() {

works-core/src/main/java/de/kp/works/core/chart/TimeChart.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
import de.kp.works.core.Names;
4040
import de.kp.works.core.Params;
4141
import de.kp.works.core.SchemaUtil;
42-
import de.kp.works.core.ml.sampling.LTTBuckets;
42+
import de.kp.works.core.recording.sampling.LTTBuckets;
4343
/*
4444
* This class is intended to be used as a base sink for plotting purposes;
4545
* it writes records to a Table with one record field mapping to the Table

works-core/src/main/java/de/kp/works/core/classifier/ClassifierSink.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
import io.cdap.cdap.etl.api.batch.SparkPluginContext;
3333
import de.kp.works.core.BaseSink;
3434
import de.kp.works.core.SessionHelper;
35-
import de.kp.works.core.ml.SparkMLManager;
35+
import de.kp.works.core.recording.SparkMLManager;
3636

3737
public class ClassifierSink extends BaseSink {
3838

works-core/src/main/java/de/kp/works/core/cluster/ClusterSink.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import io.cdap.cdap.etl.api.batch.SparkPluginContext;
3232
import de.kp.works.core.BaseSink;
3333
import de.kp.works.core.SessionHelper;
34-
import de.kp.works.core.ml.SparkMLManager;
34+
import de.kp.works.core.recording.SparkMLManager;
3535

3636
public class ClusterSink extends BaseSink {
3737

works-core/src/main/java/de/kp/works/core/cluster/PredictorConfig.java

+23-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,22 @@
11
package de.kp.works.core.cluster;
2+
/*
3+
* Copyright (c) 2019 - 2021 Dr. Krusche & Partner PartG. All rights reserved.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
6+
* use this file except in compliance with the License. You may obtain a copy of
7+
* the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14+
* License for the specific language governing permissions and limitations under
15+
* the License.
16+
*
17+
* @author Stefan Krusche, Dr. Krusche & Partner PartG
18+
*
19+
*/
220

321
import com.google.common.base.Strings;
422

@@ -45,16 +63,18 @@ public void validate() {
4563
String.format("[%s] The reference name must not be empty.", this.getClass().getName()));
4664
}
4765

48-
/** MODEL & COLUMNS **/
66+
/* MODEL & COLUMNS */
4967
if (Strings.isNullOrEmpty(modelName)) {
5068
throw new IllegalArgumentException(
5169
String.format("[%s] The model name must not be empty.", this.getClass().getName()));
5270
}
71+
5372
if (Strings.isNullOrEmpty(featuresCol)) {
5473
throw new IllegalArgumentException(
5574
String.format("[%s] The name of the field that contains the feature vector must not be empty.",
5675
this.getClass().getName()));
5776
}
77+
5878
if (Strings.isNullOrEmpty(predictionCol)) {
5979
throw new IllegalArgumentException(String.format(
6080
"[%s] The name of the field that contains the predicted label value must not be empty.",
@@ -65,7 +85,7 @@ public void validate() {
6585

6686
public void validateSchema(Schema inputSchema) {
6787

68-
/** FEATURES COLUMN **/
88+
/* FEATURES COLUMN */
6989

7090
Schema.Field featuresField = inputSchema.getField(featuresCol);
7191
if (featuresField == null) {
@@ -74,7 +94,7 @@ public void validateSchema(Schema inputSchema) {
7494
this.getClass().getName()));
7595
}
7696

77-
/** FEATURES COLUMN **/
97+
/* FEATURES COLUMN */
7898
SchemaUtil.isArrayOfNumeric(inputSchema, featuresCol);
7999

80100
}

works-core/src/main/java/de/kp/works/core/feature/FeatureSink.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import io.cdap.cdap.etl.api.batch.SparkPluginContext;
3232
import de.kp.works.core.BaseSink;
3333
import de.kp.works.core.SessionHelper;
34-
import de.kp.works.core.ml.SparkMLManager;
34+
import de.kp.works.core.recording.SparkMLManager;
3535

3636
public class FeatureSink extends BaseSink {
3737

works-core/src/main/java/de/kp/works/core/recommender/RecommenderSink.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import io.cdap.cdap.etl.api.batch.SparkPluginContext;
3232
import de.kp.works.core.BaseSink;
3333
import de.kp.works.core.SessionHelper;
34-
import de.kp.works.core.ml.SparkMLManager;
34+
import de.kp.works.core.recording.SparkMLManager;
3535

3636
public class RecommenderSink extends BaseSink {
3737

works-core/src/main/java/de/kp/works/core/ml/AbstractRecorder.java renamed to works-core/src/main/java/de/kp/works/core/recording/AbstractRecorder.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml;
1+
package de.kp.works.core.recording;
22
/*
33
* Copyright (c) 2019 - 2021 Dr. Krusche & Partner PartG. All rights reserved.
44
*

works-core/src/main/java/de/kp/works/core/ml/SparkMLManager.java renamed to works-core/src/main/java/de/kp/works/core/recording/SparkMLManager.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml;
1+
package de.kp.works.core.recording;
22
/*
33
* Copyright (c) 2019 Dr. Krusche & Partner PartG. All rights reserved.
44
*
@@ -34,8 +34,8 @@
3434
* - clustering
3535
* - recommendation
3636
* - regression
37-
* - textanalysis
38-
* - timeseries
37+
* - text analysis
38+
* - time series
3939
*
4040
*/
4141
public class SparkMLManager {

works-core/src/main/java/de/kp/works/core/ml/TextRecorder.java renamed to works-core/src/main/java/de/kp/works/core/recording/TextRecorder.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml;
1+
package de.kp.works.core.recording;
22
/*
33
* Copyright (c) 2019 Dr. Krusche & Partner PartG. All rights reserved.
44
*

works-core/src/main/java/de/kp/works/core/ml/TimeRecorder.java renamed to works-core/src/main/java/de/kp/works/core/recording/TimeRecorder.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml;
1+
package de.kp.works.core.recording;
22
/*
33
* Copyright (c) 2019 Dr. Krusche & Partner PartG. All rights reserved.
44
*

works-core/src/main/java/de/kp/works/core/ml/classification/ClassifierRecorder.java renamed to works-core/src/main/java/de/kp/works/core/recording/classification/ClassifierRecorder.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml.classification;
1+
package de.kp.works.core.recording.classification;
22
/*
33
* Copyright (c) 2019 - 2021 Dr. Krusche & Partner PartG. All rights reserved.
44
*
@@ -19,8 +19,8 @@
1919
import com.google.gson.Gson;
2020
import com.google.gson.reflect.TypeToken;
2121

22-
import de.kp.works.core.ml.AbstractRecorder;
23-
import de.kp.works.core.ml.SparkMLManager;
22+
import de.kp.works.core.recording.AbstractRecorder;
23+
import de.kp.works.core.recording.SparkMLManager;
2424
import io.cdap.cdap.api.common.Bytes;
2525
import io.cdap.cdap.api.dataset.lib.FileSet;
2626
import io.cdap.cdap.api.dataset.table.Put;

works-core/src/main/java/de/kp/works/core/ml/classification/DTCRecorder.java renamed to works-core/src/main/java/de/kp/works/core/recording/classification/DTCRecorder.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml.classification;
1+
package de.kp.works.core.recording.classification;
22

33
/*
44
* Copyright (c) 2019 - 2021 Dr. Krusche & Partner PartG. All rights reserved.
@@ -27,8 +27,7 @@
2727
import io.cdap.cdap.api.dataset.table.Table;
2828
import io.cdap.cdap.etl.api.batch.SparkExecutionPluginContext;
2929
import de.kp.works.core.Algorithms;
30-
import de.kp.works.core.ml.classification.ClassifierRecorder;
31-
import de.kp.works.core.ml.SparkMLManager;
30+
import de.kp.works.core.recording.SparkMLManager;
3231

3332
public class DTCRecorder extends ClassifierRecorder {
3433

works-core/src/main/java/de/kp/works/core/ml/classification/GBCRecorder.java renamed to works-core/src/main/java/de/kp/works/core/recording/classification/GBCRecorder.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml.classification;
1+
package de.kp.works.core.recording.classification;
22

33
/*
44
* Copyright (c) 2019 - 2021 Dr. Krusche & Partner PartG. All rights reserved.
@@ -27,8 +27,7 @@
2727
import io.cdap.cdap.api.dataset.table.Table;
2828
import io.cdap.cdap.etl.api.batch.SparkExecutionPluginContext;
2929
import de.kp.works.core.Algorithms;
30-
import de.kp.works.core.ml.classification.ClassifierRecorder;
31-
import de.kp.works.core.ml.SparkMLManager;
30+
import de.kp.works.core.recording.SparkMLManager;
3231

3332
public class GBCRecorder extends ClassifierRecorder {
3433

works-core/src/main/java/de/kp/works/core/ml/classification/LRRecorder.java renamed to works-core/src/main/java/de/kp/works/core/recording/classification/LRRecorder.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml.classification;
1+
package de.kp.works.core.recording.classification;
22

33
/*
44
* Copyright (c) 2019 - 2021 Dr. Krusche & Partner PartG. All rights reserved.
@@ -27,8 +27,7 @@
2727
import io.cdap.cdap.api.dataset.table.Table;
2828
import io.cdap.cdap.etl.api.batch.SparkExecutionPluginContext;
2929
import de.kp.works.core.Algorithms;
30-
import de.kp.works.core.ml.classification.ClassifierRecorder;
31-
import de.kp.works.core.ml.SparkMLManager;
30+
import de.kp.works.core.recording.SparkMLManager;
3231

3332
public class LRRecorder extends ClassifierRecorder {
3433

works-core/src/main/java/de/kp/works/core/ml/classification/MLPRecorder.java renamed to works-core/src/main/java/de/kp/works/core/recording/classification/MLPRecorder.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package de.kp.works.core.ml.classification;
1+
package de.kp.works.core.recording.classification;
22

33
/*
44
* Copyright (c) 2019 - 2021 Dr. Krusche & Partner PartG. All rights reserved.
@@ -27,8 +27,7 @@
2727
import io.cdap.cdap.api.dataset.table.Table;
2828
import io.cdap.cdap.etl.api.batch.SparkExecutionPluginContext;
2929
import de.kp.works.core.Algorithms;
30-
import de.kp.works.core.ml.classification.ClassifierRecorder;
31-
import de.kp.works.core.ml.SparkMLManager;
30+
import de.kp.works.core.recording.SparkMLManager;
3231

3332
public class MLPRecorder extends ClassifierRecorder {
3433

0 commit comments

Comments
 (0)