Skip to content

Commit fa82f07

Browse files
authored
Merge pull request #63 from civitaspo/develop
v0.2.3
2 parents 6ac81c0 + b920f48 commit fa82f07

File tree

8 files changed

+133
-11
lines changed

8 files changed

+133
-11
lines changed

Diff for: CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
0.2.3 (2019-07-19)
2+
==================
3+
* [New Feature] Add `athena.drop_table_multi>` operator
4+
* [Enhancement] Expose the real error message and state when the query execution fails.
5+
16
0.2.2 (2019-07-19)
27
==================
38

Diff for: README.md

+14-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ _export:
1515
repositories:
1616
- https://jitpack.io
1717
dependencies:
18-
- pro.civitaspo:digdag-operator-athena:0.2.2
18+
- pro.civitaspo:digdag-operator-athena:0.2.3
1919
athena:
2020
auth_method: profile
2121

@@ -228,7 +228,7 @@ Nothing
228228
## Configuration for `athena.drop_table>` operator
229229

230230
- **database**: The name of the database. (string, required)
231-
- **table**: The name of the partitioned table. (string, required)
231+
- **table**: The name of the table. (string, required)
232232
- **with_location**: Drop the table and also remove its objects on S3. (boolean, default: `false`)
233233
- **ignore_if_not_exist**: Ignore if the table does not exist. (boolean, default: `true`)
234234
- **catalog_id**: glue data catalog id if you use a catalog different from account/region default catalog. (string, optional)
@@ -237,6 +237,18 @@ Nothing
237237

238238
Nothing
239239

240+
## Configuration for `athena.drop_table_multi>` operator
241+
242+
- **database**: The name of the database. (string, required)
243+
- **regexp**: The regular expression for table names to be dropped. (string, required)
244+
- **with_location**: Drop the tables and also remove their objects on S3. (boolean, default: `false`)
245+
- **limit**: Max number of tables that can be dropped. (integer, optional)
246+
- **catalog_id**: glue data catalog id if you use a catalog different from account/region default catalog. (string, optional)
247+
248+
### Output Parameters
249+
250+
Nothing
251+
240252
# Development
241253

242254
## Run an Example

Diff for: build.gradle

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ plugins {
55
}
66

77
group = 'pro.civitaspo'
8-
version = '0.2.2'
8+
version = '0.2.3'
99

1010
def digdagVersion = '0.9.37'
1111
def awsSdkVersion = "1.11.587"

Diff for: example/example.dig

+25-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ _export:
44
- file://${repos}
55
# - https://jitpack.io
66
dependencies:
7-
- pro.civitaspo:digdag-operator-athena:0.2.2
7+
- pro.civitaspo:digdag-operator-athena:0.2.3
88
athena:
99
auth_method: profile
1010
value: 5
@@ -22,7 +22,7 @@ _export:
2222
athena.ctas>: template.sql
2323
database: ${database}
2424
table: hoge
25-
location: ${output}
25+
location: ${output}/
2626

2727
+step5:
2828
echo>: ${athena}
@@ -90,3 +90,26 @@ _export:
9090
b: "9"
9191
c: "10"
9292
save_mode: overwrite
93+
94+
+step14:
95+
loop>: 10
96+
_parallel: true
97+
_do:
98+
athena.ctas>: template.sql
99+
database: ${database}
100+
table: hoge_${i}
101+
location: ${output}/hoge_${i}/
102+
save_mode: overwrite
103+
104+
+step15:
105+
athena.drop_table_multi>:
106+
database: ${database}
107+
regexp: 'hoge_\d+'
108+
with_location: true
109+
limit: 3
110+
111+
+step16:
112+
athena.drop_table_multi>:
113+
database: ${database}
114+
regexp: 'hoge_\d+'
115+
with_location: true

Diff for: src/main/scala/pro/civitaspo/digdag/plugin/athena/AthenaPlugin.scala

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import pro.civitaspo.digdag.plugin.athena.apas.{AthenaApasOperator, AthenaDiffSc
1212
import pro.civitaspo.digdag.plugin.athena.ctas.AthenaCtasOperator
1313
import pro.civitaspo.digdag.plugin.athena.drop_partition.AthenaDropPartitionOperator
1414
import pro.civitaspo.digdag.plugin.athena.drop_table.AthenaDropTableOperator
15+
import pro.civitaspo.digdag.plugin.athena.drop_table_multi.AthenaDropTableMultiOperator
1516
import pro.civitaspo.digdag.plugin.athena.preview.AthenaPreviewOperator
1617
import pro.civitaspo.digdag.plugin.athena.query.AthenaQueryOperator
1718

@@ -36,7 +37,8 @@ object AthenaPlugin
3637
operatorFactory("athena.ctas", classOf[AthenaCtasOperator]),
3738
operatorFactory("athena.query", classOf[AthenaQueryOperator]),
3839
operatorFactory("athena.preview", classOf[AthenaPreviewOperator]),
39-
operatorFactory("athena.drop_table", classOf[AthenaDropTableOperator])
40+
operatorFactory("athena.drop_table", classOf[AthenaDropTableOperator]),
41+
operatorFactory("athena.drop_table_multi", classOf[AthenaDropTableMultiOperator])
4042
)
4143
}
4244

Diff for: src/main/scala/pro/civitaspo/digdag/plugin/athena/aws/athena/Athena.scala

+14-4
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,20 @@ case class Athena(aws: Aws)
9797
outputLocation = outputLocation,
9898
requestToken = requestToken)
9999

100-
waitQueryExecution(executionId = executionId,
101-
successStates = successStates,
102-
failureStates = failureStates,
103-
timeout = timeout)
100+
val t = Try {
101+
waitQueryExecution(executionId = executionId,
102+
successStates = successStates,
103+
failureStates = failureStates,
104+
timeout = timeout)
105+
}
106+
t match {
107+
case Success(_) => logger.info(s"Success to execute the query: $executionId")
108+
case Failure(exception) =>
109+
logger.error(exception.getMessage, exception)
110+
val qe = getQueryExecution(executionId = executionId)
111+
throw new IllegalStateException(s"Failed the query execution: ${qe.withQuery(null).toString}", exception)
112+
}
113+
104114

105115
getQueryExecution(executionId = executionId)
106116
}

Diff for: src/main/scala/pro/civitaspo/digdag/plugin/athena/aws/glue/catalog/TableCatalog.scala

+29-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
package pro.civitaspo.digdag.plugin.athena.aws.glue.catalog
22

33

4-
import com.amazonaws.services.glue.model.{DeleteTableRequest, GetTableRequest, Table}
4+
import com.amazonaws.services.glue.model.{DeleteTableRequest, GetTableRequest, GetTablesRequest, Table}
55
import pro.civitaspo.digdag.plugin.athena.aws.glue.Glue
66

7+
import scala.jdk.CollectionConverters._
78
import scala.util.Try
89

910

@@ -45,4 +46,31 @@ case class TableCatalog(glue: Glue)
4546
glue.withGlue(_.deleteTable(req))
4647
}
4748

49+
/**
 * Lists the tables of a Glue database, following pagination until either
 * every page has been read or `limit` tables have been collected.
 *
 * @param catalogIdOption Glue data catalog id; when empty the request carries no catalog id.
 * @param database        name of the database whose tables are listed.
 * @param expression      optional pattern forwarded as the `Expression` of the GetTables request.
 * @param limit           optional upper bound on the total number of returned tables
 *                        (counted across all pages). A non-positive limit yields an empty result.
 * @return the collected tables, never more than `limit` when a limit is given.
 */
def list(catalogIdOption: Option[String],
         database: String,
         expression: Option[String] = None,
         limit: Option[Int] = None): Seq[Table] =
{
    // Glue's GetTables only accepts MaxResults within 1..100, so a larger
    // limit has to be served by several pages instead of one oversized request.
    val maxPageSize = 100

    val req = new GetTablesRequest()
    catalogIdOption.foreach(req.setCatalogId)
    req.setDatabaseName(database)
    expression.foreach(req.setExpression)

    // Fully qualified annotation: avoids depending on an import this file does not have.
    @scala.annotation.tailrec
    def fetch(nextToken: Option[String], acc: Vector[Table]): Seq[Table] =
    {
        // The limit is tracked against everything collected so far, not per page.
        val remaining: Option[Int] = limit.map(_ - acc.length)
        if (remaining.exists(_ <= 0)) acc
        else {
            remaining.foreach(r => req.setMaxResults(math.min(r, maxPageSize)))
            nextToken.foreach(req.setNextToken)
            val results = glue.withGlue(_.getTables(req))
            // Guard against a missing table list in the response.
            val page = Option(results.getTableList).fold(Vector.empty[Table])(_.asScala.toVector)
            val collected = acc ++ page
            val bounded = limit.fold(collected)(n => collected.take(n))
            Option(results.getNextToken) match {
                case Some(nt) => fetch(Some(nt), bounded)
                case None => bounded
            }
        }
    }

    fetch(None, Vector.empty)
}
4876
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package pro.civitaspo.digdag.plugin.athena.drop_table_multi
2+
3+
4+
import io.digdag.client.config.Config
5+
import io.digdag.spi.{OperatorContext, TaskResult, TemplateEngine}
6+
import pro.civitaspo.digdag.plugin.athena.AbstractAthenaOperator
7+
8+
9+
/**
 * Operator behind `athena.drop_table_multi>`.
 *
 * Lists the Glue tables of `database` whose names match `regexp` (at most
 * `limit` of them when a limit is configured) and drops each one. When
 * `with_location` is true, the objects under each table's storage location
 * are removed before the table itself is dropped.
 */
class AthenaDropTableMultiOperator(operatorName: String,
                                   context: OperatorContext,
                                   systemConfig: Config,
                                   templateEngine: TemplateEngine)
    extends AbstractAthenaOperator(operatorName, context, systemConfig, templateEngine)
{
    val database: String = params.get("database", classOf[String])
    // `regexp` is documented as required. Reading it as optional would pass no
    // expression to the table listing, so a forgotten parameter could drop
    // every table in the database.
    val regexp: String = params.get("regexp", classOf[String])
    // Read as a boxed Integer: with classOf[Int], a missing value's null is
    // unboxed to 0 and the option would become Some(0) instead of None.
    val limit: Option[Int] = Option(params.getOptional("limit", classOf[Integer]).orNull()).map(_.intValue())
    val withLocation: Boolean = params.get("with_location", classOf[Boolean], false)
    val catalogId: Option[String] = Option(params.getOptional("catalog_id", classOf[String]).orNull())

    /**
     * Drops every matching table, optionally deleting its objects first.
     *
     * @return an empty task result; this operator exports no output parameters.
     */
    override def runTask(): TaskResult =
    {
        logger.info(s"Drop tables matched by the expression: /$regexp/ in $database")
        aws.glue.table.list(catalogId, database, Option(regexp), limit).foreach { t =>
            if (withLocation) {
                // A table may come without a storage descriptor or location;
                // in that case there is nothing to delete and the table is
                // simply dropped. The location is normalised to end with "/".
                val location: Option[String] = for {
                    sd <- Option(t.getStorageDescriptor)
                    l <- Option(sd.getLocation)
                    if l.nonEmpty
                } yield if (l.endsWith("/")) l else l + "/"

                location.foreach { loc =>
                    if (aws.s3.hasObjects(loc)) {
                        logger.info(s"Delete objects because the location $loc has objects.")
                        aws.s3.rm_r(loc).foreach(uri => logger.info(s"Deleted: ${uri.toString}"))
                    }
                }
            }
            logger.info(s"Drop the table '$database.${t.getName}'")
            aws.glue.table.delete(catalogId, database, t.getName)
        }
        TaskResult.empty(cf)
    }

}

0 commit comments

Comments
 (0)