-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathExcelLoaderSpec.scala
More file actions
112 lines (83 loc) · 3.92 KB
/
ExcelLoaderSpec.scala
File metadata and controls
112 lines (83 loc) · 3.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
package bio.ferlab.datalake.spark3.loader
import bio.ferlab.datalake.commons.config.Format.EXCEL
import bio.ferlab.datalake.spark3.testutils.AirportInput
import bio.ferlab.datalake.testutils.SparkSpec
import org.apache.spark.sql.DataFrame
class ExcelLoaderSpec extends SparkSpec {

  import spark.implicits._

  // Read/write option shared by every scenario: the first row carries the column names.
  private val headerOptions: Map[String, String] = Map("header" -> "true")

  // Classpath folder holding the pre-built Excel fixtures.
  val folderPath: String = getClass.getClassLoader.getResource("raw/landing/").getPath
  // Relative destination used when a scenario needs to write an Excel file.
  val outputLocation: String = "output/airports.xlsx"

  // Rows stored in the fixture files.
  val expected: Seq[AirportInput] = Seq(
    AirportInput("1", "YYC", "Calgary Int airport"),
    AirportInput("2", "YUL", "Montreal Int airport")
  )

  // Extra row appended by the insert scenarios.
  val expectedUpdate: Seq[AirportInput] = Seq(
    AirportInput("3", "YVR", "Vancouver Int airport")
  )

  val initialDF: DataFrame = expected.toDF()

  /** Writes the initial dataset under `rootPath`, then runs `testCode` with the written path. */
  private def withInitialDfInFolder(rootPath: String)(testCode: String => Any): Unit = {
    val destination: String = rootPath + outputLocation
    ExcelLoader.writeOnce(destination, "", "", initialDF, Nil, EXCEL.sparkFormat, headerOptions)
    testCode(destination)
  }

  /** Appends `updates` to the Excel data at `path`, then runs `testCode`. */
  private def withUpdatedDfInFolder(updates: DataFrame, path: String)(testCode: => Any): Unit = {
    ExcelLoader.insert(path, "", "", updates, Nil, EXCEL.sparkFormat, headerOptions)
    testCode
  }

  "read" should "read xlsx file as a DataFrame" in {
    val xlsxLocation = folderPath + "airports.xlsx"

    val loaded = ExcelLoader.read(xlsxLocation, EXCEL.sparkFormat, headerOptions, None, None)

    loaded.as[AirportInput].collect() should contain theSameElementsAs expected
  }

  it should "read xls file as a DataFrame" in {
    val xlsLocation = folderPath + "airports.xls"

    val loaded = ExcelLoader.read(xlsLocation, EXCEL.sparkFormat, headerOptions, None, None)

    loaded.as[AirportInput].collect() should contain theSameElementsAs expected
  }

  it should "throw an exception when the header option is missing" in {
    val xlsxLocation: String = folderPath + "airports.xlsx"

    an[IllegalArgumentException] should be thrownBy {
      ExcelLoader.read(xlsxLocation, EXCEL.sparkFormat, readOptions = Map.empty, databaseName = None, tableName = None)
    }
  }

  it should "read folder containing multiple Excel files as a DataFrame" in withOutputFolder("root") { root =>
    withInitialDfInFolder(root) { location =>
      withUpdatedDfInFolder(expectedUpdate.toDF(), location) {
        val loaded = ExcelLoader.read(location, EXCEL.sparkFormat, headerOptions, None, None)

        loaded.as[AirportInput].collect() should contain theSameElementsAs (expected ++ expectedUpdate)
      }
    }
  }

  "writeOnce" should "write a dataframe to a file" in withOutputFolder("root") { root =>
    withInitialDfInFolder(root) { location =>
      val loaded = ExcelLoader.read(location, EXCEL.sparkFormat, headerOptions)

      loaded.as[AirportInput].collect() should contain theSameElementsAs expected
    }
  }

  it should "overwrite existing files when writing to the same folder" in withOutputFolder("root") { root =>
    withInitialDfInFolder(root) { location =>
      // Write a second dataset to the exact same location: only it should survive.
      ExcelLoader.writeOnce(location, "", "", expectedUpdate.toDF(), Nil, EXCEL.sparkFormat, headerOptions)

      val loaded = ExcelLoader.read(location, EXCEL.sparkFormat, headerOptions, None, None)

      loaded.as[AirportInput].collect() should contain theSameElementsAs expectedUpdate
    }
  }

  "insert" should "append a dataframe to an existing file" in withOutputFolder("root") { root =>
    withInitialDfInFolder(root) { location =>
      withUpdatedDfInFolder(expectedUpdate.toDF(), location) {
        val loaded = ExcelLoader.read(location, EXCEL.sparkFormat, headerOptions, None, None)

        loaded.as[AirportInput].collect() should contain theSameElementsAs (expected ++ expectedUpdate)
      }
    }
  }
}