From 2aa9e735399290c908fb36bba8cb54ad8aec66c0 Mon Sep 17 00:00:00 2001
From: Robert <78926291+Asciax@users.noreply.github.com>
Date: Wed, 25 Feb 2026 17:12:59 -0500
Subject: [PATCH 1/6] feat: UNIC-1977 Implement write and insert for
 ExcelLoader

---
 build.sbt                                     |  2 +-
 .../datalake/spark3/loader/ExcelLoader.scala  | 26 +++++++++++--
 .../datalake/spark3/loader/LoadResolver.scala |  6 +++
 .../spark3/loader/ExcelLoaderSpec.scala       | 38 ++++++++++++++++++-
 4 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/build.sbt b/build.sbt
index 7a3324ae..7d65b364 100644
--- a/build.sbt
+++ b/build.sbt
@@ -63,7 +63,7 @@ lazy val `datalake-spark3` = (project in file("datalake-spark3"))
       "com.microsoft.sqlserver" % "mssql-jdbc" % "8.4.1.jre11" % Provided,
       "com.microsoft.aad" % "adal4j" % "0.0.2" % Provided,
       "com.microsoft.azure" % "spark-mssql-connector_2.12" % "1.2.0" % Provided,
-      "com.crealytics" %% "spark-excel" % "3.5.0_0.20.3" % Provided,
+      "dev.mauch" %% "spark-excel" % "3.5.5_0.30.2", // % Provided,
       //Use by ElasticsearchClient
       "com.softwaremill.sttp.client3" %% "core" % "3.9.2",
       "com.softwaremill.sttp.client3" %% "json4s" % "3.9.2" exclude("org.json4s", "json4s-core_2.12"), //Exclusion because json4s is used in spark
diff --git a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
index bd703149..6c63a64d 100644
--- a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
+++ b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
@@ -1,6 +1,6 @@
 package bio.ferlab.datalake.spark3.loader
 
-import org.apache.spark.sql.{DataFrame, SparkSession}
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
 
 import java.time.LocalDate
 
@@ -20,6 +20,22 @@ object ExcelLoader extends Loader {
       .load(location)
   }
 
+  def write(df: DataFrame,
+            location: String,
+            databaseName: String,
+            tableName: String,
+            partitioning: List[String],
+            format: String,
+            options: Map[String, String],
+            mode: SaveMode): DataFrame = {
+    df.write
+      .options(options)
+      .format(format)
+      .mode(mode)
+      .save(location)
+    df
+  }
+
   override def overwritePartition(location: String,
                                   databaseName: String,
                                   tableName: String,
@@ -34,7 +50,9 @@ object ExcelLoader extends Loader {
                          df: DataFrame,
                          partitioning: List[String],
                          format: String,
-                         options: Map[String, String])(implicit spark: SparkSession): DataFrame = ???
+                         options: Map[String, String])(implicit spark: SparkSession): DataFrame = {
+    write(df, location, databaseName, tableName, partitioning, format, options, SaveMode.Overwrite)
+  }
 
   override def insert(location: String,
                       databaseName: String,
@@ -42,7 +60,9 @@ object ExcelLoader extends Loader {
                       updates: DataFrame,
                       partitioning: List[String],
                       format: String,
-                      options: Map[String, String])(implicit spark: SparkSession): DataFrame = ???
+                      options: Map[String, String])(implicit spark: SparkSession): DataFrame = {
+    write(updates, location, databaseName, tableName, partitioning, format, options, SaveMode.Append)
+  }
 
   override def upsert(location: String,
                       databaseName: String,
diff --git a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/LoadResolver.scala b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/LoadResolver.scala
index 9096db5d..62a5c977 100644
--- a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/LoadResolver.scala
+++ b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/LoadResolver.scala
@@ -50,6 +50,12 @@ object LoadResolver {
     case (ELASTICSEARCH, OverWrite) => (ds: DatasetConf, df: DataFrame) =>
       ElasticsearchLoader.writeOnce(ds.location, ds.table.map(_.database).getOrElse(""), ds.table.map(_.name).getOrElse(ds.location), df, ds.partitionby, ds.format.sparkFormat, ds.writeoptions)
 
+    case (EXCEL, OverWrite) => (ds: DatasetConf, df: DataFrame) =>
+      ExcelLoader.writeOnce(ds.location, ds.table.map(_.database).getOrElse(""), ds.table.map(_.name).getOrElse(ds.location), df, ds.partitionby, ds.format.sparkFormat, ds.writeoptions)
+
+    case (EXCEL, Insert) => (ds: DatasetConf, df: DataFrame) =>
+      ExcelLoader.insert(ds.location, ds.table.map(_.database).getOrElse(""), ds.table.map(_.name).getOrElse(ds.location), df, ds.partitionby, ds.format.sparkFormat, ds.writeoptions)
+
 
     //generic fallback behaviours
     case (f, OverWrite)   => (ds: DatasetConf, df: DataFrame) =>
diff --git a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
index 886a0740..a29f495e 100644
--- a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
+++ b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
@@ -3,16 +3,29 @@ package bio.ferlab.datalake.spark3.loader
 import bio.ferlab.datalake.commons.config.Format.EXCEL
 import bio.ferlab.datalake.spark3.testutils.AirportInput
 import bio.ferlab.datalake.testutils.SparkSpec
+import org.scalatest.BeforeAndAfterAll
 
-class ExcelLoaderSpec extends SparkSpec {
+import java.nio.file.{Files, Paths}
+
+class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterAll{
 
   import spark.implicits._
 
   val folderPath: String = getClass.getClassLoader.getResource("raw/landing/").getPath
+  val outputLocation: String = folderPath + "output/airports.xlsx"
   val expected: Seq[AirportInput] = Seq(
     AirportInput("1", "YYC", "Calgary Int airport"),
     AirportInput("2", "YUL", "Montreal Int airport")
   )
+  val initialDF = expected.toDF()
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    val outputPath = Paths.get(outputLocation)
+    if (Files.exists(outputPath)) {
+      Files.delete(outputPath)
+    }
+  }
 
   "read" should "read xlsx file as a DataFrame" in {
     val fileLocation = folderPath + "airports.xlsx"
@@ -34,5 +47,28 @@ class ExcelLoaderSpec extends SparkSpec {
       .collect() should contain theSameElementsAs expected
   }
 
+  "writeOnce" should "write a dataframe to a file" in {
+    ExcelLoader.writeOnce(outputLocation, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+
+    val result = ExcelLoader.read(outputLocation, EXCEL.sparkFormat, Map("header" -> "true"))
+
+    result.as[AirportInput].collect() should contain theSameElementsAs expected
+  }
+
+  "insert" should "append a dataframe to an existing file" in {
+    // Overwrite with initial data first
+    ExcelLoader.writeOnce(outputLocation, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+
+    // Prepare new data and append it
+    val updates = Seq(AirportInput("3", "YVR", "Vancouver Int airport")).toDF()
+    val expectedDfValues = expected ++ Seq(AirportInput("3", "YVR", "Vancouver Int airport"))
+
+    ExcelLoader.insert(outputLocation, "", "", updates, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+
+    val result = ExcelLoader.read(outputLocation, EXCEL.sparkFormat, Map("header" -> "true"))
+
+    result.as[AirportInput].collect() should contain theSameElementsAs expectedDfValues
+  }
+
 
 }

From a0af8624677c870303967f6427593773778b7d07 Mon Sep 17 00:00:00 2001
From: Robert <78926291+Asciax@users.noreply.github.com>
Date: Thu, 26 Feb 2026 15:36:48 -0500
Subject: [PATCH 2/6] feat: Changed lib to provided, added Excel tests,
 required schema on write

---
 build.sbt                                     |  2 +-
 .../datalake/spark3/loader/ExcelLoader.scala  |  5 +-
 .../spark3/loader/ExcelLoaderSpec.scala       | 71 ++++++++++++++++---
 3 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/build.sbt b/build.sbt
index 7d65b364..6926c40d 100644
--- a/build.sbt
+++ b/build.sbt
@@ -63,7 +63,7 @@ lazy val `datalake-spark3` = (project in file("datalake-spark3"))
       "com.microsoft.sqlserver" % "mssql-jdbc" % "8.4.1.jre11" % Provided,
       "com.microsoft.aad" % "adal4j" % "0.0.2" % Provided,
       "com.microsoft.azure" % "spark-mssql-connector_2.12" % "1.2.0" % Provided,
-      "dev.mauch" %% "spark-excel" % "3.5.5_0.30.2", // % Provided,
+      "dev.mauch" %% "spark-excel" % "3.5.5_0.30.2" % Provided,
       //Use by ElasticsearchClient
       "com.softwaremill.sttp.client3" %% "core" % "3.9.2",
       "com.softwaremill.sttp.client3" %% "json4s" % "3.9.2" exclude("org.json4s", "json4s-core_2.12"), //Exclusion because json4s is used in spark
diff --git a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
index 6c63a64d..907910a2 100644
--- a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
+++ b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
@@ -28,6 +28,9 @@ object ExcelLoader extends Loader {
             format: String,
             options: Map[String, String],
             mode: SaveMode): DataFrame = {
+    // Excel format requires the schema to be non-empty, does not support empty schema dataframe writes
+    require(df.schema.nonEmpty, "DataFrame must have a valid schema with at least one column.")
+
     df.write
       .options(options)
       .format(format)
@@ -72,7 +75,7 @@ object ExcelLoader extends Loader {
                       partitioning: List[String],
                       format: String,
                       options: Map[String, String])(implicit spark: SparkSession): DataFrame = ???
-  
+
   override def scd1(location: String,
                     databaseName: String,
                     tableName: String,
diff --git a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
index a29f495e..3e7774d3 100644
--- a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
+++ b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
@@ -3,11 +3,12 @@ package bio.ferlab.datalake.spark3.loader
 import bio.ferlab.datalake.commons.config.Format.EXCEL
 import bio.ferlab.datalake.spark3.testutils.AirportInput
 import bio.ferlab.datalake.testutils.SparkSpec
-import org.scalatest.BeforeAndAfterAll
+import org.apache.spark.sql.DataFrame
+import org.scalatest.BeforeAndAfterEach
 
 import java.nio.file.{Files, Paths}
 
-class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterAll{
+class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterEach {
 
   import spark.implicits._
 
@@ -17,14 +18,39 @@ class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterAll{
     AirportInput("1", "YYC", "Calgary Int airport"),
     AirportInput("2", "YUL", "Montreal Int airport")
   )
-  val initialDF = expected.toDF()
+  val simpleExpectedUpdate: Seq[AirportInput] = Seq(
+    AirportInput("3", "YVR", "Vancouver Int airport")
+  )
+
+  val initialDF: DataFrame = expected.toDF()
 
-  override def beforeAll(): Unit = {
-    super.beforeAll()
+  override def afterEach(): Unit = {
+    super.afterEach()
     val outputPath = Paths.get(outputLocation)
     if (Files.exists(outputPath)) {
-      Files.delete(outputPath)
+      cleanUpFilesRecursively(outputPath)
+    }
+  }
+
+  /**
+   * Recursively deletes files and directories at the given path. Necessary because ExcelLoader API v2
+   * may create multiple excel partitions when writing to a folder.
+   * */
+  private def cleanUpFilesRecursively(path: java.nio.file.Path): Unit = {
+    if (Files.isDirectory(path)) {
+      Files.list(path).forEach(cleanUpFilesRecursively)
     }
+    Files.deleteIfExists(path)
+  }
+
+  private def withInitialDfInFolder(testCode: => Any): Unit = {
+    ExcelLoader.writeOnce(outputLocation, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+    testCode
+  }
+
+  private def withUpdatedDfInFolder(updates: DataFrame, testCode: String => Any): Unit = {
+    ExcelLoader.insert(outputLocation, "", "", updates, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+    testCode(outputLocation)
   }
 
   "read" should "read xlsx file as a DataFrame" in {
@@ -47,6 +73,25 @@ class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterAll{
       .collect() should contain theSameElementsAs expected
   }
 
+  it should "throw an exception when the header option is missing" in {
+    val fileLocation: String = folderPath + "airports.xlsx"
+
+    an[IllegalArgumentException] should be thrownBy {
+      ExcelLoader.read(fileLocation, EXCEL.sparkFormat, Map.empty, None, None)
+    }
+  }
+
+  it should "read folder containing multiple Excel files as a DataFrame" in withInitialDfInFolder {
+    withUpdatedDfInFolder(simpleExpectedUpdate.toDF(), { folderLocation =>
+
+      val result = ExcelLoader.read(folderLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
+
+      result
+        .as[AirportInput]
+        .collect() should contain theSameElementsAs (expected ++ simpleExpectedUpdate)
+    })
+  }
+
   "writeOnce" should "write a dataframe to a file" in {
     ExcelLoader.writeOnce(outputLocation, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
 
@@ -55,7 +100,18 @@ class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterAll{
     result.as[AirportInput].collect() should contain theSameElementsAs expected
   }
 
-  "insert" should "append a dataframe to an existing file" in {
+  "insert" should "append a dataframe to an existing file" in withInitialDfInFolder {
+    withUpdatedDfInFolder(simpleExpectedUpdate.toDF(), {
+      folderLocation =>
+        val result = ExcelLoader.read(folderLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
+
+        result
+          .as[AirportInput]
+          .collect() should contain theSameElementsAs (expected ++ simpleExpectedUpdate)
+    })
+  }
+
+  "insert2" should "append a dataframe to an existing file" in {
     // Overwrite with initial data first
     ExcelLoader.writeOnce(outputLocation, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
 
@@ -70,5 +126,4 @@ class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterAll{
     result.as[AirportInput].collect() should contain theSameElementsAs expectedDfValues
   }
 
-
 }

From 33dfdc2fce5190cd4ab9f87092c4a6238608bb11 Mon Sep 17 00:00:00 2001
From: Robert <78926291+Asciax@users.noreply.github.com>
Date: Thu, 26 Feb 2026 15:55:21 -0500
Subject: [PATCH 3/6] refactor: Correct docstrings, improve test spec, require
 header since required by spark-excel

---
 .../datalake/spark3/loader/ExcelLoader.scala  |  1 +
 .../spark3/loader/ExcelLoaderSpec.scala       | 28 ++++++++-----------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
index 907910a2..e05a0bd6 100644
--- a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
+++ b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
@@ -30,6 +30,7 @@ object ExcelLoader extends Loader {
             mode: SaveMode): DataFrame = {
     // Excel format requires the schema to be non-empty, does not support empty schema dataframe writes
     require(df.schema.nonEmpty, "DataFrame must have a valid schema with at least one column.")
+    require(options.isDefinedAt("header"), "Expecting [header] to be defined in readOptions.")
 
     df.write
       .options(options)
diff --git a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
index 3e7774d3..03daf496 100644
--- a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
+++ b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
@@ -33,7 +33,7 @@ class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterEach {
   }
 
   /**
-   * Recursively deletes files and directories at the given path. Necessary because ExcelLoader API v2
+   * Recursively deletes files and directories at the given path. Necessary because spark-excel format API v2
    * may create multiple excel partitions when writing to a folder.
    * */
   private def cleanUpFilesRecursively(path: java.nio.file.Path): Unit = {
@@ -100,6 +100,17 @@ class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterEach {
     result.as[AirportInput].collect() should contain theSameElementsAs expected
   }
 
+  it should "overwrite existing files when writing to the same folder" in withInitialDfInFolder {
+    //Overwriting the same location
+    ExcelLoader.writeOnce(outputLocation, "", "", simpleExpectedUpdate.toDF(), Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+
+    val result = ExcelLoader.read(outputLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
+
+    result
+      .as[AirportInput]
+      .collect() should contain theSameElementsAs simpleExpectedUpdate
+  }
+
   "insert" should "append a dataframe to an existing file" in withInitialDfInFolder {
     withUpdatedDfInFolder(simpleExpectedUpdate.toDF(), {
       folderLocation =>
@@ -111,19 +122,4 @@ class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterEach {
     })
   }
 
-  "insert2" should "append a dataframe to an existing file" in {
-    // Overwrite with initial data first
-    ExcelLoader.writeOnce(outputLocation, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
-
-    // Prepare new data and append it
-    val updates = Seq(AirportInput("3", "YVR", "Vancouver Int airport")).toDF()
-    val expectedDfValues = expected ++ Seq(AirportInput("3", "YVR", "Vancouver Int airport"))
-
-    ExcelLoader.insert(outputLocation, "", "", updates, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
-
-    val result = ExcelLoader.read(outputLocation, EXCEL.sparkFormat, Map("header" -> "true"))
-
-    result.as[AirportInput].collect() should contain theSameElementsAs expectedDfValues
-  }
-
 }

From 67bafa98adcec465c388e183d35448403d09e377 Mon Sep 17 00:00:00 2001
From: Robert <78926291+Asciax@users.noreply.github.com>
Date: Wed, 4 Mar 2026 15:49:07 -0500
Subject: [PATCH 4/6] refactor: Use withOutputFolder, implement suggested changes

---
 .../datalake/spark3/loader/ExcelLoader.scala  |  9 +-
 .../spark3/loader/ExcelLoaderSpec.scala       | 95 +++++++++----------
 2 files changed, 47 insertions(+), 57 deletions(-)

diff --git a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
index e05a0bd6..7637bd1f 100644
--- a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
+++ b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
@@ -20,11 +20,8 @@ object ExcelLoader extends Loader {
       .load(location)
   }
 
-  def write(df: DataFrame,
+  private def write(df: DataFrame,
             location: String,
-            databaseName: String,
-            tableName: String,
-            partitioning: List[String],
             format: String,
             options: Map[String, String],
             mode: SaveMode): DataFrame = {
@@ -55,7 +52,7 @@ object ExcelLoader extends Loader {
                          partitioning: List[String],
                          format: String,
                          options: Map[String, String])(implicit spark: SparkSession): DataFrame = {
-    write(df, location, databaseName, tableName, partitioning, format, options, SaveMode.Overwrite)
+    write(df, location, format, options, SaveMode.Overwrite)
   }
 
   override def insert(location: String,
@@ -65,7 +62,7 @@ object ExcelLoader extends Loader {
                       partitioning: List[String],
                       format: String,
                       options: Map[String, String])(implicit spark: SparkSession): DataFrame = {
-    write(updates, location, databaseName, tableName, partitioning, format, options, SaveMode.Append)
+    write(updates, location, format, options, SaveMode.Append)
   }
 
   override def upsert(location: String,
diff --git a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
index 03daf496..4ceffbaf 100644
--- a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
+++ b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
@@ -8,51 +8,33 @@ import org.scalatest.BeforeAndAfterEach
 
 import java.nio.file.{Files, Paths}
 
-class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterEach {
+class ExcelLoaderSpec extends SparkSpec {
 
   import spark.implicits._
 
   val folderPath: String = getClass.getClassLoader.getResource("raw/landing/").getPath
-  val outputLocation: String = folderPath + "output/airports.xlsx"
+  val outputLocation: String = "output/airports.xlsx"
   val expected: Seq[AirportInput] = Seq(
     AirportInput("1", "YYC", "Calgary Int airport"),
     AirportInput("2", "YUL", "Montreal Int airport")
   )
-  val simpleExpectedUpdate: Seq[AirportInput] = Seq(
+  val expectedUpdate: Seq[AirportInput] = Seq(
     AirportInput("3", "YVR", "Vancouver Int airport")
   )
 
   val initialDF: DataFrame = expected.toDF()
 
-  override def afterEach(): Unit = {
-    super.afterEach()
-    val outputPath = Paths.get(outputLocation)
-    if (Files.exists(outputPath)) {
-      cleanUpFilesRecursively(outputPath)
-    }
-  }
-
-  /**
-   * Recursively deletes files and directories at the given path. Necessary because spark-excel format API v2
-   * may create multiple excel partitions when writing to a folder.
-   * */
-  private def cleanUpFilesRecursively(path: java.nio.file.Path): Unit = {
-    if (Files.isDirectory(path)) {
-      Files.list(path).forEach(cleanUpFilesRecursively)
-    }
-    Files.deleteIfExists(path)
+  private def withInitialDfInFolder(rootPath: String)(testCode: String => Any): Unit = {
+    val dfPath: String = rootPath + outputLocation
+    ExcelLoader.writeOnce(dfPath, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+    testCode(dfPath)
   }
 
-  private def withInitialDfInFolder(testCode: => Any): Unit = {
-    ExcelLoader.writeOnce(outputLocation, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+  private def withUpdatedDfInFolder(updates: DataFrame, path: String)(testCode: => Any): Unit = {
+    ExcelLoader.insert(path, "", "", updates, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
     testCode
   }
 
-  private def withUpdatedDfInFolder(updates: DataFrame, testCode: String => Any): Unit = {
-    ExcelLoader.insert(outputLocation, "", "", updates, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
-    testCode(outputLocation)
-  }
-
   "read" should "read xlsx file as a DataFrame" in {
     val fileLocation = folderPath + "airports.xlsx"
 
@@ -76,50 +58,61 @@ class ExcelLoaderSpec extends SparkSpec with BeforeAndAfterEach {
   it should "throw an exception when the header option is missing" in {
     val fileLocation: String = folderPath + "airports.xlsx"
 
+    val options = Map.empty[String, String]
+    val databaseName, tableName : Option[String] = None
+
     an[IllegalArgumentException] should be thrownBy {
-      ExcelLoader.read(fileLocation, EXCEL.sparkFormat, Map.empty, None, None)
+      ExcelLoader.read(fileLocation, EXCEL.sparkFormat, options, databaseName, tableName)
     }
   }
 
-  it should "read folder containing multiple Excel files as a DataFrame" in withInitialDfInFolder {
-    withUpdatedDfInFolder(simpleExpectedUpdate.toDF(), { folderLocation =>
+  it should "read folder containing multiple Excel files as a DataFrame" in withOutputFolder("root") { root =>
+    withInitialDfInFolder(root) { folderLocation =>
+      withUpdatedDfInFolder(expectedUpdate.toDF(), folderLocation) {
 
-      val result = ExcelLoader.read(folderLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
+        val result = ExcelLoader.read(folderLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
 
-      result
-        .as[AirportInput]
-        .collect() should contain theSameElementsAs (expected ++ simpleExpectedUpdate)
-    })
+        result
+          .as[AirportInput]
+          .collect() should contain theSameElementsAs (expected ++ expectedUpdate)
+      }
+    }
   }
 
-  "writeOnce" should "write a dataframe to a file" in {
-    ExcelLoader.writeOnce(outputLocation, "", "", initialDF, Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+  "writeOnce" should "write a dataframe to a file" in withOutputFolder("root") { root =>
+    withInitialDfInFolder(root) { folderLocation =>
+
+      val result = ExcelLoader.read(folderLocation, EXCEL.sparkFormat, Map("header" -> "true"))
 
-    val result = ExcelLoader.read(outputLocation, EXCEL.sparkFormat, Map("header" -> "true"))
+      result.as[AirportInput].collect() should contain theSameElementsAs expected
 
-    result.as[AirportInput].collect() should contain theSameElementsAs expected
+    }
   }
 
-  it should "overwrite existing files when writing to the same folder" in withInitialDfInFolder {
-    //Overwriting the same location
-    ExcelLoader.writeOnce(outputLocation, "", "", simpleExpectedUpdate.toDF(), Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+  it should "overwrite existing files when writing to the same folder" in withOutputFolder("root") { root =>
+    withInitialDfInFolder(root) { folderLocation =>
 
-    val result = ExcelLoader.read(outputLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
+      //Overwriting the same location
+      ExcelLoader.writeOnce(outputLocation, "", "", expectedUpdate.toDF(), Nil, EXCEL.sparkFormat, Map("header" -> "true"))
 
-    result
-      .as[AirportInput]
-      .collect() should contain theSameElementsAs simpleExpectedUpdate
+      val result = ExcelLoader.read(folderLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
+
+      result
+        .as[AirportInput]
+        .collect() should contain theSameElementsAs expectedUpdate
+    }
   }
 
-  "insert" should "append a dataframe to an existing file" in withInitialDfInFolder {
-    withUpdatedDfInFolder(simpleExpectedUpdate.toDF(), {
-      folderLocation =>
+  "insert" should "append a dataframe to an existing file" in withOutputFolder("root") { root =>
+    withInitialDfInFolder(root) { folderLocation =>
+      withUpdatedDfInFolder(expectedUpdate.toDF(), folderLocation) {
         val result = ExcelLoader.read(folderLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
 
         result
           .as[AirportInput]
-          .collect() should contain theSameElementsAs (expected ++ simpleExpectedUpdate)
-    })
+          .collect() should contain theSameElementsAs (expected ++ expectedUpdate)
+      }
+    }
   }
 
 }

From da4adf7f6d95e4d15f733ab2d1a148340fc02e2f Mon Sep 17 00:00:00 2001
From: Robert <78926291+Asciax@users.noreply.github.com>
Date: Wed, 4 Mar 2026 16:03:31 -0500
Subject: [PATCH 5/6] fix: Place correct new path in test

---
 .../bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
index 4ceffbaf..432534e7 100644
--- a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
+++ b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
@@ -93,7 +93,7 @@ class ExcelLoaderSpec extends SparkSpec {
     withInitialDfInFolder(root) { folderLocation =>
 
       //Overwriting the same location
-      ExcelLoader.writeOnce(outputLocation, "", "", expectedUpdate.toDF(), Nil, EXCEL.sparkFormat, Map("header" -> "true"))
+      ExcelLoader.writeOnce(folderLocation, "", "", expectedUpdate.toDF(), Nil, EXCEL.sparkFormat, Map("header" -> "true"))
 
       val result = ExcelLoader.read(folderLocation, EXCEL.sparkFormat, Map("header" -> "true"), None, None)
 

From 38ac1136959e1eedb664e70dddb6666616fcb2e4 Mon Sep 17 00:00:00 2001
From: Robert <78926291+Asciax@users.noreply.github.com>
Date: Wed, 4 Mar 2026 16:58:50 -0500
Subject: [PATCH 6/6] fix: Implement suggested changes

---
 .../bio/ferlab/datalake/spark3/loader/ExcelLoader.scala   | 8 ++++----
 .../ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala   | 8 +-------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
index 7637bd1f..5d3946ee 100644
--- a/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
+++ b/datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/loader/ExcelLoader.scala
@@ -21,10 +21,10 @@ object ExcelLoader extends Loader {
   }
 
   private def write(df: DataFrame,
-            location: String,
-            format: String,
-            options: Map[String, String],
-            mode: SaveMode): DataFrame = {
+                    location: String,
+                    format: String,
+                    options: Map[String, String],
+                    mode: SaveMode): DataFrame = {
     // Excel format requires the schema to be non-empty, does not support empty schema dataframe writes
     require(df.schema.nonEmpty, "DataFrame must have a valid schema with at least one column.")
     require(options.isDefinedAt("header"), "Expecting [header] to be defined in readOptions.")
diff --git a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
index 432534e7..4ce372ee 100644
--- a/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
+++ b/datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/loader/ExcelLoaderSpec.scala
@@ -4,9 +4,6 @@ import bio.ferlab.datalake.commons.config.Format.EXCEL
 import bio.ferlab.datalake.spark3.testutils.AirportInput
 import bio.ferlab.datalake.testutils.SparkSpec
 import org.apache.spark.sql.DataFrame
-import org.scalatest.BeforeAndAfterEach
-
-import java.nio.file.{Files, Paths}
 
 class ExcelLoaderSpec extends SparkSpec {
 
@@ -58,11 +55,8 @@ class ExcelLoaderSpec extends SparkSpec {
   it should "throw an exception when the header option is missing" in {
     val fileLocation: String = folderPath + "airports.xlsx"
 
-    val options = Map.empty[String, String]
-    val databaseName, tableName : Option[String] = None
-
     an[IllegalArgumentException] should be thrownBy {
-      ExcelLoader.read(fileLocation, EXCEL.sparkFormat, options, databaseName, tableName)
+      ExcelLoader.read(fileLocation, EXCEL.sparkFormat, readOptions = Map.empty, databaseName = None, tableName = None)
     }
   }