Skip to content

Commit 773fd17

Browse files
author
Kay Bosompem
committed
Fix sheet lookup and cell reference parsing issues
- Fix #17: get-sheet by name now correctly uses relationship IDs (r:id) to find worksheet files instead of relying on sheetId, which breaks when sheets have been deleted leaving gaps in IDs - Fix #18: Cells without 'r' attribute (cell reference) are now correctly assigned sequential column letters by tracking numeric column index instead of string column letters - Add comprehensive tests for both fixes with new test data files - Update CI workflow with Java 21, caching, and Clojars deploy on tags - Bump version to 0.2.0
1 parent 203306c commit 773fd17

7 files changed

Lines changed: 220 additions & 71 deletions

File tree

.github/workflows/build.yml

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,61 @@ name: Build bb-excel
33
on:
44
push:
55
branches: [ "main" ]
6+
tags:
7+
- 'v*'
68
pull_request:
79
branches: [ "main" ]
810

911
jobs:
10-
build:
11-
12+
test:
1213
runs-on: ubuntu-latest
14+
steps:
15+
- uses: actions/checkout@v4
16+
- name: Setup Java
17+
uses: actions/setup-java@v4
18+
with:
19+
distribution: 'temurin'
20+
java-version: '21'
21+
- name: Install Leiningen
22+
uses: DeLaGuardo/setup-clojure@12.5
23+
with:
24+
lein: 2.11.2
25+
- name: Cache dependencies
26+
uses: actions/cache@v4
27+
with:
28+
path: ~/.m2/repository
29+
key: ${{ runner.os }}-lein-${{ hashFiles('**/project.clj') }}
30+
restore-keys: |
31+
${{ runner.os }}-lein-
32+
- name: Install dependencies
33+
run: lein deps
34+
- name: Run tests
35+
run: lein test
1336

37+
deploy:
38+
needs: test
39+
runs-on: ubuntu-latest
40+
if: startsWith(github.ref, 'refs/tags/v')
1441
steps:
15-
- uses: actions/checkout@v3
16-
- name: Install dependencies
17-
run: lein deps
18-
- name: Run tests
19-
run: lein test
42+
- uses: actions/checkout@v4
43+
- name: Setup Java
44+
uses: actions/setup-java@v4
45+
with:
46+
distribution: 'temurin'
47+
java-version: '21'
48+
- name: Install Leiningen
49+
uses: DeLaGuardo/setup-clojure@12.5
50+
with:
51+
lein: 2.11.2
52+
- name: Cache dependencies
53+
uses: actions/cache@v4
54+
with:
55+
path: ~/.m2/repository
56+
key: ${{ runner.os }}-lein-${{ hashFiles('**/project.clj') }}
57+
restore-keys: |
58+
${{ runner.os }}-lein-
59+
- name: Deploy to Clojars
60+
env:
61+
CLOJARS_USERNAME: ${{ secrets.CLOJARS_USERNAME }}
62+
CLOJARS_PASSWORD: ${{ secrets.CLOJARS_PASSWORD }}
63+
run: lein deploy clojars

project.clj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
(defproject com.github.kbosompem/bb-excel "0.1.1"
1+
(defproject com.github.kbosompem/bb-excel "0.2.0"
22
:description "A Simple Babashka Library for working with Microsoft Excel Files"
33
:url "https://github.com/kbosompem/bb-excel"
44
:license {:name "EPL-2.0"

src/bb_excel/core.clj

Lines changed: 85 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
(set! *warn-on-reflection* true)
1818

1919
(def ^SimpleDateFormat SDF (doto (SimpleDateFormat. "HH:mm:ss")
20-
(.setTimeZone (TimeZone/getTimeZone "UTC"))))
20+
(.setTimeZone (TimeZone/getTimeZone "UTC"))))
2121
(def ^:const BASE_ROW_INDEX 0)
2222
(def ^:const BASE_COLUMN_INDEX 0)
2323
(def ^:const A_CHAR_INDEX (int \A))
@@ -32,13 +32,13 @@
3232

3333
(def ^:const pcts #{"9" "10"})
3434

35-
(def ^:const error-codes
35+
(def ^:const error-codes
3636
{"#NAME?" :bad-name
3737
"#DIV/0!" :div-by-0
3838
"#REF!" :invalid-reference
3939
"#NUM!" :infinity
4040
"#N/A" :not-applicable
41-
"#VALUE!" :invalid-value
41+
"#VALUE!" :invalid-value
4242
"#NULL!" :null
4343
"#SPILL!" :multiple-results
4444
nil :unknown-error})
@@ -90,6 +90,21 @@
9090
(ZipFile. file)
9191
(throw-ex (format "Could not open '%s'! File does not exist." file-or-filename)))))
9292

93+
(defn get-workbook-relationships
94+
"Get the relationship mappings from xl/_rels/workbook.xml.rels.
95+
Returns a map from rId to the Target path (e.g. {\"rId1\" \"worksheets/sheet1.xml\"})."
96+
[^ZipFile zipfile]
97+
(if-let [rels-entry (.getEntry zipfile "xl/_rels/workbook.xml.rels")]
98+
(with-open [rels (.getInputStream zipfile rels-entry)]
99+
(let [rels-node (xml/parse rels {:namespace-aware false})
100+
rel-nodes (->> (:content rels-node)
101+
(filter (by-tag :Relationship)))]
102+
(into {} (map (fn [rel]
103+
(let [attrs (:attrs rel)]
104+
[(:Id attrs) (:Target attrs)])))
105+
rel-nodes)))
106+
{}))
107+
93108
(defn get-sheet-names*
94109
[^ZipFile zipfile]
95110
(if-let [workbook-entry (.getEntry zipfile "xl/workbook.xml")]
@@ -100,17 +115,18 @@
100115
sheet-nodes (->> (:content sheets-node)
101116
(filter (by-tag :sheet)))]
102117
(into [] (comp (map :attrs)
103-
(map #(select-keys % [:sheetId :name]))
118+
(map #(select-keys % [:sheetId :name :id]))
104119
(map #(update % :sheetId parse-xlong))
105-
(map #(rename-keys % {:sheetId :idx})))
120+
(map #(rename-keys % {:sheetId :idx :id :rid})))
106121
sheet-nodes)))
107122
[]))
108123

109124
(defn get-sheet-names
110-
"Retrieves a list of Sheet Names from a given Excel Spreadsheet"
125+
"Retrieves a list of Sheet Names from a given Excel Spreadsheet.
126+
Returns a vector of maps with :name and :idx keys."
111127
[file-or-filename]
112128
(let [^ZipFile zipfile (get-zipfile file-or-filename)]
113-
(get-sheet-names* zipfile)))
129+
(mapv #(dissoc % :rid) (get-sheet-names* zipfile))))
114130

115131
(defn num2date
116132
"Format Excel Date"
@@ -183,15 +199,12 @@
183199
(mapv #(-> % :attrs :numFmtId) xf-nodes)))
184200
[]))
185201

186-
187202
(defn valid-cell-index?
188203
[cell-index]
189204
(if cell-index
190205
(boolean (re-find #"^[A-Z]{1,3}\d+$" cell-index))
191206
false))
192207

193-
194-
195208
(defn number->column-letter
196209
[n]
197210
(loop [num n
@@ -202,31 +215,41 @@
202215
(recur new-num (str (char (+ residue A_CHAR_INDEX)) acc)))
203216
acc)))
204217

218+
(defn column-letter->number
219+
"Convert column letter(s) to a 1-based numeric index.
220+
A=1, B=2, ..., Z=26, AA=27, etc."
221+
[col-str]
222+
(reduce (fn [acc c]
223+
(+ (* acc 26) (- (int c) (dec A_CHAR_INDEX))))
224+
0
225+
col-str))
226+
205227
(defn get-col-index
206-
"Self-calculated index is used only if cell-index attribute(:r) is missing on the cell"
207-
[cell last-processed-col-index]
228+
"Returns a vector of [col-letter col-number] where col-number is the 1-based numeric index.
229+
Self-calculated index is used only if cell-index attribute(:r) is missing on the cell"
230+
[cell last-processed-col-number]
208231
(let [cell-index (-> cell :attrs :r)]
209232
(if (valid-cell-index? cell-index)
210-
(re-find #"[A-Z]{1,3}" cell-index)
211-
(-> last-processed-col-index
212-
(inc)
213-
(number->column-letter)))))
233+
(let [col-letter (re-find #"[A-Z]{1,3}" cell-index)]
234+
[col-letter (column-letter->number col-letter)])
235+
(let [new-col-number (inc last-processed-col-number)]
236+
[(number->column-letter new-col-number) new-col-number]))))
214237

215238
(defn process-row
216239
"Process Excel row of data"
217240
[shared-strings styles row]
218241
(->> (:content row)
219-
(reduce (fn [{:keys [row-data last-processed-col-index]} cell]
220-
(let [col-index (get-col-index cell last-processed-col-index)
242+
(reduce (fn [{:keys [row-data last-processed-col-number]} cell]
243+
(let [[col-letter col-number] (get-col-index cell last-processed-col-number)
221244
cell-value (extract-cell-value shared-strings styles cell)]
222-
{:row-data (assoc row-data (keyword col-index) cell-value)
223-
:last-processed-col-index col-index}))
245+
{:row-data (assoc row-data (keyword col-letter) cell-value)
246+
:last-processed-col-number col-number}))
224247
{:row-data {}
225-
:last-processed-col-index BASE_COLUMN_INDEX})
248+
:last-processed-col-number BASE_COLUMN_INDEX})
226249
(:row-data)))
227250

228251
(defn process-rows
229-
[shared-strings styles last-processed-row-index rows]
252+
[shared-strings styles last-processed-row-index rows]
230253
(lazy-seq
231254
(when rows
232255
(let [row (first rows)
@@ -240,27 +263,35 @@
240263
row-index
241264
(next rows)))))))
242265

243-
(defn get-and-check-sheet-id
244-
[^ZipFile zipfile sheetname-or-idx]
245-
(let [sheets (get-sheet-names* zipfile)
246-
found-sheet
247-
(find-first (fn [sheet]
248-
(cond
249-
(string? sheetname-or-idx)
250-
(= (str/lower-case sheetname-or-idx)
251-
(str/lower-case (:name sheet)))
252-
253-
(and (integer? sheetname-or-idx)
254-
(pos? sheetname-or-idx))
255-
(= sheetname-or-idx (:idx sheet))))
256-
sheets)]
257-
(or (:idx found-sheet)
258-
(throw-ex (format "Could not find sheet with name or index equal '%s'! Sheet does not exist." sheetname-or-idx)))))
266+
(defn find-sheet-by-name-or-index
267+
"Find a sheet by name (case-insensitive) or by positional index (1-based).
268+
When using an integer index, it refers to the position in the sheets list,
269+
not the internal sheetId."
270+
[sheets sheetname-or-idx]
271+
(cond
272+
(string? sheetname-or-idx)
273+
(find-first (fn [sheet]
274+
(= (str/lower-case sheetname-or-idx)
275+
(str/lower-case (:name sheet))))
276+
sheets)
277+
278+
(and (integer? sheetname-or-idx) (pos? sheetname-or-idx))
279+
;; Use 1-based positional index, not sheetId
280+
(nth sheets (dec sheetname-or-idx) nil)
281+
282+
:else nil))
259283

260284
(defn get-sheet-entry
261-
[^ZipFile zipfile ^long sheet-id]
262-
(or (.getEntry zipfile (str "xl/worksheets/sheet" sheet-id ".xml"))
263-
(throw-ex (format "Could not find sheet with sheet-id equal '%s'! Sheet data file does not exist." sheet-id))))
285+
"Get the ZipEntry for a worksheet using the relationship ID.
286+
The rels map provides the mapping from rId to the actual worksheet path."
287+
[^ZipFile zipfile rels rid]
288+
(if-let [target (get rels rid)]
289+
(let [path (if (str/starts-with? target "/")
290+
(subs target 1)
291+
(str "xl/" target))]
292+
(or (.getEntry zipfile path)
293+
(throw-ex (format "Could not find worksheet file '%s' for relationship '%s'!" path rid))))
294+
(throw-ex (format "Could not find relationship with id '%s'!" rid))))
264295

265296
(defn get-sheet
266297
"Get sheet from file or filename"
@@ -270,8 +301,12 @@
270301
(get-sheet file-or-filename sheetname-or-idx {}))
271302
([file-or-filename sheetname-or-idx options]
272303
(let [^ZipFile zipfile (get-zipfile file-or-filename)
273-
^long sheet-id (get-and-check-sheet-id zipfile sheetname-or-idx)
274-
^ZipEntry sheet-entry (get-sheet-entry zipfile sheet-id)
304+
sheets (get-sheet-names* zipfile)
305+
found-sheet (find-sheet-by-name-or-index sheets sheetname-or-idx)
306+
_ (when-not found-sheet
307+
(throw-ex (format "Could not find sheet with name or index equal '%s'! Sheet does not exist." sheetname-or-idx)))
308+
rels (get-workbook-relationships zipfile)
309+
^ZipEntry sheet-entry (get-sheet-entry zipfile rels (:rid found-sheet))
275310
opts (merge defaults options)
276311
row (:row opts)
277312
hdr (:hdr opts)
@@ -299,7 +334,6 @@
299334
(mapv #(rename-keys % h) dx))]
300335
(if (empty? cols) dy (mapv #(select-keys % cols) dy)))))))
301336

302-
303337
(defn get-sheets
304338
"Get all or specified sheet from the excel spreadsheet"
305339
([file-or-filename]
@@ -384,8 +418,6 @@
384418
[cs ce] cols]
385419
(get-cells sheet (range rs re) (crange cs ce))))
386420

387-
388-
389421
(defn ws-relationships [n]
390422
(str xmlh
391423
(hc/html
@@ -395,7 +427,7 @@
395427
:Type "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"
396428
:Target (str "worksheets/sheet" (inc x) ".xml")}])))))
397429

398-
(defn- content-types
430+
(defn- content-types
399431
"Generate Content Types"
400432
[n]
401433
(str xmlh
@@ -411,17 +443,17 @@
411443
[:Override {:PartName (str "/xl/worksheets/sheet" (inc x) ".xml")
412444
:ContentType "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"}])))))
413445

414-
(defn excel-date-serial
446+
(defn excel-date-serial
415447
"Convert a java LocalDate to an MS Excel integer value"
416448
[datetime]
417449
(.between ChronoUnit/DAYS (LocalDate/of 1899 Month/DECEMBER 30) datetime))
418450

419-
(defn excel-time-serial
451+
(defn excel-time-serial
420452
"Convert a java LocalDateTime to an MS Excel decimal value."
421453
[datetime]
422454
(/ (.between ChronoUnit/SECONDS (LocalDateTime/of 1899 Month/DECEMBER 30 0 0) datetime) 86400.0))
423455

424-
(defn- cell-type
456+
(defn- cell-type
425457
"Determine cell data type"
426458
[value]
427459
(cond
@@ -440,7 +472,7 @@
440472
(inc r))
441473
:t t} v]))
442474

443-
(defn- generate-xml-row
475+
(defn- generate-xml-row
444476
"Generate row information in hiccup format"
445477
([row-data row-num]
446478
[:row {:r (inc row-num)}
@@ -458,7 +490,7 @@
458490
:when col-letter]
459491
(generate-xml-cell col-letter row-num val)))]))
460492

461-
(defn- create-sheet-xml
493+
(defn- create-sheet-xml
462494
"Create the sheet data in hiccup format.
463495
Checks to see if the data provided is a vector of hashmaps vs a vector of vectors"
464496
[data]
@@ -479,7 +511,7 @@
479511
(.write ^ZipOutputStream zip-stream (.getBytes ^String content "UTF-8"))
480512
(.closeEntry ^ZipOutputStream zip-stream)))
481513

482-
(defn create-xlsx
514+
(defn create-xlsx
483515
"Create an Excel spreadsheet
484516
file-path : Destination folder and filename. e.g /test/sample.xlsx will create the folder test
485517
if it does not exist and place the newly created sample.xlsx in that folder

0 commit comments

Comments
 (0)