|
17 | 17 | (set! *warn-on-reflection* true) |
18 | 18 |
|
;; Shared "HH:mm:ss" formatter pinned to UTC.
;; NOTE: java.text.SimpleDateFormat is documented as not synchronized, so
;; concurrent callers must synchronize externally when sharing this instance.
(def ^SimpleDateFormat SDF
  (let [fmt (SimpleDateFormat. "HH:mm:ss")]
    (.setTimeZone fmt (TimeZone/getTimeZone "UTC"))
    fmt))
21 | 21 | (def ^:const BASE_ROW_INDEX 0) |
22 | 22 | (def ^:const BASE_COLUMN_INDEX 0) |
23 | 23 | (def ^:const A_CHAR_INDEX (int \A)) |
|
32 | 32 |
|
33 | 33 | (def ^:const pcts #{"9" "10"}) |
34 | 34 |
|
(def ^:const error-codes
  "Maps Excel error strings to keywords.
  The nil key yields :unknown-error when the map is looked up with nil."
  {"#NAME?"  :bad-name
   "#DIV/0!" :div-by-0
   "#REF!"   :invalid-reference
   "#NUM!"   :infinity
   "#N/A"    :not-applicable
   "#VALUE!" :invalid-value
   "#NULL!"  :null
   "#SPILL!" :multiple-results
   nil       :unknown-error})
|
90 | 90 | (ZipFile. file) |
91 | 91 | (throw-ex (format "Could not open '%s'! File does not exist." file-or-filename))))) |
92 | 92 |
|
(defn get-workbook-relationships
  "Read xl/_rels/workbook.xml.rels from the given zip archive and build a
  lookup from relationship Id to Target
  (e.g. {\"rId1\" \"worksheets/sheet1.xml\"}).
  Yields an empty map when the archive has no such entry."
  [^ZipFile zipfile]
  (let [rels-entry (.getEntry zipfile "xl/_rels/workbook.xml.rels")]
    (if-not rels-entry
      {}
      (with-open [in (.getInputStream zipfile rels-entry)]
        (let [root          (xml/parse in {:namespace-aware false})
              relationship? (by-tag :Relationship)]
          ;; reduce is eager, so the parsed content is fully consumed
          ;; before with-open closes the underlying stream.
          (reduce (fn [acc node]
                    (let [{:keys [Id Target]} (:attrs node)]
                      (assoc acc Id Target)))
                  {}
                  (filter relationship? (:content root))))))))
| 107 | + |
93 | 108 | (defn get-sheet-names* |
94 | 109 | [^ZipFile zipfile] |
95 | 110 | (if-let [workbook-entry (.getEntry zipfile "xl/workbook.xml")] |
|
100 | 115 | sheet-nodes (->> (:content sheets-node) |
101 | 116 | (filter (by-tag :sheet)))] |
102 | 117 | (into [] (comp (map :attrs) |
103 | | - (map #(select-keys % [:sheetId :name])) |
| 118 | + (map #(select-keys % [:sheetId :name :id])) |
104 | 119 | (map #(update % :sheetId parse-xlong)) |
105 | | - (map #(rename-keys % {:sheetId :idx}))) |
| 120 | + (map #(rename-keys % {:sheetId :idx :id :rid}))) |
106 | 121 | sheet-nodes))) |
107 | 122 | [])) |
108 | 123 |
|
(defn get-sheet-names
  "Retrieves a list of Sheet Names from a given Excel Spreadsheet.
  Returns a vector of maps with :name and :idx keys.

  file-or-filename is handed to get-zipfile, which throws when the file
  cannot be opened. The internal :rid key is stripped from every entry."
  [file-or-filename]
  ;; with-open releases the ZipFile handle once the names are read; the
  ;; previous version never closed it. mapv is eager, so the result is fully
  ;; built before the archive is closed.
  (with-open [^ZipFile zipfile (get-zipfile file-or-filename)]
    (mapv #(dissoc % :rid) (get-sheet-names* zipfile))))
114 | 130 |
|
115 | 131 | (defn num2date |
116 | 132 | "Format Excel Date" |
|
183 | 199 | (mapv #(-> % :attrs :numFmtId) xf-nodes))) |
184 | 200 | [])) |
185 | 201 |
|
186 | | - |
(defn valid-cell-index?
  "True when cell-index is a reference made of one to three uppercase
  letters followed by at least one digit (e.g. \"A1\", \"ZZ42\").
  A nil/false cell-index yields false."
  [cell-index]
  (boolean (and cell-index
                (re-find #"^[A-Z]{1,3}\d+$" cell-index))))
192 | 207 |
|
193 | | - |
194 | | - |
195 | 208 | (defn number->column-letter |
196 | 209 | [n] |
197 | 210 | (loop [num n |
|
202 | 215 | (recur new-num (str (char (+ residue A_CHAR_INDEX)) acc))) |
203 | 216 | acc))) |
204 | 217 |
|
(defn column-letter->number
  "Turn a column label into its 1-based number by reading the letters as
  base-26 digits: A=1, B=2, ..., Z=26, AA=27, and so on."
  [col-str]
  (letfn [(letter-value [ch]
            ;; offset from \A, shifted so that \A counts as 1
            (inc (- (int ch) A_CHAR_INDEX)))
          (step [total ch]
            (+ (* 26 total) (letter-value ch)))]
    (reduce step 0 col-str)))
| 226 | + |
(defn get-col-index
  "Resolve the column of a cell as a pair [col-letter col-number], with
  col-number being the 1-based numeric index.
  The letters are read from the cell's :r attribute when it is a valid cell
  index; otherwise the column following last-processed-col-number is used."
  [cell last-processed-col-number]
  (let [cell-ref (:r (:attrs cell))]
    (if-not (valid-cell-index? cell-ref)
      ;; no usable :r attribute: fall back to the next column in sequence
      (let [next-number (inc last-processed-col-number)]
        [(number->column-letter next-number) next-number])
      (let [letters (re-find #"[A-Z]{1,3}" cell-ref)]
        [letters (column-letter->number letters)]))))
214 | 237 |
|
(defn process-row
  "Process Excel row of data.
  Folds over the row's :content and returns a map from column-letter keyword
  (e.g. :A) to the extracted cell value."
  [shared-strings styles row]
  (letfn [(add-cell [[row-data prev-col-number] cell]
            (let [[col-letter col-number] (get-col-index cell prev-col-number)
                  cell-value (extract-cell-value shared-strings styles cell)]
              ;; carry the column number forward for cells lacking an :r attribute
              [(assoc row-data (keyword col-letter) cell-value)
               col-number]))]
    (first (reduce add-cell
                   [{} BASE_COLUMN_INDEX]
                   (:content row)))))
227 | 250 |
|
228 | 251 | (defn process-rows |
229 | | - [shared-strings styles last-processed-row-index rows] |
| 252 | + [shared-strings styles last-processed-row-index rows] |
230 | 253 | (lazy-seq |
231 | 254 | (when rows |
232 | 255 | (let [row (first rows) |
|
240 | 263 | row-index |
241 | 264 | (next rows))))))) |
242 | 265 |
|
243 | | -(defn get-and-check-sheet-id |
244 | | - [^ZipFile zipfile sheetname-or-idx] |
245 | | - (let [sheets (get-sheet-names* zipfile) |
246 | | - found-sheet |
247 | | - (find-first (fn [sheet] |
248 | | - (cond |
249 | | - (string? sheetname-or-idx) |
250 | | - (= (str/lower-case sheetname-or-idx) |
251 | | - (str/lower-case (:name sheet))) |
252 | | - |
253 | | - (and (integer? sheetname-or-idx) |
254 | | - (pos? sheetname-or-idx)) |
255 | | - (= sheetname-or-idx (:idx sheet)))) |
256 | | - sheets)] |
257 | | - (or (:idx found-sheet) |
258 | | - (throw-ex (format "Could not find sheet with name or index equal '%s'! Sheet does not exist." sheetname-or-idx))))) |
(defn find-sheet-by-name-or-index
  "Look up a sheet in sheets either by name or by position.
  A string is compared case-insensitively against each sheet's :name.
  A positive integer is a 1-based position in sheets (not the internal
  sheetId). Any other argument, or no match, yields nil."
  [sheets sheetname-or-idx]
  (if (string? sheetname-or-idx)
    (let [wanted (str/lower-case sheetname-or-idx)]
      (find-first #(= wanted (str/lower-case (:name %))) sheets))
    (when (and (integer? sheetname-or-idx)
               (pos? sheetname-or-idx))
      ;; positional lookup; out-of-range positions fall through to nil
      (nth sheets (dec sheetname-or-idx) nil))))
259 | 283 |
|
(defn get-sheet-entry
  "Return the ZipEntry of the worksheet referenced by relationship id rid.
  rels maps relationship ids to Target paths. A Target starting with \"/\"
  is used without the leading slash; any other Target is prefixed with
  \"xl/\". Calls throw-ex when rid is not in rels or when the resulting
  path is not an entry of zipfile."
  [^ZipFile zipfile rels rid]
  (let [target (get rels rid)]
    (if-not target
      (throw-ex (format "Could not find relationship with id '%s'!" rid))
      (let [path  (if (str/starts-with? target "/")
                    (subs target 1)
                    (str "xl/" target))
            entry (.getEntry zipfile path)]
        (or entry
            (throw-ex (format "Could not find worksheet file '%s' for relationship '%s'!" path rid)))))))
264 | 295 |
|
265 | 296 | (defn get-sheet |
266 | 297 | "Get sheet from file or filename" |
|
270 | 301 | (get-sheet file-or-filename sheetname-or-idx {})) |
271 | 302 | ([file-or-filename sheetname-or-idx options] |
272 | 303 | (let [^ZipFile zipfile (get-zipfile file-or-filename) |
273 | | - ^long sheet-id (get-and-check-sheet-id zipfile sheetname-or-idx) |
274 | | - ^ZipEntry sheet-entry (get-sheet-entry zipfile sheet-id) |
| 304 | + sheets (get-sheet-names* zipfile) |
| 305 | + found-sheet (find-sheet-by-name-or-index sheets sheetname-or-idx) |
| 306 | + _ (when-not found-sheet |
| 307 | + (throw-ex (format "Could not find sheet with name or index equal '%s'! Sheet does not exist." sheetname-or-idx))) |
| 308 | + rels (get-workbook-relationships zipfile) |
| 309 | + ^ZipEntry sheet-entry (get-sheet-entry zipfile rels (:rid found-sheet)) |
275 | 310 | opts (merge defaults options) |
276 | 311 | row (:row opts) |
277 | 312 | hdr (:hdr opts) |
|
299 | 334 | (mapv #(rename-keys % h) dx))] |
300 | 335 | (if (empty? cols) dy (mapv #(select-keys % cols) dy))))))) |
301 | 336 |
|
302 | | - |
303 | 337 | (defn get-sheets |
304 | 338 | "Get all or specified sheet from the excel spreadsheet" |
305 | 339 | ([file-or-filename] |
|
384 | 418 | [cs ce] cols] |
385 | 419 | (get-cells sheet (range rs re) (crange cs ce)))) |
386 | 420 |
|
387 | | - |
388 | | - |
389 | 421 | (defn ws-relationships [n] |
390 | 422 | (str xmlh |
391 | 423 | (hc/html |
|
395 | 427 | :Type "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" |
396 | 428 | :Target (str "worksheets/sheet" (inc x) ".xml")}]))))) |
397 | 429 |
|
398 | | -(defn- content-types |
| 430 | +(defn- content-types |
399 | 431 | "Generate Content Types" |
400 | 432 | [n] |
401 | 433 | (str xmlh |
|
411 | 443 | [:Override {:PartName (str "/xl/worksheets/sheet" (inc x) ".xml") |
412 | 444 | :ContentType "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"}]))))) |
413 | 445 |
|
(defn excel-date-serial
  "Convert a java LocalDate to an MS Excel integer value:
  the number of days between 1899-12-30 and datetime."
  [datetime]
  (let [epoch (LocalDate/of 1899 Month/DECEMBER 30)]
    (.between ChronoUnit/DAYS epoch datetime)))
418 | 450 |
|
(defn excel-time-serial
  "Convert a java LocalDateTime to an MS Excel decimal value:
  the seconds elapsed since 1899-12-30T00:00 divided by 86400.0."
  [datetime]
  (let [epoch   (LocalDateTime/of 1899 Month/DECEMBER 30 0 0)
        seconds (.between ChronoUnit/SECONDS epoch datetime)]
    (/ seconds 86400.0)))
423 | 455 |
|
424 | | -(defn- cell-type |
| 456 | +(defn- cell-type |
425 | 457 | "Determine cell data type" |
426 | 458 | [value] |
427 | 459 | (cond |
|
440 | 472 | (inc r)) |
441 | 473 | :t t} v])) |
442 | 474 |
|
443 | | -(defn- generate-xml-row |
| 475 | +(defn- generate-xml-row |
444 | 476 | "Generate row information in hiccup format" |
445 | 477 | ([row-data row-num] |
446 | 478 | [:row {:r (inc row-num)} |
|
458 | 490 | :when col-letter] |
459 | 491 | (generate-xml-cell col-letter row-num val)))])) |
460 | 492 |
|
461 | | -(defn- create-sheet-xml |
| 493 | +(defn- create-sheet-xml |
462 | 494 | "Create the sheet data in hiccup format. |
463 | 495 | Checks to see if the data provided is a vector of hashmaps vs a vector of vectors" |
464 | 496 | [data] |
|
479 | 511 | (.write ^ZipOutputStream zip-stream (.getBytes ^String content "UTF-8")) |
480 | 512 | (.closeEntry ^ZipOutputStream zip-stream))) |
481 | 513 |
|
482 | | -(defn create-xlsx |
| 514 | +(defn create-xlsx |
483 | 515 | "Create an Excel spreadsheet |
484 | 516 | file-path : Destination folder and filename. e.g /test/sample.xlsx will create the folder test |
485 | 517 | if it does not exist and place the newly created sample.xlsx in that folder |
|
0 commit comments