After bundling hickory in an uberjar, it throws an exception when running that .jar file #80
Open
Description
I created a Clojure web app and used hickory for scraping web pages. I used io.github.clojure/tools.build {:git/tag "v0.9.1" :git/sha "27ff8a4"}
to create an uberjar of the app. I build it with clj -T:build uber
and my deps.edn looks like this:
;; deps.edn of the app: source paths, runtime dependencies and the :build / :dev aliases.
;; NOTE(review): "src/dev" is listed in the default :paths next to "src/clj" — presumably
;; dev-only sources; verify they should be on the classpath used to build the uberjar.
{:paths ["src/clj" "src/dev"]
:deps {ring/ring {:mvn/version "1.4.0"}
http-kit/http-kit {:mvn/version "2.5.3"}
com.taoensso/timbre {:mvn/version "5.2.1"}
metosin/reitit {:mvn/version "0.5.17"}
metosin/ring-http-response {:mvn/version "0.9.3"}
org.clj-commons/hickory {:mvn/version "0.7.3"}
hiccup/hiccup {:mvn/version "1.0.5"}
clojure.java-time/clojure.java-time {:mvn/version "1.2.0"}
org.clojure/core.async {:mvn/version "1.6.673"}
com.draines/postal {:mvn/version "2.0.5"}}
;; :build is the tool alias invoked as `clj -T:build uber`; its default namespace is `uberjar`
;; and its sources live under src/build.
:aliases {:build {:extra-paths ["src/build"]
:extra-deps {io.github.clojure/tools.build {:git/tag "v0.9.1" :git/sha "27ff8a4"}
;; hickory is declared here as well as in the top-level :deps, with the same version.
org.clj-commons/hickory {:mvn/version "0.7.3"}}
:ns-default uberjar}
;; :dev runs the gajbe.server namespace as the main entry point.
:dev {:main-opts ["-m" "gajbe.server"]}}}
Then, when I tried running the app with the java -jar target/gajbe.jar
command, I encountered this exception:
Exception in thread "async-dispatch-1" java.lang.NoClassDefFoundError: hickory/core/HickoryRepresentable
at gajbe.rasclanjivaci.ProcesorBeogradskiOglasi.izvuci_oglase(rasclanjivaci.clj:97)
at gajbe.rasclanjivaci$fn__24183$G__24153__24185.invoke(rasclanjivaci.clj:10)
at gajbe.rasclanjivaci$fn__24183$G__24152__24188.invoke(rasclanjivaci.clj:10)
at clojure.core$map$fn__5935.invoke(core.clj:2770)
at clojure.lang.LazySeq.sval(LazySeq.java:42)
at clojure.lang.LazySeq.seq(LazySeq.java:51)
at clojure.lang.RT.seq(RT.java:535)
at clojure.core$seq__5467.invokeStatic(core.clj:139)
at clojure.core$apply.invokeStatic(core.clj:662)
at clojure.core$mapcat.invokeStatic(core.clj:2800)
at clojure.core$mapcat.doInvoke(core.clj:2800)
at clojure.lang.RestFn.invoke(RestFn.java:423)
at gajbe.rasclanjivaci$dohvati_oglase.invokeStatic(rasclanjivaci.clj:117)
at gajbe.rasclanjivaci$dohvati_oglase.invoke(rasclanjivaci.clj:115)
at gajbe.poslovi$pokreni_obavestavaca$fn__24511$state_machine__21095__auto____24512$fn__24514.invoke(poslovi.clj:12)
at gajbe.poslovi$pokreni_obavestavaca$fn__24511$state_machine__21095__auto____24512.invoke(poslovi.clj:12)
at clojure.core.async.impl.runtime$run_state_machine.invokeStatic(runtime.clj:62)
at clojure.core.async.impl.runtime$run_state_machine.invoke(runtime.clj:61)
at clojure.core.async.impl.runtime$run_state_machine_wrapped.invokeStatic(runtime.clj:66)
at clojure.core.async.impl.runtime$run_state_machine_wrapped.invoke(runtime.clj:64)
at gajbe.poslovi$pokreni_obavestavaca$fn__24511.invoke(poslovi.clj:12)
at clojure.lang.AFn.run(AFn.java:22)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at clojure.core.async.impl.concurrent$counted_thread_factory$reify__15124$fn__15125.invoke(concurrent.clj:29)
at clojure.lang.AFn.run(AFn.java:22)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.ClassNotFoundException: hickory.core.HickoryRepresentable
at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:581)
at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:178)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:522)
... 27 more
This has nothing to do with the code running on an async thread, because the same exception occurs when it runs synchronously. I have attached the uberjar and the only file in which I use the library.
uberjar: gajbe.jar.zip
Clojure ns using hickory:
(ns gajbe.rasclanjivaci
(:require [clojure.string :as str]
[hickory.core :as h]
[hickory.select :as hs]
[gajbe.urlovi :as url]
[gajbe.util :refer [-m]]
[java-time.api :as jt])
(:import (java.time ZoneId)))
;; Protocol implemented by every per-site ad processor in this namespace:
;; fetch the listing page, extract the ad nodes, and turn one node into an ad map.
(defprotocol IzvuciOglase
(dohvati-stranicu [this] "accepts the list of all URLs and returns the HTML page")
(izvuci-oglase [this] "takes the page and returns a collection of maps, each of which is one ad")
(obradi-oglas [this oglas] "takes the hickory data of a single ad and returns its elements"))
(defn datum-string->instant
  "Parses the date string `datum` using the pattern `format` and returns the
  instant at which that calendar day starts in the JVM's default time zone."
  [^String datum ^String format]
  (let [dan    (jt/local-date (jt/formatter format) datum)
        ponoc  (.atStartOfDay dan)
        u-zoni (jt/zoned-date-time ponoc (ZoneId/systemDefault))]
    (jt/instant u-zoni)))
(defn- relativni-u-apsolutni
  "Turns the date label `datum` shown next to an ad into an instant.

  The whole string must match one of the patterns below (they are tried in
  order with `re-matches`): today, yesterday, \"pre nedelju dana\",
  \"pre N nedelj*\", \"pre N dana\", \"N dan* pre\", \"N sat* pre\",
  \"N minut* pre\", or an absolute date such as \"Mar 21, 2023\".

  Relative labels are subtracted from the current instant; the absolute form is
  delegated to `datum-string->instant`. When nothing matches, `condp` throws
  IllegalArgumentException.

  Numbers are parsed with Long/parseLong rather than the Clojure reader, so a
  zero-padded value such as \"08\" is read as decimal 8 instead of failing as an
  invalid octal literal, and scraped text is never handed to the reader."
  [^String datum]
  (let [broj (fn [^String cifre] (Long/parseLong cifre))
        pre  (fn [kolicina] (jt/minus (jt/instant) kolicina))]
    (condp re-matches datum
      #"[Dd]anas" (jt/instant)
      #"[Jj]u[a-zA-Z\u00C0-\u024F]e" (pre (jt/days 1))
      #"pre nedelju dana" (pre (jt/weeks 1))
      #"pre ([0-9]+) nedelj[a-z]" :>> (fn [[_ broj-nedelja]]
                                        (pre (jt/weeks (broj broj-nedelja))))
      #"pre ([0-9]+) dana" :>> (fn [[_ broj-dana]]
                                 (pre (jt/days (broj broj-dana))))
      #"([0-9]+) dan[a-z]?[\s]+pre" :>> (fn [[_ broj-dana]]
                                          (pre (jt/days (broj broj-dana))))
      #"([0-9]+) sat[a-z]?[\s]+pre" :>> (fn [[_ broj-sati]]
                                          (pre (jt/hours (broj broj-sati))))
      #"([0-9]+) minut[a-z]?[\s]+pre" :>> (fn [[_ broj-minuta]]
                                            (pre (jt/minutes (broj broj-minuta))))
      ;; e.g. Mar 21, 2023
      ;; NOTE(review): the day number is incremented before the string is re-parsed,
      ;; so the last day of a month produces an out-of-range day (e.g. 32) that
      ;; presumably fails to parse — confirm why the +1 is needed.
      #"([a-zA-Z]+) ([0-9]+), ([0-9]+)" :>> (fn [[_ mesec dan godina]]
                                              (datum-string->instant
                                               (str/join "/" [godina mesec (inc (broj dan))])
                                               "yyyy/MMM/d")))))
;; REPL scratch, never evaluated on load: "8 sati pre" exercises the
;; "N sat* pre" (hours ago) clause of relativni-u-apsolutni.
(comment
(relativni-u-apsolutni "8 sati pre"))
;; Ad processor for the :KP source. `urlovi` and `imena-domena` are looked up
;; with the :KP key to obtain the listing URL(s) and the domain prefix.
(deftype ProcesorKP [urlovi imena-domena]
  IzvuciOglase

  ;; Reads the first URL stored under :KP with slurp and returns the page text.
  (dohvati-stranicu [_this]
    (-> urlovi :KP first slurp))

  ;; Parses the page into hickory data, selects every <article> node and
  ;; lazily maps obradi-oglas over the selected nodes.
  (izvuci-oglase [this]
    (->> (dohvati-stranicu this)
         h/parse
         h/as-hickory
         (hs/select (hs/tag :article))
         (map #(obradi-oglas this %))))

  ;; Pulls the individual fields out of one <article> node and packs them with -m.
  ;; The locals handed to -m keep fixed names: presumably -m builds a map keyed
  ;; by those names — verify in gajbe.util before renaming any of them.
  (obradi-oglas [_this oglas]
    (let [izaberi          (fn [selektor] (hs/select selektor oglas))
          tekst            (fn [cvor] (-> cvor :content first))
          status           (fn [oznaka]
                             (izaberi (hs/child (hs/class :AdItem_postedStatus__swUhG)
                                                (hs/tag oznaka))))
          link-oglasa      (str (:KP imena-domena)
                                (-> (izaberi (hs/class :Link_link__J4Qd8)) first :attrs :href))
          naslov           (tekst (first (izaberi (hs/class :AdItem_name__RhGAZ))))
          opis             (tekst (first (izaberi (hs/child (hs/class :AdItem_adTextHolder__Fmra9)
                                                            (hs/tag :p)))))
          cena             (tekst (first (izaberi (hs/class :AdItem_price__jUgxi))))
          link-fotografije (-> (izaberi (hs/child (hs/class :AdItem_imageHolder__LZaKa)
                                                  (hs/tag :img)))
                               first :attrs :src)
          mesto            (tekst (first (izaberi (hs/child (hs/class :AdItem_originAndPromoLocation__HgtYj)
                                                            (hs/tag :p)))))
          ;; the date label is the text of the LAST <p> under the posted-status element
          datum            (relativni-u-apsolutni (tekst (last (status :p))))
          ;; true when the first <a> under the posted-status element has some content
          kp-obnovljen?    (some? (tekst (first (status :a))))
          izvor            :KP]
      (-m link-oglasa naslov opis cena link-fotografije mesto datum kp-obnovljen? izvor))))
;; Ad processor for the :halo-oglasi source. `urlovi` and `imena-domena` are
;; looked up with the :halo-oglasi key for the listing URL(s) and domain prefix.
(deftype ProcesorHaloOglasi [urlovi imena-domena]
  IzvuciOglase

  ;; Reads the first URL stored under :halo-oglasi with slurp and returns the page text.
  (dohvati-stranicu [_this]
    (-> urlovi :halo-oglasi first slurp))

  ;; Parses the page into hickory data, keeps nodes with class product-item that
  ;; do not also carry class banner-list, and lazily maps obradi-oglas over them.
  (izvuci-oglase [this]
    (->> (dohvati-stranicu this)
         h/parse
         h/as-hickory
         (hs/select (hs/and (hs/class :product-item) (hs/el-not (hs/class :banner-list))))
         (map #(obradi-oglas this %))))

  ;; Pulls the individual fields out of one product-item node and packs them with -m.
  ;; The locals handed to -m keep fixed names: presumably -m builds a map keyed
  ;; by those names — verify in gajbe.util before renaming any of them.
  (obradi-oglas [_this oglas]
    (let [izaberi          (fn [selektor] (hs/select selektor oglas))
          tekst            (comp first :content)
          ;; the title anchor supplies both the title text and the relative link
          naslovni-link    (first (izaberi (hs/child (hs/class :product-title) (hs/tag :a))))
          naslov           (tekst naslovni-link)
          link-oglasa      (str (:halo-oglasi imena-domena)
                                (-> naslovni-link :attrs :href))
          ;; price text sits one level below the element that has a data-value attribute
          cena             (-> (izaberi (hs/attr :data-value)) first :content first :content first)
          link-fotografije (-> (izaberi (hs/descendant (hs/tag :figure) (hs/tag :img)))
                               first :attrs :src)
          mesto            (->> (izaberi (hs/class :subtitle-places))
                                first
                                :content
                                (map tekst)
                                (str/join "/"))
          [tip kvadratura broj-soba] (->> (izaberi (hs/descendant (hs/class :product-features)
                                                                  (hs/class :value-wrapper)))
                                          (map tekst))
          datum            (datum-string->instant
                            (tekst (first (izaberi (hs/class :publish-date))))
                            "dd.MM.yyyy.")
          izvor            :halo-oglasi]
      (-m link-oglasa naslov cena link-fotografije mesto tip kvadratura broj-soba datum izvor))))
;; Ad processor for the :beogradski-oglasi source. `urlovi` and `imena-domena`
;; are looked up with the :beogradski-oglasi key for the listing URL(s) and
;; domain prefix.
(deftype ProcesorBeogradskiOglasi [urlovi imena-domena]
  IzvuciOglase

  ;; Reads the first URL stored under :beogradski-oglasi with slurp and returns the page text.
  (dohvati-stranicu [_this]
    (-> urlovi :beogradski-oglasi first slurp))

  ;; Parses the page into hickory data, selects every node with class
  ;; "classified" and lazily maps obradi-oglas over the selected nodes.
  (izvuci-oglase [this]
    (->> (dohvati-stranicu this)
         h/parse
         h/as-hickory
         (hs/select (hs/class :classified))
         (map #(obradi-oglas this %))))

  ;; Pulls the individual fields out of one "classified" node and packs them with -m.
  ;; The locals handed to -m keep fixed names: presumably -m builds a map keyed
  ;; by those names — verify in gajbe.util before renaming any of them.
  (obradi-oglas [_this oglas]
    (let [izaberi          (fn [selektor] (hs/select selektor oglas))
          prvi-tekst       (fn [selektor] (-> (izaberi selektor) first :content first))
          prvi-atribut     (fn [selektor atribut] (-> (izaberi selektor) first :attrs atribut))
          naslov           (prvi-tekst (hs/child (hs/class :title) (hs/tag :h3)))
          link-oglasa      (str (:beogradski-oglasi imena-domena)
                                (prvi-atribut (hs/child (hs/class :classified) (hs/tag :a)) :href))
          opis             (prvi-tekst (hs/child (hs/class :fbac) (hs/tag :p)))
          cena             (str/trim (prvi-tekst (hs/class :sl-price)))
          link-fotografije (prvi-atribut (hs/class :cpic) :src)
          mesto            (prvi-tekst (hs/class :sl-loc))
          ;; the date label is the second space-separated token of the small-light text
          datum            (-> (prvi-tekst (hs/child (hs/class :fbac) (hs/class :small-light)))
                               (str/split #" ")
                               second
                               str/trim
                               relativni-u-apsolutni)
          ;; NOTE(review): this looks up the same AdItem_postedStatus__swUhG class that the
          ;; KP processor uses — presumably a copy-paste leftover; confirm whether this
          ;; field is meaningful for this source.
          kp-obnovljen?    (some? (prvi-tekst (hs/child (hs/class :AdItem_postedStatus__swUhG)
                                                        (hs/tag :a))))
          izvor            :beogradski-oglasi]
      (-m link-oglasa naslov opis cena link-fotografije mesto datum kp-obnovljen? izvor))))
(defn dohvati-oglase
  "Runs izvuci-oglase on one processor per source (Beogradski oglasi, Halo
  oglasi, KP — in that order), concatenates the resulting ads and returns them
  sorted by :datum using jt/after? as the comparator."
  []
  (let [procesori [(->ProcesorBeogradskiOglasi url/urlovi-oglasa url/imena-domena)
                   (->ProcesorHaloOglasi url/urlovi-oglasa url/imena-domena)
                   (->ProcesorKP url/urlovi-oglasa url/imena-domena)]]
    (->> procesori
         (mapcat izvuci-oglase)
         (sort-by :datum jt/after?))))
;; REPL scratch, never evaluated on load: collects the ads from all three
;; processors without sorting. Each processor type declares two fields
;; (urlovi, imena-domena), so every positional constructor needs both arguments.
(comment
  (mapcat izvuci-oglase
          [(->ProcesorBeogradskiOglasi url/urlovi-oglasa url/imena-domena)
           (->ProcesorHaloOglasi url/urlovi-oglasa url/imena-domena)
           (->ProcesorKP url/urlovi-oglasa url/imena-domena)]))
Let me know if you need any other info.
Metadata
Assignees
Labels
No labels