From c89dd81880e1431452584fb598bff5d8e4fc8f36 Mon Sep 17 00:00:00 2001 From: Andy Chambers Date: Mon, 15 Aug 2016 14:40:03 -0700 Subject: [PATCH 1/3] Initial draft with benchmarking --- env/profiling/hitchhiker/bench.clj | 149 +++++++++--------- project.clj | 4 +- src/hitchhiker/sqlite.clj | 241 +++++++++++++++++++++++++++++ test/hitchhiker/sqlite_test.clj | 0 4 files changed, 321 insertions(+), 73 deletions(-) create mode 100644 src/hitchhiker/sqlite.clj create mode 100644 test/hitchhiker/sqlite_test.clj diff --git a/env/profiling/hitchhiker/bench.clj b/env/profiling/hitchhiker/bench.clj index d47cccc..63c87dc 100644 --- a/env/profiling/hitchhiker/bench.clj +++ b/env/profiling/hitchhiker/bench.clj @@ -2,8 +2,10 @@ (:require [clojure.pprint :as pp] [clojure.string :as str] [clojure.tools.cli :refer [parse-opts]] + [clojure.java.jdbc :as jdbc] [excel-templates.build :as excel] [hitchhiker.redis :as redis] + [hitchhiker.sqlite :as sqlite] [hitchhiker.tree.core :as core] [hitchhiker.tree.messaging :as msg]) (:import [java.io File FileWriter])) @@ -129,7 +131,7 @@ :validate [#(#{"fractal" "b-tree" "sorted-set"} %) "Data structure must be fractal, b-tree, or sorted set"]] [nil "--backend testing" "Runs the benchmark with the specified backend" :default "testing" - :validate [#(#{"redis" "testing"} %) "Backend must be redis or testing"]] + :validate [#(#{"redis" "sqlite" "testing"} %) "Backend must be redis, sqlite or testing"]] ["-d" "--delete-pattern PATTERN" "Specifies how the operations will be reordered on delete" :default "forward" :validate [#(#{"forward" "reverse" "shuffle" "zero"} %) "Incorrect delete pattern"] @@ -194,74 +196,77 @@ (defn -main [& [root & args]] - (let [outputs (atom [])] - (doseq [args (or (->> args - (partition-by #(= % "--")) - (map-indexed vector) - (filter (comp even? first)) - (map second) - (seq)) - [[]])] ; always do one iteration - (let [{:keys [options arguments errors summary]} (parse-opts args options) - tree-to-test (atom {}) - results (atom [])] - (cond - (or (= "-h" root) - (= "--help" root) - (nil? root) - (:help options)) (exit 0 (usage summary)) - (not= (count arguments) 0) (exit 1 (usage summary)) - errors (exit 1 (error-msg errors))) - (let [backend (case (:backend options) - "testing" (core/->TestingBackend) - "redis" (do (redis/start-expiry-thread!) - (redis/->RedisBackend))) - delete-xform (case (:delete-pattern options) - "forward" identity - "reverse" reverse - "shuffle" shuffle - "zero" #(repeat (count %) 0.0)) - [tree-name structure] - (case (:data-structure options) - "b-tree" ["b-tree" (core-b-tree (:tree-width options) backend)] - "fractal" ["fractal" (msg-b-tree (:tree-width options) backend)] - "sorted-set" ["sorted-set" (sorted-set-repr)]) - flush-freq (:flush-freq options) - codename (str tree-name - "__flush_" - flush-freq - "__b_" - (:tree-width options) - "__" - (:backend options) - "__n_" - (:num-operations options) - "__del_" - (:delete-pattern options))] - (doseq [ds (generate-test-datasets) - :let [codename (str codename - "_" - (:name ds)) - out (create-output-dir - root - codename) - _ (println "Doing" codename) - bench-res (benchmark (:num-operations options) ds flush-freq structure out delete-xform)]] - (swap! results conj - {:tree tree-name - :ds (:name ds) - :freq flush-freq - :n (:num-operations options) - :b (:tree-width options) - :delete-pattern (:delete-pattern options) - :results bench-res})) - ;(println "results") - ;(clojure.pprint/pprint @results) - (swap! outputs conj (template-one-sheet @results))))) - (excel/render-to-file - "template_benchmark.xlsx" - (.getPath (File. root "analysis.xlsx")) - {"SingleDS" - (map-indexed (fn [i s] - (assoc s :sheet-name (str "Trial " (inc i)))) - @outputs)}))) + (jdbc/with-db-connection [db (sqlite/db-spec ":memory:")] + (let [outputs (atom [])] + (doseq [args (or (->> args + (partition-by #(= % "--")) + (map-indexed vector) + (filter (comp even? first)) + (map second) + (seq)) + [[]])] ; always do one iteration + (let [{:keys [options arguments errors summary]} (parse-opts args options) + tree-to-test (atom {}) + results (atom [])] + (cond + (or (= "-h" root) + (= "--help" root) + (nil? root) + (:help options)) (exit 0 (usage summary)) + (not= (count arguments) 0) (exit 1 (usage summary)) + errors (exit 1 (error-msg errors))) + (let [backend (case (:backend options) + "testing" (core/->TestingBackend) + "redis" (do (redis/start-expiry-thread!) + (redis/->RedisBackend)) + "sqlite" (do (sqlite/ensure-schema db) + (sqlite/->SQLiteBackend db))) + delete-xform (case (:delete-pattern options) + "forward" identity + "reverse" reverse + "shuffle" shuffle + "zero" #(repeat (count %) 0.0)) + [tree-name structure] + (case (:data-structure options) + "b-tree" ["b-tree" (core-b-tree (:tree-width options) backend)] + "fractal" ["fractal" (msg-b-tree (:tree-width options) backend)] + "sorted-set" ["sorted-set" (sorted-set-repr)]) + flush-freq (:flush-freq options) + codename (str tree-name + "__flush_" + flush-freq + "__b_" + (:tree-width options) + "__" + (:backend options) + "__n_" + (:num-operations options) + "__del_" + (:delete-pattern options))] + (doseq [ds (generate-test-datasets) + :let [codename (str codename + "_" + (:name ds)) + out (create-output-dir + root + codename) + _ (println "Doing" codename) + bench-res (benchmark (:num-operations options) ds flush-freq structure out delete-xform)]] + (swap! results conj + {:tree tree-name + :ds (:name ds) + :freq flush-freq + :n (:num-operations options) + :b (:tree-width options) + :delete-pattern (:delete-pattern options) + :results bench-res})) + ;(println "results") + ;(clojure.pprint/pprint @results) + (swap! outputs conj (template-one-sheet @results))))) + (excel/render-to-file + "template_benchmark.xlsx" + (.getPath (File. root "analysis.xlsx")) + {"SingleDS" + (map-indexed (fn [i s] + (assoc s :sheet-name (str "Trial " (inc i)))) + @outputs)})))) diff --git a/project.clj b/project.clj index c3b1d2c..a7ed293 100644 --- a/project.clj +++ b/project.clj @@ -7,7 +7,9 @@ [org.clojure/core.memoize "0.5.8"] [com.taoensso/carmine "2.12.2"] [org.clojure/core.rrb-vector "0.0.11"] - [org.clojure/core.cache "0.6.5"]] + [org.clojure/core.cache "0.6.5"] + [org.clojure/java.jdbc "0.6.2-alpha2"] + [org.xerial/sqlite-jdbc "3.7.2"]] :aliases {"bench" ["with-profile" "profiling" "run" "-m" "hitchhiker.bench"]} :jvm-opts ["-server" "-Xmx3700m" "-Xms3700m"] :profiles {:test diff --git a/src/hitchhiker/sqlite.clj b/src/hitchhiker/sqlite.clj new file mode 100644 index 0000000..2a95940 --- /dev/null +++ b/src/hitchhiker/sqlite.clj @@ -0,0 +1,241 @@ +(ns hitchhiker.sqlite + (:require [clojure.java.jdbc :as jdbc] + [clojure.edn :as edn] + [clojure.string :as str] + [hitchhiker.tree.core :as core] + [hitchhiker.tree.messaging :as msg] + [clojure.core.cache :as cache] + [taoensso.nippy :as nippy]) + (:import [java.sql SQLException])) + +(defn underscore [x] + (str/replace (str x) "-" "_")) + +(def schema + {:hh-key (jdbc/create-table-ddl :hh-key + [[:k :string "primary key"] + [:v :blob]] + {:entities underscore}) + + :hh-ref (jdbc/create-table-ddl :hh-ref + [[:parent :string "references hh_key(k) on delete cascade"] + [:child :string "references hh_key(k) on delete cascade"]] + {:entities underscore}) + + :hh-ref-by-parent "create index if not exists hh_ref_by_parent on hh_ref (parent);" + :hh-ref-by-child "create index if not exists hh_ref_by_child on hh_ref (child);"}) + +(def query + {:table-exists? "select 1 from sqlite_master where type='table' and name=?" + :index-exists? "select 1 from sqlite_master where type='index' and name=?" + :find-key "select * from hh_key where k=?"}) + +(defn drop-ref [db key] + (jdbc/delete! db :hh-key ["k = ?" key])) + +(defn db-spec [subname] + {:classname "org.sqlite.JDBC" + :subprotocol "sqlite" + :subname subname}) + +(defn table-exists? [db table] + (let [qry (fn [args] + (jdbc/query db args))] + (-> [(query :table-exists?) (underscore table)] + qry + (not-empty)))) + +(defn index-exists? [db idx] + (let [qry (fn [args] + (jdbc/query db args))] + (-> [(query :index-exists?) (underscore idx)] + qry + (not-empty)))) + +(defn create-table [db tbl] + (try + (jdbc/execute! db tbl) + (catch SQLException e + (when-not (re-matches #"table (.*) already exists" (.getMessage e)) + (throw (ex-info "failed to create table" + {:ddl tbl} + e)))))) + +(defn create-index [db idx] + (try + (jdbc/execute! db idx) + (catch SQLException e + (when-not (re-matches #"index (.*) already exists" (.getMessage e)) + (throw (ex-info "failed to create index" + {:idx idx} + e)))))) + +(defn ensure-schema [db] + (let [ensure (fn [{:keys [items exists? create!]}] + (doseq [item items] + (when-not (exists? db (underscore (name item))) + (create! db (or (get schema item) + (throw (ex-info "tried to create unknown item" + {:item item})))))))] + + (jdbc/execute! db "pragma foreign_keys=on;") + + (ensure {:items [:hh-key :hh-ref] + :exists? table-exists? + :create! create-table}) + + (ensure {:items [:hh-ref-by-parent :hh-ref-by-child] + :exists? index-exists? + :create! create-index}))) + +(defn add-node [db {:keys [k v] :as node}] + (try + (jdbc/insert! db :hh-key {:k k :v (nippy/freeze v)} {:entities underscore}) + (catch SQLException e + (throw (ex-info "failed to add node" + {:node node + :db db} e))))) + +(defn delete-key [db k] + (jdbc/delete! :hh-key ["k = ?" k])) + +(defn add-refs [db {:keys [parent children]}] + (let [mk-ref (fn [child] + [parent child])] + (jdbc/insert-multi! db :hh-ref (for [child children] + {:parent parent + :child child}) + {:entities underscore}))) + +(defn synthesize-storage-addr + "Given a key, returns a promise containing that key for use as a storage-addr" + [key] + (doto (promise) + (deliver key))) + +(def ^:dynamic *db*) + +(let [cache (-> {} + (cache/lru-cache-factory :threshold 10000) + atom)] + (defn seed-cache! [sqlite-key val] + (swap! cache cache/miss sqlite-key val)) + + (defn io-fetch [sqlite-key] + (let [run (delay + (-> (jdbc/query *db* [(query :find-key) sqlite-key]) + :v + nippy/thaw)) + cs (swap! cache (fn [c] + (if (cache/has? c sqlite-key) + (cache/hit c sqlite-key) + (cache/miss c sqlite-key run)))) + val (cache/lookup cs sqlite-key)] + (if val @val @run)))) + +(defrecord SQLiteAddr [last-key sqlite-key storage-addr] + core/IResolve + (dirty? [_] false) + (last-key [_] last-key) + (resolve [_] (-> (io-fetch sqlite-key) + (assoc :storage-addr (synthesize-storage-addr sqlite-key))))) + +(defn sqlite-addr + [last-key sqlite-key] + (->SQLiteAddr last-key sqlite-key + (synthesize-storage-addr sqlite-key))) + +(nippy/extend-thaw :b-tree/sqlite-addr + [data-input] + (let [last-key (nippy/thaw-from-in! data-input) + sqlite-key (nippy/thaw-from-in! data-input)] + (sqlite-addr last-key sqlite-key))) + +(nippy/extend-freeze SQLiteAddr :b-tree/sqlite-addr + [{:keys [last-key sqlite-key]} data-output] + (nippy/freeze-to-out! data-output last-key) + (nippy/freeze-to-out! data-output sqlite-key)) + +(nippy/extend-thaw :b-tree/sqlite-addr + [data-input] + (let [last-key (nippy/thaw-from-in! data-input) + redis-key (nippy/thaw-from-in! data-input)] + (sqlite-addr last-key redis-key))) + +(defrecord SQLiteBackend [db] + core/IBackend + (new-session [_] (atom {:writes 0 + :deletes 0})) + (anchor-root [_ {:keys [sqlite-key] :as node}] + ;; TODO: figure out how redis gc relates to SQL + node) + (write-node [_ node session] + (swap! session update-in [:writes] inc) + (let [key (str (java.util.UUID/randomUUID)) + addr (sqlite-addr (core/last-key node) key)] + + (when (some #(not (satisfies? msg/IOperation %)) (:op-buf node)) + (println (str "Found a broken node, has " (count (:op-buf node)) " ops")) + (println (str "The node data is " node)) + (println (str "and " (:op-buf node)))) + + (jdbc/with-db-transaction [tx db] + (binding [*db* tx] + (add-node db {:k key, :v node}) + (when (core/index-node? node) + (add-refs db {:parent key + :children (for [child (:children node) + :let [child-key @(:storage-addr child)]] + child-key)})))) + + (seed-cache! key (doto (promise) + (deliver node))) + addr)) + (delete-addr [_ addr session] + (delete-key db addr) + (swap! session update-in :deletes inc))) + +(defn get-root-key + [tree] + (-> tree :storage-addr (deref 10 nil))) + +(defn create-tree-from-root-key + [db root-key] + (let [last-key (core/last-key + (-> (jdbc/find-by-keys db :hh-key {:k root-key} + {:entities underscore}) + first + :v + nippy/thaw))] + (core/resolve + (->SQLiteAddr last-key root-key (synthesize-storage-addr root-key))))) + + +(comment + + (defn insert [t v] + (msg/insert t v v)) + + (jdbc/with-db-connection [conn (assoc db :subname "yolo.sqlite") ] + (setup conn) + + (def my-tree + (let [b-tree (core/b-tree (core/->Config 17 300 (- 300 17)))] + (core/flush-tree + (reduce insert b-tree (range 50000)) + (->SQLiteBackend conn))))) + + + (jdbc/with-db-connection [db (db-spec "yolo.sqlite")] + (-> (jdbc/find-by-keys db :hh-key {:k "c6afddfe-f641-49f9-8789-4493ffa41c1c"} + {:entities underscore}) + first + :v)) + + + (jdbc/with-db-connection [conn (assoc db :subname "yolo.sqlite")] + (-> (create-tree-from-root-key conn @(:storage-addr (:tree my-tree))) + (msg/lookup-fwd-iter 1) + (count))) + + ) diff --git a/test/hitchhiker/sqlite_test.clj b/test/hitchhiker/sqlite_test.clj new file mode 100644 index 0000000..e69de29 From 9d79a3dac910ba6e61b9235d3ea02f8c6d688e83 Mon Sep 17 00:00:00 2001 From: Andy Chambers Date: Sat, 3 Sep 2016 13:09:01 -0700 Subject: [PATCH 2/3] Add genertive test for sqlite backend --- env/profiling/hitchhiker/bench.clj | 143 ++++++++++++++--------------- src/hitchhiker/sqlite.clj | 52 +++++++++-- test/hitchhiker/sqlite_test.clj | 59 ++++++++++++ 3 files changed, 174 insertions(+), 80 deletions(-) diff --git a/env/profiling/hitchhiker/bench.clj b/env/profiling/hitchhiker/bench.clj index 63c87dc..6339c94 100644 --- a/env/profiling/hitchhiker/bench.clj +++ b/env/profiling/hitchhiker/bench.clj @@ -196,77 +196,76 @@ (defn -main [& [root & args]] - (jdbc/with-db-connection [db (sqlite/db-spec ":memory:")] - (let [outputs (atom [])] - (doseq [args (or (->> args - (partition-by #(= % "--")) - (map-indexed vector) - (filter (comp even? first)) - (map second) - (seq)) - [[]])] ; always do one iteration - (let [{:keys [options arguments errors summary]} (parse-opts args options) - tree-to-test (atom {}) - results (atom [])] - (cond - (or (= "-h" root) - (= "--help" root) - (nil? root) - (:help options)) (exit 0 (usage summary)) - (not= (count arguments) 0) (exit 1 (usage summary)) - errors (exit 1 (error-msg errors))) - (let [backend (case (:backend options) - "testing" (core/->TestingBackend) - "redis" (do (redis/start-expiry-thread!) - (redis/->RedisBackend)) - "sqlite" (do (sqlite/ensure-schema db) - (sqlite/->SQLiteBackend db))) - delete-xform (case (:delete-pattern options) - "forward" identity - "reverse" reverse - "shuffle" shuffle - "zero" #(repeat (count %) 0.0)) - [tree-name structure] - (case (:data-structure options) - "b-tree" ["b-tree" (core-b-tree (:tree-width options) backend)] - "fractal" ["fractal" (msg-b-tree (:tree-width options) backend)] - "sorted-set" ["sorted-set" (sorted-set-repr)]) - flush-freq (:flush-freq options) - codename (str tree-name - "__flush_" - flush-freq - "__b_" - (:tree-width options) - "__" - (:backend options) - "__n_" - (:num-operations options) - "__del_" - (:delete-pattern options))] - (doseq [ds (generate-test-datasets) - :let [codename (str codename - "_" - (:name ds)) - out (create-output-dir - root - codename) - _ (println "Doing" codename) - bench-res (benchmark (:num-operations options) ds flush-freq structure out delete-xform)]] - (swap! results conj - {:tree tree-name - :ds (:name ds) - :freq flush-freq - :n (:num-operations options) - :b (:tree-width options) - :delete-pattern (:delete-pattern options) - :results bench-res})) + (let [outputs (atom [])] + (doseq [args (or (->> args + (partition-by #(= % "--")) + (map-indexed vector) + (filter (comp even? first)) + (map second) + (seq)) + [[]])] ; always do one iteration + (let [{:keys [options arguments errors summary]} (parse-opts args options) + tree-to-test (atom {}) + results (atom [])] + (cond + (or (= "-h" root) + (= "--help" root) + (nil? root) + (:help options)) (exit 0 (usage summary)) + (not= (count arguments) 0) (exit 1 (usage summary)) + errors (exit 1 (error-msg errors))) + (let [backend (case (:backend options) + "testing" (core/->TestingBackend) + "redis" (do (redis/start-expiry-thread!) + (redis/->RedisBackend)) + "sqlite" (sqlite/->SQLiteBackend + (sqlite/find-or-create-db "/tmp/yolo.sqlite"))) + delete-xform (case (:delete-pattern options) + "forward" identity + "reverse" reverse + "shuffle" shuffle + "zero" #(repeat (count %) 0.0)) + [tree-name structure] + (case (:data-structure options) + "b-tree" ["b-tree" (core-b-tree (:tree-width options) backend)] + "fractal" ["fractal" (msg-b-tree (:tree-width options) backend)] + "sorted-set" ["sorted-set" (sorted-set-repr)]) + flush-freq (:flush-freq options) + codename (str tree-name + "__flush_" + flush-freq + "__b_" + (:tree-width options) + "__" + (:backend options) + "__n_" + (:num-operations options) + "__del_" + (:delete-pattern options))] + (doseq [ds (generate-test-datasets) + :let [codename (str codename + "_" + (:name ds)) + out (create-output-dir + root + codename) + _ (println "Doing" codename) + bench-res (benchmark (:num-operations options) ds flush-freq structure out delete-xform)]] + (swap! results conj + {:tree tree-name + :ds (:name ds) + :freq flush-freq + :n (:num-operations options) + :b (:tree-width options) + :delete-pattern (:delete-pattern options) + :results bench-res})) ;(println "results") ;(clojure.pprint/pprint @results) - (swap! outputs conj (template-one-sheet @results))))) - (excel/render-to-file - "template_benchmark.xlsx" - (.getPath (File. root "analysis.xlsx")) - {"SingleDS" - (map-indexed (fn [i s] - (assoc s :sheet-name (str "Trial " (inc i)))) - @outputs)})))) + (swap! outputs conj (template-one-sheet @results))))) + (excel/render-to-file + "template_benchmark.xlsx" + (.getPath (File. root "analysis.xlsx")) + {"SingleDS" + (map-indexed (fn [i s] + (assoc s :sheet-name (str "Trial " (inc i)))) + @outputs)}))) diff --git a/src/hitchhiker/sqlite.clj b/src/hitchhiker/sqlite.clj index 2a95940..c3611e5 100644 --- a/src/hitchhiker/sqlite.clj +++ b/src/hitchhiker/sqlite.clj @@ -28,10 +28,19 @@ (def query {:table-exists? "select 1 from sqlite_master where type='table' and name=?" :index-exists? "select 1 from sqlite_master where type='index' and name=?" - :find-key "select * from hh_key where k=?"}) + :find-key "select * from hh_key where k=?" + :dead-keys "select k + from hh_key + where k not in ( select child from hh_ref )"}) (defn drop-ref [db key] - (jdbc/delete! db :hh-key ["k = ?" key])) + (jdbc/delete! db :hh-key ["k = ?" key] + {:entities underscore}) + + (let [dead-keys (jdbc/query db (query :dead-keys))] + (doseq [{:keys [k] :as dead-key} dead-keys] + (drop-ref db k)))) + (defn db-spec [subname] {:classname "org.sqlite.JDBC" @@ -86,7 +95,22 @@ (ensure {:items [:hh-ref-by-parent :hh-ref-by-child] :exists? index-exists? - :create! create-index}))) + :create! create-index}) + + db)) + +(defonce ^:private db-registry (atom {})) + +(defn find-db [subname] + (get @db-registry subname)) + +(defn create-db [subname] + (-> {:connection (jdbc/get-connection (db-spec subname))} + (ensure-schema))) + +(defn find-or-create-db [subname] + (or (find-db subname) + (create-db subname))) (defn add-node [db {:keys [k v] :as node}] (try @@ -96,16 +120,25 @@ {:node node :db db} e))))) +(defn list-keys [db] + (jdbc/query db "select k from hh_key")) + (defn delete-key [db k] (jdbc/delete! :hh-key ["k = ?" k])) (defn add-refs [db {:keys [parent children]}] (let [mk-ref (fn [child] [parent child])] - (jdbc/insert-multi! db :hh-ref (for [child children] - {:parent parent - :child child}) - {:entities underscore}))) + (try + (jdbc/insert-multi! db :hh-ref (for [child children] + {:parent parent + :child child}) + {:entities underscore}) + (catch Exception e + (throw (ex-info "Failed to link parent with children" + {:parent parent + :children children} + e)))))) (defn synthesize-storage-addr "Given a key, returns a promise containing that key for use as a storage-addr" @@ -113,6 +146,10 @@ (doto (promise) (deliver key))) +;;; TODO: I believe using a dynamic var to hold the DB is a bit of anti-pattern but +;;; not sure how to avoid it and still support caching as it behaves in the +;;; redis backend. +;;; (def ^:dynamic *db*) (let [cache (-> {} @@ -167,7 +204,6 @@ (new-session [_] (atom {:writes 0 :deletes 0})) (anchor-root [_ {:keys [sqlite-key] :as node}] - ;; TODO: figure out how redis gc relates to SQL node) (write-node [_ node session] (swap! session update-in [:writes] inc) diff --git a/test/hitchhiker/sqlite_test.clj b/test/hitchhiker/sqlite_test.clj index e69de29..cb70a12 100644 --- a/test/hitchhiker/sqlite_test.clj +++ b/test/hitchhiker/sqlite_test.clj @@ -0,0 +1,59 @@ +(ns hitchhiker.sqlite-test + (:require [clojure.test.check.clojure-test :refer [defspec]] + [clojure.test.check.generators :as gen] + [clojure.test.check.properties :as prop] + [hitchhiker.sqlite :as sqlite] + [hitchhiker.tree.core :as core] + hitchhiker.tree.core-test + [hitchhiker.tree.messaging :as msg] + [clojure.java.jdbc :as jdbc])) + +(defn insert + [t k] + (msg/insert t k k)) + +(defn lookup-fwd-iter + [t v] + (seq (map first (msg/lookup-fwd-iter t v)))) + +(defn mixed-op-seq + "This is like the basic mixed-op-seq tests, but it also mixes in flushes to sqlite + and automatically deletes the old tree" + [add-freq del-freq flush-freq universe-size num-ops] + (let [db (sqlite/find-or-create-db "/tmp/yolo.sqlite")] + (prop/for-all [ops (gen/vector (gen/frequency + [[add-freq (gen/tuple (gen/return :add) + (gen/no-shrink gen/int))] + [flush-freq (gen/return [:flush])] + [del-freq (gen/tuple (gen/return :del) + (gen/no-shrink gen/int))]]) + 40)] + (assert (empty? (sqlite/list-keys db)) + "Start with no keys") + (let [[b-tree root set] + (reduce (fn [[t root set] [op x]] + (let [x-reduced (when x (mod x universe-size))] + (condp = op + :flush (let [t (:tree (core/flush-tree t (sqlite/->SQLiteBackend db)))] + (when root + (sqlite/drop-ref db root)) + #_(println "flush" root) + [t @(:storage-addr t) set]) + :add (do #_(println "add" x) [(insert t x-reduced) root (conj set x-reduced)]) + :del (do #_(println "del" x) [(msg/delete t x-reduced) root (disj set x-reduced)])))) + [(core/b-tree (core/->Config 3 3 2)) nil #{}] + ops)] + #_(println "Make it to the end of a test," root "has" (count (lookup-fwd-iter b-tree -1)) "keys left") + (let [b-tree-order (lookup-fwd-iter b-tree -1) + res (= b-tree-order (seq (sort set)))] + + (sqlite/drop-ref db root) + (assert (empty? (sqlite/list-keys db)) + "End with no keys") + + (assert res (str "These are unequal: " (pr-str b-tree-order) " " (pr-str (seq (sort set))))) + res))))) + +(defspec test-many-keys-bigger-trees + 100 + (mixed-op-seq 800 200 10 1000 1000)) From 5c5d7563229af12ea98508a11dc760f1214e33dd Mon Sep 17 00:00:00 2001 From: Andy Chambers Date: Tue, 6 Sep 2016 21:55:54 -0700 Subject: [PATCH 3/3] Rename SQLite->JDBC; Add data structure docs for JDCB backend --- env/profiling/hitchhiker/bench.clj | 9 ++- src/hitchhiker/{sqlite.clj => jdbc.clj} | 67 ++++++++++++------- .../{sqlite_test.clj => jdbc_test.clj} | 19 +++--- 3 files changed, 57 insertions(+), 38 deletions(-) rename src/hitchhiker/{sqlite.clj => jdbc.clj} (79%) rename test/hitchhiker/{sqlite_test.clj => jdbc_test.clj} (83%) diff --git a/env/profiling/hitchhiker/bench.clj b/env/profiling/hitchhiker/bench.clj index 6339c94..f366cab 100644 --- a/env/profiling/hitchhiker/bench.clj +++ b/env/profiling/hitchhiker/bench.clj @@ -2,10 +2,9 @@ (:require [clojure.pprint :as pp] [clojure.string :as str] [clojure.tools.cli :refer [parse-opts]] - [clojure.java.jdbc :as jdbc] [excel-templates.build :as excel] [hitchhiker.redis :as redis] - [hitchhiker.sqlite :as sqlite] + [hitchhiker.jdbc :as jdbc] [hitchhiker.tree.core :as core] [hitchhiker.tree.messaging :as msg]) (:import [java.io File FileWriter])) @@ -131,7 +130,7 @@ :validate [#(#{"fractal" "b-tree" "sorted-set"} %) "Data structure must be fractal, b-tree, or sorted set"]] [nil "--backend testing" "Runs the benchmark with the specified backend" :default "testing" - :validate [#(#{"redis" "sqlite" "testing"} %) "Backend must be redis, sqlite or testing"]] + :validate [#(#{"redis" "jdbc" "testing"} %) "Backend must be redis, jdbc or testing"]] ["-d" "--delete-pattern PATTERN" "Specifies how the operations will be reordered on delete" :default "forward" :validate [#(#{"forward" "reverse" "shuffle" "zero"} %) "Incorrect delete pattern"] @@ -218,8 +217,8 @@ "testing" (core/->TestingBackend) "redis" (do (redis/start-expiry-thread!) (redis/->RedisBackend)) - "sqlite" (sqlite/->SQLiteBackend - (sqlite/find-or-create-db "/tmp/yolo.sqlite"))) + "jdbc" (jdbc/->JDBCBackend + (jdbc/find-or-create-db "/tmp/yolo.sqlite"))) delete-xform (case (:delete-pattern options) "forward" identity "reverse" reverse diff --git a/src/hitchhiker/sqlite.clj b/src/hitchhiker/jdbc.clj similarity index 79% rename from src/hitchhiker/sqlite.clj rename to src/hitchhiker/jdbc.clj index c3611e5..5bafae6 100644 --- a/src/hitchhiker/sqlite.clj +++ b/src/hitchhiker/jdbc.clj @@ -1,4 +1,4 @@ -(ns hitchhiker.sqlite +(ns hitchhiker.jdbc (:require [clojure.java.jdbc :as jdbc] [clojure.edn :as edn] [clojure.string :as str] @@ -8,6 +8,27 @@ [taoensso.nippy :as nippy]) (:import [java.sql SQLException])) +;;; References in a Relational DB +;;; +;;; The SQLite backend uses a simple relational model to keep track of +;;; keys and their references. Each key is listed in hh_keys, and whenever +;;; we'd like to have some key point to another, we call add-refs with the +;;; "pointer" key and a list of pointee keys. For each pointee, add-refs will +;;; add a `(pointer, pointee)` tuple in hh_refs. +;;; +;;; hh_keys +;;; k the name of the key +;;; v a binary blob representing the value of `k` +;;; +;;; hh_refs +;;; pointer the name of the pointer key +;;; pointee the name of the pointee key +;;; +;;; To delete a key, use `drop-key` which also takes care of deleting any +;;; keys that are only hanging around because they point to the key being +;;; deleted. +;;; + (defn underscore [x] (str/replace (str x) "-" "_")) @@ -18,12 +39,12 @@ {:entities underscore}) :hh-ref (jdbc/create-table-ddl :hh-ref - [[:parent :string "references hh_key(k) on delete cascade"] - [:child :string "references hh_key(k) on delete cascade"]] + [[:pointer :string "references hh_key(k) on delete cascade"] + [:pointee :string "references hh_key(k) on delete cascade"]] {:entities underscore}) - :hh-ref-by-parent "create index if not exists hh_ref_by_parent on hh_ref (parent);" - :hh-ref-by-child "create index if not exists hh_ref_by_child on hh_ref (child);"}) + :hh-ref-by-pointer "create index if not exists hh_ref_by_pointer on hh_ref (pointer);" + :hh-ref-by-pointee "create index if not exists hh_ref_by_pointee on hh_ref (pointee);"}) (def query {:table-exists? "select 1 from sqlite_master where type='table' and name=?" @@ -31,15 +52,15 @@ :find-key "select * from hh_key where k=?" :dead-keys "select k from hh_key - where k not in ( select child from hh_ref )"}) + where k not in ( select pointee from hh_ref )"}) -(defn drop-ref [db key] +(defn drop-key [db key] (jdbc/delete! db :hh-key ["k = ?" key] {:entities underscore}) (let [dead-keys (jdbc/query db (query :dead-keys))] (doseq [{:keys [k] :as dead-key} dead-keys] - (drop-ref db k)))) + (drop-key db k)))) (defn db-spec [subname] @@ -93,7 +114,7 @@ :exists? table-exists? :create! create-table}) - (ensure {:items [:hh-ref-by-parent :hh-ref-by-child] + (ensure {:items [:hh-ref-by-pointer :hh-ref-by-pointee] :exists? index-exists? :create! create-index}) @@ -126,18 +147,18 @@ (defn delete-key [db k] (jdbc/delete! :hh-key ["k = ?" k])) -(defn add-refs [db {:keys [parent children]}] - (let [mk-ref (fn [child] - [parent child])] +(defn add-refs [db {:keys [pointer pointees]}] + (let [mk-ref (fn [pointee] + [pointer pointee])] (try - (jdbc/insert-multi! db :hh-ref (for [child children] - {:parent parent - :child child}) + (jdbc/insert-multi! db :hh-ref (for [pointee pointees] + {:pointer pointer + :pointee pointee}) {:entities underscore}) (catch Exception e - (throw (ex-info "Failed to link parent with children" - {:parent parent - :children children} + (throw (ex-info "Failed to link pointer with pointees" + {:pointer pointer + :pointee pointees} e)))))) (defn synthesize-storage-addr @@ -199,7 +220,7 @@ redis-key (nippy/thaw-from-in! data-input)] (sqlite-addr last-key redis-key))) -(defrecord SQLiteBackend [db] +(defrecord JDBCBackend [db] core/IBackend (new-session [_] (atom {:writes 0 :deletes 0})) @@ -219,10 +240,10 @@ (binding [*db* tx] (add-node db {:k key, :v node}) (when (core/index-node? node) - (add-refs db {:parent key - :children (for [child (:children node) - :let [child-key @(:storage-addr child)]] - child-key)})))) + (add-refs db {:pointer key + :pointees (for [pointee (:pointees node) + :let [pointee-key @(:storage-addr pointee)]] + pointee-key)})))) (seed-cache! key (doto (promise) (deliver node))) diff --git a/test/hitchhiker/sqlite_test.clj b/test/hitchhiker/jdbc_test.clj similarity index 83% rename from test/hitchhiker/sqlite_test.clj rename to test/hitchhiker/jdbc_test.clj index cb70a12..ebe59da 100644 --- a/test/hitchhiker/sqlite_test.clj +++ b/test/hitchhiker/jdbc_test.clj @@ -1,12 +1,11 @@ -(ns hitchhiker.sqlite-test +(ns hitchhiker.jdbc-test (:require [clojure.test.check.clojure-test :refer [defspec]] [clojure.test.check.generators :as gen] [clojure.test.check.properties :as prop] - [hitchhiker.sqlite :as sqlite] + [hitchhiker.jdbc :as jdbc] [hitchhiker.tree.core :as core] hitchhiker.tree.core-test - [hitchhiker.tree.messaging :as msg] - [clojure.java.jdbc :as jdbc])) + [hitchhiker.tree.messaging :as msg])) (defn insert [t k] @@ -20,7 +19,7 @@ "This is like the basic mixed-op-seq tests, but it also mixes in flushes to sqlite and automatically deletes the old tree" [add-freq del-freq flush-freq universe-size num-ops] - (let [db (sqlite/find-or-create-db "/tmp/yolo.sqlite")] + (let [db (jdbc/find-or-create-db "/tmp/yolo.sqlite")] (prop/for-all [ops (gen/vector (gen/frequency [[add-freq (gen/tuple (gen/return :add) (gen/no-shrink gen/int))] @@ -28,15 +27,15 @@ [del-freq (gen/tuple (gen/return :del) (gen/no-shrink gen/int))]]) 40)] - (assert (empty? (sqlite/list-keys db)) + (assert (empty? (jdbc/list-keys db)) "Start with no keys") (let [[b-tree root set] (reduce (fn [[t root set] [op x]] (let [x-reduced (when x (mod x universe-size))] (condp = op - :flush (let [t (:tree (core/flush-tree t (sqlite/->SQLiteBackend db)))] + :flush (let [t (:tree (core/flush-tree t (jdbc/->JDBCBackend db)))] (when root - (sqlite/drop-ref db root)) + (jdbc/drop-key db root)) #_(println "flush" root) [t @(:storage-addr t) set]) :add (do #_(println "add" x) [(insert t x-reduced) root (conj set x-reduced)]) @@ -47,8 +46,8 @@ (let [b-tree-order (lookup-fwd-iter b-tree -1) res (= b-tree-order (seq (sort set)))] - (sqlite/drop-ref db root) - (assert (empty? (sqlite/list-keys db)) + (jdbc/drop-key db root) + (assert (empty? (jdbc/list-keys db)) "End with no keys") (assert res (str "These are unequal: " (pr-str b-tree-order) " " (pr-str (seq (sort set)))))