Merge pull request #63 from ryanboldi/main

pmapallv multi-collection implementation
This commit is contained in:
Lee Spector 2023-10-16 18:34:17 -04:00 committed by GitHub
commit adf039195c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 40 additions and 36 deletions

View File

@ -5,15 +5,15 @@
(defn assign-indices-to-data (defn assign-indices-to-data
"assigns an index to each training case in order to differentiate them when downsampling" "assigns an index to each training case in order to differentiate them when downsampling"
[training-data] [training-data argmap]
(map (fn [data-map index] (utils/pmapallv (fn [data-map index]
(let [data-m (if (map? data-map) data-map (assoc {} :data data-map))] ;if data is not in a map, make it one (let [data-m (if (map? data-map) data-map (assoc {} :data data-map))] ;if data is not in a map, make it one
(assoc data-m :index index))) (assoc data-m :index index)))
training-data (range (count training-data)))) training-data (range (count training-data)) argmap))
(defn initialize-case-distances (defn initialize-case-distances
[{:keys [training-data population-size]}] [{:keys [training-data population-size] :as argmap}]
(map #(assoc % :distances (vec (repeat (count training-data) population-size))) training-data)) (utils/pmapallv #(assoc % :distances (vec (repeat (count training-data) population-size))) training-data argmap))
(defn select-downsample-random (defn select-downsample-random
"Selects a downsample from the training cases and returns it" "Selects a downsample from the training cases and returns it"

View File

@ -70,7 +70,7 @@
(if (:diploid argmap) (if (:diploid argmap)
(interleave plushy plushy) (interleave plushy plushy)
plushy))}) (range population-size) argmap) plushy))}) (range population-size) argmap)
indexed-training-data (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap))] indexed-training-data (if downsample? (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap) argmap) (:training-data argmap))]
(let [training-data (if downsample? (let [training-data (if downsample?
(case (:ds-function argmap) (case (:ds-function argmap)
:case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap) :case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap)
@ -133,13 +133,16 @@
(+ evaluations (* population-size (count training-data)) ;every member evaluated on the current sample (+ evaluations (* population-size (count training-data)) ;every member evaluated on the current sample
(if (zero? (mod generation ds-parent-gens)) (* (count parent-reps) (- (count indexed-training-data) (count training-data))) 0) ; the parent-reps not evaluted already on down-sample (if (zero? (mod generation ds-parent-gens)) (* (count parent-reps) (- (count indexed-training-data) (count training-data))) 0) ; the parent-reps not evaluted already on down-sample
(if best-individual-passes-ds (- (count indexed-training-data) (count training-data)) 0)) ; if we checked for generalization or not (if best-individual-passes-ds (- (count indexed-training-data) (count training-data)) 0)) ; if we checked for generalization or not
(let [reindexed-pop (hyperselection/reindex-pop evaluated-pop)] ; give every individual an index for hyperselection loggin (let [reindexed-pop (hyperselection/reindex-pop evaluated-pop argmap)] ; give every individual an index for hyperselection loggin
(hyperselection/log-hyperselection-and-ret (hyperselection/log-hyperselection-and-ret
(if (:elitism argmap) (if (:elitism argmap)
(conj (repeatedly (dec population-size) #(variation/new-individual reindexed-pop argmap)) (conj (utils/pmapallv (fn [_] (variation/new-individual reindexed-pop argmap))
(first reindexed-pop)) (range (dec population-size))
(repeatedly population-size ;need to count occurance of each parent, and reset IDs argmap)
#(variation/new-individual reindexed-pop argmap))))) (first reindexed-pop)) ;elitism maintains the most-fit individual
(utils/pmapallv (fn [_] (variation/new-individual reindexed-pop argmap))
(range population-size)
argmap))))
(if downsample? (if downsample?
(if (zero? (mod generation ds-parent-gens)) (if (zero? (mod generation ds-parent-gens))
(downsample/update-case-distances rep-evaluated-pop indexed-training-data indexed-training-data ids-type (/ solution-error-threshold (count indexed-training-data))) ; update distances every ds-parent-gens generations (downsample/update-case-distances rep-evaluated-pop indexed-training-data indexed-training-data ids-type (/ solution-error-threshold (count indexed-training-data))) ; update distances every ds-parent-gens generations

View File

@ -1,4 +1,5 @@
(ns propeller.hyperselection) (ns propeller.hyperselection
(:require [propeller.utils :as utils]))
(defn sum-list-map-indices (defn sum-list-map-indices
"sums a list of maps that have the :index property's index multiplicity" "sums a list of maps that have the :index property's index multiplicity"
@ -31,5 +32,5 @@
(defn reindex-pop (defn reindex-pop
"assigns each member of the population a unique index before selection to track hyperselection" "assigns each member of the population a unique index before selection to track hyperselection"
[pop] [pop argmap]
(map (fn [indiv index] (assoc indiv :index index)) pop (range (count pop)))) (utils/pmapallv (fn [indiv index] (assoc indiv :index index)) pop (range (count pop)) argmap))

View File

@ -102,23 +102,23 @@
(max height)))))) (max height))))))
(defn pmapallv (defn pmapallv
"A utility for concurrent execution of a function on items in a collection. "A utility for concurrent execution of a function. If :single-thread-mode is
In single-thread-mode this acts like mapv. Otherwise it acts like pmap but: truthy in the final arg then this acts like mapv of f on the provided colls.
1) coll should be finite, 2) the returned sequence will not be lazy, and will Otherwise it acts like pmap but: 1) the colls should be finite, 2) the
in fact be a vector, 3) calls to f may occur in any order, to maximize returned sequence will not be lazy, and will in fact be a vector, and
multicore processor utilization, and 4) takes only one coll so far." 3) calls to f may occur in any order, to maximize multicore processor utilization."
[f coll args] [f & colls-args]
#?(:clj (vec (if (:single-thread-mode args) #?(:clj (vec (if (:single-thread-mode (last colls-args))
(doall (map f coll)) (apply mapv f (butlast colls-args))
(let [agents (map #(agent % :error-handler (let [agents (map #(agent % :error-handler
(fn [agnt except] (fn [agnt except]
(repl/pst except 1000) (repl/pst except 1000)
(System/exit 0))) (System/exit 0)))
coll)] (apply map vector (butlast colls-args)))]
(dorun (map #(send % f) agents)) (dorun (map (fn [a] (send a #(apply f %))) agents))
(apply await agents) (apply await agents)
(doall (map deref agents))))) (doall (mapv deref agents)))))
:cljs (mapv f coll))) :cljs (apply mapv f (butlast colls-args))))
(def PI (def PI
#?(:clj Math/PI #?(:clj Math/PI

View File

@ -9,12 +9,12 @@
(t/deftest assign-indices-to-data-test (t/deftest assign-indices-to-data-test
(t/testing "assign-indices-to-data" (t/testing "assign-indices-to-data"
(t/testing "should return a map of the same length" (t/testing "should return a map of the same length"
(t/is (= (count (ds/assign-indices-to-data (range 10))) 10)) (t/is (= (count (ds/assign-indices-to-data (range 10) {})) 10))
(t/is (= (count (ds/assign-indices-to-data (range 0))) 0))) (t/is (= (count (ds/assign-indices-to-data (range 0) {})) 0)))
(t/testing "should return a map where each element has an index key" (t/testing "should return a map where each element has an index key"
(t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10)))))) (t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10)) {}))))
(t/testing "should return distinct indices" (t/testing "should return distinct indices"
(t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10))) (range 10)))))) (t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10) {})) (range 10))))))
(t/deftest select-downsample-random-test (t/deftest select-downsample-random-test
(t/testing "select-downsample-random" (t/testing "select-downsample-random"
@ -157,7 +157,7 @@
(t/is (= '(0.25 0.25 0.25 0.25) (hs/hyperselection-track parents2)))) (t/is (= '(0.25 0.25 0.25 0.25) (hs/hyperselection-track parents2))))
(t/testing "reindex-pop works correctly" (t/testing "reindex-pop works correctly"
(t/is (= '({:blah 3 :index 0} {:blah 3 :index 1} (t/is (= '({:blah 3 :index 0} {:blah 3 :index 1}
{:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1))) {:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1 {})))
(t/is (= '({:plushy 2 :index 0} {:blah 3 :index 1} (t/is (= '({:plushy 2 :index 0} {:blah 3 :index 1}
{:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2))) {:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2 {})))
(t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents)))))) (t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents {}))))))