implements multi collection pmapallv, and uses it in down-sampling and in hyperselection

This commit is contained in:
Ryan Boldi 2023-10-16 18:32:38 -04:00
parent 64e35d320c
commit a315482302
5 changed files with 29 additions and 28 deletions

View File

@ -5,11 +5,11 @@
(defn assign-indices-to-data
"assigns an index to each training case in order to differentiate them when downsampling"
[training-data]
(pmap (fn [data-map index]
[training-data argmap]
(utils/pmapallv (fn [data-map index]
(let [data-m (if (map? data-map) data-map (assoc {} :data data-map))] ;if data is not in a map, make it one
(assoc data-m :index index)))
training-data (range (count training-data))))
training-data (range (count training-data)) argmap))
(defn initialize-case-distances
[{:keys [training-data population-size] :as argmap}]

View File

@ -70,7 +70,7 @@
(if (:diploid argmap)
(interleave plushy plushy)
plushy))}) (range population-size) argmap)
indexed-training-data (if downsample? (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap)) (:training-data argmap))]
indexed-training-data (if downsample? (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap) argmap) (:training-data argmap))]
(let [training-data (if downsample?
(case (:ds-function argmap)
:case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap)
@ -133,7 +133,7 @@
(+ evaluations (* population-size (count training-data)) ;every member evaluated on the current sample
(if (zero? (mod generation ds-parent-gens)) (* (count parent-reps) (- (count indexed-training-data) (count training-data))) 0) ; the parent-reps not evaluted already on down-sample
(if best-individual-passes-ds (- (count indexed-training-data) (count training-data)) 0)) ; if we checked for generalization or not
(let [reindexed-pop (hyperselection/reindex-pop evaluated-pop)] ; give every individual an index for hyperselection loggin
(let [reindexed-pop (hyperselection/reindex-pop evaluated-pop argmap)] ; give every individual an index for hyperselection loggin
(hyperselection/log-hyperselection-and-ret
(if (:elitism argmap)
(conj (utils/pmapallv (fn [_] (variation/new-individual reindexed-pop argmap))

View File

@ -1,4 +1,5 @@
(ns propeller.hyperselection)
(ns propeller.hyperselection
(:require [propeller.utils :as utils]))
(defn sum-list-map-indices
"sums a list of maps that have the :index property's index multiplicity"
@ -31,5 +32,5 @@
(defn reindex-pop
"assigns each member of the population a unique index before selection to track hyperselection"
[pop]
(map (fn [indiv index] (assoc indiv :index index)) pop (range (count pop))))
[pop argmap]
(utils/pmapallv (fn [indiv index] (assoc indiv :index index)) pop (range (count pop)) argmap))

View File

@ -102,23 +102,23 @@
(max height))))))
(defn pmapallv
"A utility for concurrent execution of a function on items in a collection.
In single-thread-mode this acts like mapv. Otherwise it acts like pmap but:
1) coll should be finite, 2) the returned sequence will not be lazy, and will
in fact be a vector, 3) calls to f may occur in any order, to maximize
multicore processor utilization, and 4) takes only one coll so far."
[f coll args]
#?(:clj (vec (if (:single-thread-mode args)
(doall (map f coll))
"A utility for concurrent execution of a function. If :single-thread-mode is
truthy in the final arg then this acts like mapv of f on the provided colls.
Otherwise it acts like pmap but: 1) the colls should be finite, 2) the
returned sequence will not be lazy, and will in fact be a vector, and
3) calls to f may occur in any order, to maximize multicore processor utilization."
[f & colls-args]
#?(:clj (vec (if (:single-thread-mode (last colls-args))
(apply mapv f (butlast colls-args))
(let [agents (map #(agent % :error-handler
(fn [agnt except]
(repl/pst except 1000)
(System/exit 0)))
coll)]
(dorun (map #(send % f) agents))
(apply map vector (butlast colls-args)))]
(dorun (map (fn [a] (send a #(apply f %))) agents))
(apply await agents)
(doall (map deref agents)))))
:cljs (mapv f coll)))
:cljs (apply mapv f (butlast colls-args))))
(def PI
#?(:clj Math/PI

View File

@ -9,12 +9,12 @@
(t/deftest assign-indices-to-data-test
(t/testing "assign-indices-to-data"
(t/testing "should return a map of the same length"
(t/is (= (count (ds/assign-indices-to-data (range 10))) 10))
(t/is (= (count (ds/assign-indices-to-data (range 0))) 0)))
(t/is (= (count (ds/assign-indices-to-data (range 10) {})) 10))
(t/is (= (count (ds/assign-indices-to-data (range 0) {})) 0)))
(t/testing "should return a map where each element has an index key"
(t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10))))))
(t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10)) {}))))
(t/testing "should return distinct indices"
(t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10))) (range 10))))))
(t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10) {})) (range 10))))))
(t/deftest select-downsample-random-test
(t/testing "select-downsample-random"
@ -157,7 +157,7 @@
(t/is (= '(0.25 0.25 0.25 0.25) (hs/hyperselection-track parents2))))
(t/testing "reindex-pop works correctly"
(t/is (= '({:blah 3 :index 0} {:blah 3 :index 1}
{:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1)))
{:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1 {})))
(t/is (= '({:plushy 2 :index 0} {:blah 3 :index 1}
{:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2)))
(t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents))))))
{:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2 {})))
(t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents {}))))))