Merge pull request #63 from ryanboldi/main

pmapallv multi-collection implementation
This commit is contained in:
Lee Spector 2023-10-16 18:34:17 -04:00 committed by GitHub
commit adf039195c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 40 additions and 36 deletions

View File

@ -5,15 +5,15 @@
(defn assign-indices-to-data
"assigns an index to each training case in order to differentiate them when downsampling"
[training-data]
(map (fn [data-map index]
[training-data argmap]
(utils/pmapallv (fn [data-map index]
(let [data-m (if (map? data-map) data-map (assoc {} :data data-map))] ;if data is not in a map, make it one
(assoc data-m :index index)))
training-data (range (count training-data))))
training-data (range (count training-data)) argmap))
(defn initialize-case-distances
[{:keys [training-data population-size]}]
(map #(assoc % :distances (vec (repeat (count training-data) population-size))) training-data))
[{:keys [training-data population-size] :as argmap}]
(utils/pmapallv #(assoc % :distances (vec (repeat (count training-data) population-size))) training-data argmap))
(defn select-downsample-random
"Selects a downsample from the training cases and returns it"

View File

@ -70,7 +70,7 @@
(if (:diploid argmap)
(interleave plushy plushy)
plushy))}) (range population-size) argmap)
indexed-training-data (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap))]
indexed-training-data (if downsample? (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap) argmap) (:training-data argmap))]
(let [training-data (if downsample?
(case (:ds-function argmap)
:case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap)
@ -133,13 +133,16 @@
(+ evaluations (* population-size (count training-data)) ;every member evaluated on the current sample
(if (zero? (mod generation ds-parent-gens)) (* (count parent-reps) (- (count indexed-training-data) (count training-data))) 0) ; the parent-reps not evaluted already on down-sample
(if best-individual-passes-ds (- (count indexed-training-data) (count training-data)) 0)) ; if we checked for generalization or not
(let [reindexed-pop (hyperselection/reindex-pop evaluated-pop)] ; give every individual an index for hyperselection loggin
(let [reindexed-pop (hyperselection/reindex-pop evaluated-pop argmap)] ; give every individual an index for hyperselection loggin
(hyperselection/log-hyperselection-and-ret
(if (:elitism argmap)
(conj (repeatedly (dec population-size) #(variation/new-individual reindexed-pop argmap))
(first reindexed-pop))
(repeatedly population-size ;need to count occurance of each parent, and reset IDs
#(variation/new-individual reindexed-pop argmap)))))
(conj (utils/pmapallv (fn [_] (variation/new-individual reindexed-pop argmap))
(range (dec population-size))
argmap)
(first reindexed-pop)) ;elitism maintains the most-fit individual
(utils/pmapallv (fn [_] (variation/new-individual reindexed-pop argmap))
(range population-size)
argmap))))
(if downsample?
(if (zero? (mod generation ds-parent-gens))
(downsample/update-case-distances rep-evaluated-pop indexed-training-data indexed-training-data ids-type (/ solution-error-threshold (count indexed-training-data))) ; update distances every ds-parent-gens generations

View File

@ -1,4 +1,5 @@
(ns propeller.hyperselection)
(ns propeller.hyperselection
(:require [propeller.utils :as utils]))
(defn sum-list-map-indices
"sums a list of maps that have the :index property's index multiplicity"
@ -31,5 +32,5 @@
(defn reindex-pop
"assigns each member of the population a unique index before selection to track hyperselection"
[pop]
(map (fn [indiv index] (assoc indiv :index index)) pop (range (count pop))))
[pop argmap]
(utils/pmapallv (fn [indiv index] (assoc indiv :index index)) pop (range (count pop)) argmap))

View File

@ -102,23 +102,23 @@
(max height))))))
(defn pmapallv
"A utility for concurrent execution of a function on items in a collection.
In single-thread-mode this acts like mapv. Otherwise it acts like pmap but:
1) coll should be finite, 2) the returned sequence will not be lazy, and will
in fact be a vector, 3) calls to f may occur in any order, to maximize
multicore processor utilization, and 4) takes only one coll so far."
[f coll args]
#?(:clj (vec (if (:single-thread-mode args)
(doall (map f coll))
"A utility for concurrent execution of a function. If :single-thread-mode is
truthy in the final arg then this acts like mapv of f on the provided colls.
Otherwise it acts like pmap but: 1) the colls should be finite, 2) the
returned sequence will not be lazy, and will in fact be a vector, and
3) calls to f may occur in any order, to maximize multicore processor utilization."
[f & colls-args]
#?(:clj (vec (if (:single-thread-mode (last colls-args))
(apply mapv f (butlast colls-args))
(let [agents (map #(agent % :error-handler
(fn [agnt except]
(repl/pst except 1000)
(System/exit 0)))
coll)]
(dorun (map #(send % f) agents))
(apply map vector (butlast colls-args)))]
(dorun (map (fn [a] (send a #(apply f %))) agents))
(apply await agents)
(doall (map deref agents)))))
:cljs (mapv f coll)))
(doall (mapv deref agents)))))
:cljs (apply mapv f (butlast colls-args))))
(def PI
#?(:clj Math/PI

View File

@ -9,12 +9,12 @@
(t/deftest assign-indices-to-data-test
(t/testing "assign-indices-to-data"
(t/testing "should return a map of the same length"
(t/is (= (count (ds/assign-indices-to-data (range 10))) 10))
(t/is (= (count (ds/assign-indices-to-data (range 0))) 0)))
(t/is (= (count (ds/assign-indices-to-data (range 10) {})) 10))
(t/is (= (count (ds/assign-indices-to-data (range 0) {})) 0)))
(t/testing "should return a map where each element has an index key"
(t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10))))))
(t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10)) {}))))
(t/testing "should return distinct indices"
(t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10))) (range 10))))))
(t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10) {})) (range 10))))))
(t/deftest select-downsample-random-test
(t/testing "select-downsample-random"
@ -157,7 +157,7 @@
(t/is (= '(0.25 0.25 0.25 0.25) (hs/hyperselection-track parents2))))
(t/testing "reindex-pop works correctly"
(t/is (= '({:blah 3 :index 0} {:blah 3 :index 1}
{:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1)))
{:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1 {})))
(t/is (= '({:plushy 2 :index 0} {:blah 3 :index 1}
{:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2)))
(t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents))))))
{:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2 {})))
(t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents {}))))))