implemented auto downsample size

This commit is contained in:
Ryan Boldi 2022-06-08 08:51:16 -04:00
parent 72d325a609
commit 6bc31e2d9c
3 changed files with 29 additions and 5 deletions

View File

@ -67,9 +67,32 @@
(shuffle (concat (utils/drop-nth selected-case-index tournament) (shuffle (concat (utils/drop-nth selected-case-index tournament)
rest-of-cases)))))))) rest-of-cases))))))))
(defn select-downsample-maxmin-adaptive
  "Selects a downsample whose cases are maximally far apart by sequentially
  adding the candidate case whose closest already-selected case is farthest
  away. Stops automatically, without adding the candidate, once the largest
  such minimum distance is at or below case-delta.

  training-data - collection of case maps; this function reads :index,
                  :distances and :input1 from them.
  case-t-size   - number of candidate cases considered at each step.
  case-delta    - distance threshold at or below which selection stops.

  Returns a vector of the selected cases (empty when training-data is empty)."
  [training-data {:keys [case-t-size case-delta]}]
  (if (empty? training-data)
    ;; nothing to choose from; avoid seeding the downsample with nil
    []
    (let [shuffled-cases (shuffle training-data)]
      (loop [new-downsample [(first shuffled-cases)]
             cases-to-pick-from (rest shuffled-cases)]
        (if (empty? cases-to-pick-from)
          new-downsample
          (let [tournament (take case-t-size cases-to-pick-from)
                rest-of-cases (drop case-t-size cases-to-pick-from)
                tournament-indices (map :index tournament)
                ;; for every tournament case, its distance to the closest
                ;; case that is already in the downsample
                min-case-distances (metrics/min-of-colls
                                    (map (fn [distance-list]
                                           (utils/filter-by-index distance-list tournament-indices))
                                         (map :distances new-downsample)))
                selected-case-index (metrics/argmax min-case-distances)
                ;; sequential inputs are logged by their first element only
                loggable-input (if (sequential? (:input1 (first new-downsample)))
                                 (comp first :input1)
                                 :input1)]
            (prn {:cases-in-ds (map loggable-input new-downsample) :cases-in-tourn (map loggable-input tournament)})
            (prn {:min-case-distances min-case-distances :selected-case-index selected-case-index})
            ;; check the threshold BEFORE adding, so a case that is already
            ;; within case-delta of the downsample is never included
            (if (<= (apply max min-case-distances) case-delta)
              new-downsample
              (recur (conj new-downsample (nth tournament selected-case-index))
                     (shuffle (concat (utils/drop-nth selected-case-index tournament)
                                      rest-of-cases))))))))))
(defn get-distance-between-cases (defn get-distance-between-cases
"returns the distance between two cases given a list of individual error vectors, and the index these "returns the distance between two cases given a list of individual error vectors, and the index these

View File

@ -56,6 +56,7 @@
(case (:ds-function argmap) (case (:ds-function argmap)
:case-avg (downsample/select-downsample-avg indexed-training-data argmap) :case-avg (downsample/select-downsample-avg indexed-training-data argmap)
:case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap) :case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap)
:case-maxmin-auto (downsample/select-downsample-maxmin-adaptive indexed-training-data argmap)
(downsample/select-downsample-random indexed-training-data argmap)) (downsample/select-downsample-random indexed-training-data argmap))
indexed-training-data) ;defaults to random indexed-training-data) ;defaults to random
parent-reps (if (zero? (mod generation ds-parent-gens)) ;every ds-parent-gens generations parent-reps (if (zero? (mod generation ds-parent-gens)) ;every ds-parent-gens generations

View File

@ -86,8 +86,8 @@
:training-data (:train train-and-test-data) :training-data (:train train-and-test-data)
:testing-data (:test train-and-test-data) :testing-data (:test train-and-test-data)
:case-t-size (count (:train train-and-test-data)) :case-t-size (count (:train train-and-test-data))
:case-parent-rate 0 :ds-parent-rate 0
:case-parent-gens 1 :ds-parent-gens 1
:max-generations 500 :max-generations 500
:population-size 500 :population-size 500
:max-initial-plushy-size 100 :max-initial-plushy-size 100