implemented auto downsample size

This commit is contained in:
Ryan Boldi 2022-06-08 08:51:16 -04:00
parent 72d325a609
commit 6bc31e2d9c
3 changed files with 29 additions and 5 deletions

View File

@ -67,9 +67,32 @@
(shuffle (concat (utils/drop-nth selected-case-index tournament)
rest-of-cases))))))))
(defn select-downsample-metalex
"uses meta-lexicase selection to select a downsample that is biased to being spread out"
[training-data {:keys [downsample-rate]}])
(defn select-downsample-maxmin-adaptive
"selects a downsample that has it's cases maximally far away by sequentially
adding cases to the downsample that have their closest case maximally far away
automatically stops when the maximum minimum distance is below delta"
[training-data {:keys [case-t-size case-delta]}]
(let [shuffled-cases (shuffle training-data)]
(loop [new-downsample (conj [] (first shuffled-cases))
cases-to-pick-from (rest shuffled-cases)
end? false]
(if (or end? (zero? (count cases-to-pick-from)))
new-downsample
(let [tournament (take case-t-size cases-to-pick-from)
rest-of-cases (drop case-t-size cases-to-pick-from)
min-case-distances (metrics/min-of-colls
(map (fn [distance-list]
(utils/filter-by-index distance-list (map #(:index %) tournament)))
(map #(:distances %) new-downsample)))
selected-case-index (metrics/argmax min-case-distances)]
(if (sequential? (:input1 (first new-downsample)))
(prn {:cases-in-ds (map #(first (:input1 %)) new-downsample) :cases-in-tourn (map #(first (:input1 %)) tournament)})
(prn {:cases-in-ds (map #(:input1 %) new-downsample) :cases-in-tourn (map #(:input1 %) tournament)}))
(prn {:min-case-distances min-case-distances :selected-case-index selected-case-index})
(recur (conj new-downsample (nth tournament selected-case-index))
(shuffle (concat (utils/drop-nth selected-case-index tournament)
rest-of-cases))
(<= (apply max min-case-distances) case-delta)))))))
(defn get-distance-between-cases
"returns the distance between two cases given a list of individual error vectors, and the index these

View File

@ -56,6 +56,7 @@
(case (:ds-function argmap)
:case-avg (downsample/select-downsample-avg indexed-training-data argmap)
:case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap)
:case-maxmin-auto (downsample/select-downsample-maxmin-adaptive indexed-training-data argmap)
(downsample/select-downsample-random indexed-training-data argmap))
indexed-training-data) ;defaults to random
parent-reps (if (zero? (mod generation ds-parent-gens)) ;every ds-parent-gens generations

View File

@ -86,8 +86,8 @@
:training-data (:train train-and-test-data)
:testing-data (:test train-and-test-data)
:case-t-size (count (:train train-and-test-data))
:case-parent-rate 0
:case-parent-gens 1
:ds-parent-rate 0
:ds-parent-gens 1
:max-generations 500
:population-size 500
:max-initial-plushy-size 100