From 217f1950471243401a50e24d0fc912638e7820f3 Mon Sep 17 00:00:00 2001 From: Ryan Boldi Date: Tue, 29 Mar 2022 15:32:34 -0400 Subject: [PATCH] Implemented max-min, which takes the maximum minimum distance into account instead of average distances --- src/propeller/downsample.cljc | 23 ++++++++++++++++++++++- src/propeller/gp.cljc | 10 +++------- src/propeller/tools/metrics.cljc | 6 +++++- test/propeller/tools/metrics_test.cljc | 5 +++++ 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/propeller/downsample.cljc b/src/propeller/downsample.cljc index ed7be71..283761f 100644 --- a/src/propeller/downsample.cljc +++ b/src/propeller/downsample.cljc @@ -20,7 +20,7 @@ [training-data {:keys [downsample-rate]}] (take (int (* downsample-rate (count training-data))) (shuffle training-data))) -(defn select-downsample-tournament +(defn select-downsample-avg "uses case-tournament selection to select a downsample that is biased to being spread out" [training-data {:keys [downsample-rate case-t-size]}] (let [shuffled-cases (shuffle training-data) @@ -42,6 +42,27 @@ (shuffle (concat (utils/drop-nth selected-case-index tournament) rest-of-cases)))))))) +(defn select-downsample-maxmin + "uses tournament selection to select a downsample that has its cases maximally far away" + [training-data {:keys [downsample-rate case-t-size]}] + (let [shuffled-cases (shuffle training-data) + goal-size (int (* downsample-rate (count training-data)))] + (loop [new-downsample (conj [] (first shuffled-cases)) + cases-to-pick-from (rest shuffled-cases)] + (if (>= (count new-downsample) goal-size) + new-downsample + (let [tournament (take case-t-size cases-to-pick-from) + rest-of-cases (drop case-t-size cases-to-pick-from) + min-case-distances (metrics/min-of-colls + (map (fn [distance-list] + (utils/filter-by-index distance-list (map #(:index %) tournament))) + (map #(:distances %) new-downsample))) + selected-case-index (metrics/argmax min-case-distances)] + (prn {:min-case-distances 
min-case-distances :selected-case-index selected-case-index}) + (recur (conj new-downsample (nth tournament selected-case-index)) + (shuffle (concat (utils/drop-nth selected-case-index tournament) + rest-of-cases)))))))) + (defn select-downsample-metalex "uses meta-lexicase selection to select a downsample that is biased to being spread out" [training-data {:keys [downsample-rate]}]) diff --git a/src/propeller/gp.cljc b/src/propeller/gp.cljc index d37a07f..f76c9d6 100644 --- a/src/propeller/gp.cljc +++ b/src/propeller/gp.cljc @@ -52,7 +52,8 @@ (prn {:data (some #(when (zero? (:index %)) %) indexed-training-data)}) (let [training-data (if (= (:parent-selection argmap) :ds-lexicase) (case (:ds-function argmap) - :case-tournament (downsample/select-downsample-tournament indexed-training-data argmap) + :case-avg (downsample/select-downsample-avg indexed-training-data argmap) + :case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap) (downsample/select-downsample-random indexed-training-data argmap)) indexed-training-data) ;defaults to random full-evaluated-pop (sort-by :total-error @@ -65,13 +66,8 @@ population)) best-individual (first ds-evaluated-pop) best-individual-passes-ds (and (= (:parent-selection argmap) :ds-lexicase) (<= (:total-error best-individual) solution-error-threshold)) - tot-evaluated-pop (when best-individual-passes-ds ;evaluate the whole pop on all training data - (sort-by :total-error - (mapper - (partial error-function argmap (:training-data argmap)) - population))) ;;best individual on all training-cases - tot-best-individual (if best-individual-passes-ds (first tot-evaluated-pop) best-individual)] + tot-best-individual (if best-individual-passes-ds (first full-evaluated-pop) best-individual)] (prn (first training-data)) (if (:custom-report argmap) ((:custom-report argmap) ds-evaluated-pop generation argmap) diff --git a/src/propeller/tools/metrics.cljc b/src/propeller/tools/metrics.cljc index 3019edd..d0039c3 100755 --- 
a/src/propeller/tools/metrics.cljc +++ b/src/propeller/tools/metrics.cljc @@ -18,9 +18,13 @@ (defn mean-of-colls "returns the mean of multiple colls" [coll] - ;(prn {:func :mean-of-colls :coll coll}) (map mean (math/transpose coll))) +(defn min-of-colls + "returns the smallest value of multiple colls" + [coll] + (map #(apply min %) (math/transpose coll))) + (defn median "Returns the median of a collection." [coll] diff --git a/test/propeller/tools/metrics_test.cljc b/test/propeller/tools/metrics_test.cljc index b35b7f1..3b0908d 100644 --- a/test/propeller/tools/metrics_test.cljc +++ b/test/propeller/tools/metrics_test.cljc @@ -14,6 +14,11 @@ (t/is (= (m/mean-of-colls '((1 2 3) (4 3 2 1))) '(2.5 2.5 2.5))) (t/is (= (m/mean-of-colls '((1))) '(1.0)))) +(t/deftest min-of-colls-test + (t/is (= (m/min-of-colls '((1 2 3 4) (4 3 2 1))) '(1 2 2 1))) + (t/is (= (m/min-of-colls '((1 2 3) (4 3 2 1))) '(1 2 2))) + (t/is (= (m/min-of-colls '((1))) '(1)))) + (t/deftest mean-test (t/is (= (m/mean '(1 2 3 4)) 2.5)) (t/is (= (m/mean '()) 0)))