From 6bc31e2d9c7d618e0b93edd2c2b8359e4544b5fe Mon Sep 17 00:00:00 2001 From: Ryan Boldi Date: Wed, 8 Jun 2022 08:51:16 -0400 Subject: [PATCH] implemented auto downsample size --- src/propeller/downsample.cljc | 29 +++++++++++++++++-- src/propeller/gp.cljc | 1 + .../problems/simple_classification.cljc | 4 +-- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/propeller/downsample.cljc b/src/propeller/downsample.cljc index 25749c9..0e0f7d7 100644 --- a/src/propeller/downsample.cljc +++ b/src/propeller/downsample.cljc @@ -67,9 +67,32 @@ (shuffle (concat (utils/drop-nth selected-case-index tournament) rest-of-cases)))))))) -(defn select-downsample-metalex - "uses meta-lexicase selection to select a downsample that is biased to being spread out" - [training-data {:keys [downsample-rate]}]) +(defn select-downsample-maxmin-adaptive + "selects a downsample that has its cases maximally far away by sequentially + adding cases to the downsample that have their closest case maximally far away; + automatically stops when the maximum minimum distance is at or below case-delta" +[training-data {:keys [case-t-size case-delta]}] +(let [shuffled-cases (shuffle training-data)] + (loop [new-downsample (conj [] (first shuffled-cases)) + cases-to-pick-from (rest shuffled-cases) + end? false] + (if (or end? (zero? (count cases-to-pick-from))) + new-downsample + (let [tournament (take case-t-size cases-to-pick-from) + rest-of-cases (drop case-t-size cases-to-pick-from) + min-case-distances (metrics/min-of-colls + (map (fn [distance-list] + (utils/filter-by-index distance-list (map #(:index %) tournament))) + (map #(:distances %) new-downsample))) + selected-case-index (metrics/argmax min-case-distances)] + (if (sequential? 
(:input1 (first new-downsample))) + (prn {:cases-in-ds (map #(first (:input1 %)) new-downsample) :cases-in-tourn (map #(first (:input1 %)) tournament)}) + (prn {:cases-in-ds (map #(:input1 %) new-downsample) :cases-in-tourn (map #(:input1 %) tournament)})) + (prn {:min-case-distances min-case-distances :selected-case-index selected-case-index}) + (recur (conj new-downsample (nth tournament selected-case-index)) + (shuffle (concat (utils/drop-nth selected-case-index tournament) + rest-of-cases)) + (<= (apply max min-case-distances) case-delta))))))) (defn get-distance-between-cases "returns the distance between two cases given a list of individual error vectors, and the index these diff --git a/src/propeller/gp.cljc b/src/propeller/gp.cljc index 34eb740..7c2e81a 100644 --- a/src/propeller/gp.cljc +++ b/src/propeller/gp.cljc @@ -56,6 +56,7 @@ (case (:ds-function argmap) :case-avg (downsample/select-downsample-avg indexed-training-data argmap) :case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap) + :case-maxmin-auto (downsample/select-downsample-maxmin-adaptive indexed-training-data argmap) (downsample/select-downsample-random indexed-training-data argmap)) indexed-training-data) ;defaults to random parent-reps (if (zero? (mod generation ds-parent-gens)) ;every ds-parent-gens generations diff --git a/src/propeller/problems/simple_classification.cljc b/src/propeller/problems/simple_classification.cljc index deb2115..44e02ab 100644 --- a/src/propeller/problems/simple_classification.cljc +++ b/src/propeller/problems/simple_classification.cljc @@ -86,8 +86,8 @@ :training-data (:train train-and-test-data) :testing-data (:test train-and-test-data) :case-t-size (count (:train train-and-test-data)) - :case-parent-rate 0 - :case-parent-gens 1 + :ds-parent-rate 0 + :ds-parent-gens 1 :max-generations 500 :population-size 500 :max-initial-plushy-size 100