From 43fde4c7ad5057e6038a2fbb332860f31bea250b Mon Sep 17 00:00:00 2001 From: Ryan Boldi Date: Mon, 14 Mar 2022 19:04:32 -0400 Subject: [PATCH] Implements first case subsampling function. --- src/propeller/downsample.cljc | 44 +++++++++++++++++++++++++++++------ src/propeller/gp.cljc | 8 +++++-- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/propeller/downsample.cljc b/src/propeller/downsample.cljc index 3e24f26..ed7be71 100644 --- a/src/propeller/downsample.cljc +++ b/src/propeller/downsample.cljc @@ -1,5 +1,7 @@ (ns propeller.downsample - (:require [propeller.tools.math :as math])) + (:require [propeller.tools.math :as math] + [propeller.tools.metrics :as metrics] + [propeller.utils :as utils])) (defn assign-indices-to-data "assigns an index to each training case in order to differentiate them when downsampling" @@ -18,6 +20,32 @@ [training-data {:keys [downsample-rate]}] (take (int (* downsample-rate (count training-data))) (shuffle training-data))) +(defn select-downsample-tournament + "uses case-tournament selection to select a downsample that is biased to being spread out" + [training-data {:keys [downsample-rate case-t-size]}] + (let [shuffled-cases (shuffle training-data) + goal-size (int (* downsample-rate (count training-data)))] + (loop [new-downsample (conj [] (first shuffled-cases)) + cases-to-pick-from (rest shuffled-cases)] + ;(prn {:new-downsample new-downsample :cases-to-pick-from cases-to-pick-from}) + (if (>= (count new-downsample) goal-size) + new-downsample + (let [tournament (take case-t-size cases-to-pick-from) + rest-of-cases (drop case-t-size cases-to-pick-from) + case-distances (metrics/mean-of-colls + (map (fn [distance-list] + (utils/filter-by-index distance-list (map #(:index %) tournament))) + (map #(:distances %) new-downsample))) + selected-case-index (metrics/argmax case-distances)] + (prn {:avg-case-distances case-distances :selected-case-index selected-case-index}) + (recur (conj new-downsample (nth tournament selected-case-index)) + (shuffle (concat (utils/drop-nth selected-case-index tournament) + rest-of-cases)))))))) + +(defn select-downsample-metalex + "uses meta-lexicase selection to select a downsample that is biased to being spread out" + [training-data {:keys [downsample-rate]}]) + (defn get-distance-between-cases "returns the distance between two cases given a list of individual error vectors, and the index these cases exist in the error vector" @@ -52,9 +80,11 @@ "updates the case distance field of training-data list, should be called after evaluation of individuals evaluated-pop should be a list of individuals that all have the :errors field with a list of this individuals performance on the each case in the ds-data, in order" - [evaluated-pop ds-data training-data] - (let [ds-indices (map #(:index %) ds-data) errors (map #(:errors %) evaluated-pop)] - (merge-map-lists-at-index training-data - (map-indexed (fn [idx d-case] - (update-in d-case [:distances] #(update-at-indices % - (map (fn [other] (get-distance-between-cases errors idx other)) (range (count ds-indices))) ds-indices))) ds-data)))) + [evaluated-pop ds-data training-data] + (let [ds-indices (map #(:index %) ds-data) errors (map #(:errors %) evaluated-pop)] + (merge-map-lists-at-index + training-data (map-indexed + (fn [idx d-case] (update-in d-case + [:distances] #(update-at-indices + % (map (fn [other] (get-distance-between-cases errors idx other)) + (range (count ds-indices))) ds-indices))) ds-data)))) diff --git a/src/propeller/gp.cljc b/src/propeller/gp.cljc index dafa99d..be79e9d 100644 --- a/src/propeller/gp.cljc +++ b/src/propeller/gp.cljc @@ -49,7 +49,9 @@ (range population-size)) indexed-training-data (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap))] (let [training-data (if (= (:parent-selection argmap) :ds-lexicase) - (downsample/select-downsample-random indexed-training-data argmap) + (case (:ds-function argmap) + :case-tournament (downsample/select-downsample-tournament indexed-training-data argmap) + (downsample/select-downsample-random indexed-training-data argmap)) ;defaults to random indexed-training-data) evaluated-pop (sort-by :total-error (mapper @@ -93,4 +95,6 @@ (first evaluated-pop)) (repeatedly population-size #(variation/new-individual evaluated-pop argmap))) - (if (= (:parent-selection argmap) :ds-lexicase) (downsample/update-case-distances evaluated-pop training-data indexed-training-data) indexed-training-data)))))) \ No newline at end of file + (if (= (:parent-selection argmap) :ds-lexicase) + (downsample/update-case-distances evaluated-pop training-data indexed-training-data) + indexed-training-data)))))) \ No newline at end of file