Implements first case subsampling function.
This commit is contained in:
parent
22b7067f10
commit
43fde4c7ad
@ -1,5 +1,7 @@
|
|||||||
(ns propeller.downsample
|
(ns propeller.downsample
|
||||||
(:require [propeller.tools.math :as math]))
|
(:require [propeller.tools.math :as math]
|
||||||
|
[propeller.tools.metrics :as metrics]
|
||||||
|
[propeller.utils :as utils]))
|
||||||
|
|
||||||
(defn assign-indices-to-data
|
(defn assign-indices-to-data
|
||||||
"assigns an index to each training case in order to differentiate them when downsampling"
|
"assigns an index to each training case in order to differentiate them when downsampling"
|
||||||
@ -18,6 +20,32 @@
|
|||||||
[training-data {:keys [downsample-rate]}]
|
[training-data {:keys [downsample-rate]}]
|
||||||
(take (int (* downsample-rate (count training-data))) (shuffle training-data)))
|
(take (int (* downsample-rate (count training-data))) (shuffle training-data)))
|
||||||
|
|
||||||
|
(defn select-downsample-tournament
  "Uses case-tournament selection to build a downsample of training-data that
  is biased towards being spread out.

  Arguments:
    training-data   - collection of case maps; each case is read through its
                      :index and :distances keys
    downsample-rate - fraction of training-data to keep; the target size is
                      (int (* downsample-rate (count training-data)))
    case-t-size     - number of candidate cases drawn for each tournament

  Returns a vector of the selected cases. When the target size is not
  positive (empty training-data or a very small downsample-rate) an empty
  vector is returned, which matches what select-downsample-random yields via
  take in the same situation.

  The sample is seeded with one random case. Each round takes the first
  case-t-size cases of the shuffled pool as a tournament, asks
  metrics/mean-of-colls and metrics/argmax for the winning position, moves
  that case into the sample, and reshuffles the losers back into the pool."
  [training-data {:keys [downsample-rate case-t-size]}]
  (let [goal-size (int (* downsample-rate (count training-data)))]
    (if-not (pos? goal-size)
      ;; Nothing to select: avoid seeding the sample with (first []) => nil.
      []
      (let [shuffled-cases (shuffle training-data)]
        (loop [new-downsample [(first shuffled-cases)]
               cases-to-pick-from (rest shuffled-cases)]
          (if (>= (count new-downsample) goal-size)
            new-downsample
            (let [[tournament rest-of-cases] (split-at case-t-size cases-to-pick-from)
                  ;; Computed once per round instead of once per selected case.
                  tournament-indices (map :index tournament)
                  ;; One entry per tournament member, built from the :distances
                  ;; lists of the cases already selected.
                  ;; NOTE(review): presumably the element-wise mean distance
                  ;; from each candidate to the current sample -- confirm
                  ;; against metrics/mean-of-colls and utils/filter-by-index.
                  case-distances (metrics/mean-of-colls
                                  (map #(utils/filter-by-index % tournament-indices)
                                       (map :distances new-downsample)))
                  selected-case-index (metrics/argmax case-distances)]
              (recur (conj new-downsample (nth tournament selected-case-index))
                     ;; Losing candidates go back into the pool for later rounds.
                     (shuffle (concat (utils/drop-nth selected-case-index tournament)
                                      rest-of-cases))))))))))
|
||||||
|
|
||||||
|
(defn select-downsample-metalex
  "Placeholder for meta-lexicase downsample selection, intended to pick a
  downsample that is biased towards being spread out.

  Not implemented yet: the arguments are accepted but ignored, and the
  function always returns nil."
  [training-data {:keys [downsample-rate]}]
  ;; Intentionally empty for now; nil is the explicit result.
  nil)
|
||||||
|
|
||||||
(defn get-distance-between-cases
|
(defn get-distance-between-cases
|
||||||
"returns the distance between two cases given a list of individual error vectors, and the index these
|
"returns the distance between two cases given a list of individual error vectors, and the index these
|
||||||
cases exist in the error vector"
|
cases exist in the error vector"
|
||||||
@ -54,7 +82,9 @@
|
|||||||
individuals performance on the each case in the ds-data, in order"
|
individuals performance on the each case in the ds-data, in order"
|
||||||
[evaluated-pop ds-data training-data]
|
[evaluated-pop ds-data training-data]
|
||||||
(let [ds-indices (map #(:index %) ds-data) errors (map #(:errors %) evaluated-pop)]
|
(let [ds-indices (map #(:index %) ds-data) errors (map #(:errors %) evaluated-pop)]
|
||||||
(merge-map-lists-at-index training-data
|
(merge-map-lists-at-index
|
||||||
(map-indexed (fn [idx d-case]
|
training-data (map-indexed
|
||||||
(update-in d-case [:distances] #(update-at-indices %
|
(fn [idx d-case] (update-in d-case
|
||||||
(map (fn [other] (get-distance-between-cases errors idx other)) (range (count ds-indices))) ds-indices))) ds-data))))
|
[:distances] #(update-at-indices
|
||||||
|
% (map (fn [other] (get-distance-between-cases errors idx other))
|
||||||
|
(range (count ds-indices))) ds-indices))) ds-data))))
|
||||||
|
@ -49,7 +49,9 @@
|
|||||||
(range population-size))
|
(range population-size))
|
||||||
indexed-training-data (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap))]
|
indexed-training-data (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap))]
|
||||||
(let [training-data (if (= (:parent-selection argmap) :ds-lexicase)
|
(let [training-data (if (= (:parent-selection argmap) :ds-lexicase)
|
||||||
(downsample/select-downsample-random indexed-training-data argmap)
|
(case (:ds-function argmap)
|
||||||
|
:case-tournament (downsample/select-downsample-tournament indexed-training-data argmap)
|
||||||
|
(downsample/select-downsample-random indexed-training-data argmap)) ;defaults to random
|
||||||
indexed-training-data)
|
indexed-training-data)
|
||||||
evaluated-pop (sort-by :total-error
|
evaluated-pop (sort-by :total-error
|
||||||
(mapper
|
(mapper
|
||||||
@ -93,4 +95,6 @@
|
|||||||
(first evaluated-pop))
|
(first evaluated-pop))
|
||||||
(repeatedly population-size
|
(repeatedly population-size
|
||||||
#(variation/new-individual evaluated-pop argmap)))
|
#(variation/new-individual evaluated-pop argmap)))
|
||||||
(if (= (:parent-selection argmap) :ds-lexicase) (downsample/update-case-distances evaluated-pop training-data indexed-training-data) indexed-training-data))))))
|
(if (= (:parent-selection argmap) :ds-lexicase)
|
||||||
|
(downsample/update-case-distances evaluated-pop training-data indexed-training-data)
|
||||||
|
indexed-training-data))))))
|
Loading…
x
Reference in New Issue
Block a user