tested and fixed auto-ds size

This commit is contained in:
Ryan Boldi 2022-10-18 15:45:31 -04:00
parent 7467f638a7
commit fc738e2154
2 changed files with 39 additions and 21 deletions

View File

@ -45,15 +45,14 @@
(defn select-downsample-maxmin (defn select-downsample-maxmin
"selects a downsample that has it's cases maximally far away by sequentially "selects a downsample that has it's cases maximally far away by sequentially
adding cases to the downsample that have their closest case maximally far away" adding cases to the downsample that have their closest case maximally far away"
[training-data {:keys [downsample-rate case-t-size]}] [training-data {:keys [downsample-rate]}]
(let [shuffled-cases (shuffle training-data) (let [shuffled-cases (shuffle training-data)
goal-size (int (* downsample-rate (count training-data)))] goal-size (int (* downsample-rate (count training-data)))]
(loop [new-downsample (conj [] (first shuffled-cases)) (loop [new-downsample (conj [] (first shuffled-cases))
cases-to-pick-from (rest shuffled-cases)] cases-to-pick-from (rest shuffled-cases)]
(if (>= (count new-downsample) goal-size) (if (>= (count new-downsample) goal-size)
new-downsample new-downsample
(let [tournament (take case-t-size cases-to-pick-from) (let [tournament cases-to-pick-from
rest-of-cases (drop case-t-size cases-to-pick-from)
min-case-distances (metrics/min-of-colls min-case-distances (metrics/min-of-colls
(map (fn [distance-list] (map (fn [distance-list]
(utils/filter-by-index distance-list (map #(:index %) tournament))) (utils/filter-by-index distance-list (map #(:index %) tournament)))
@ -64,35 +63,31 @@
(prn {:cases-in-ds (map #(:input1 %) new-downsample) :cases-in-tourn (map #(:input1 %) tournament)})) (prn {:cases-in-ds (map #(:input1 %) new-downsample) :cases-in-tourn (map #(:input1 %) tournament)}))
(prn {:min-case-distances min-case-distances :selected-case-index selected-case-index}) (prn {:min-case-distances min-case-distances :selected-case-index selected-case-index})
(recur (conj new-downsample (nth tournament selected-case-index)) (recur (conj new-downsample (nth tournament selected-case-index))
(shuffle (concat (utils/drop-nth selected-case-index tournament) (shuffle (utils/drop-nth selected-case-index tournament))))))))
rest-of-cases))))))))
(defn select-downsample-maxmin-adaptive
  "Selects a downsample that has its cases maximally far away from each other
  by sequentially adding the remaining case whose closest already-selected
  case is maximally far away.

  Stops automatically when the maximum of those minimum distances is less
  than or equal to `case-delta`, or when there are no cases left to pick from.

  training-data - collection of case maps; this function reads the :index,
                  :distances and :input1 keys of each case.
  case-delta    - distance threshold at or below which no further case is added.

  Returns a vector of the selected cases (empty when training-data is empty)."
  [training-data {:keys [case-delta]}]
  (if (empty? training-data)
    ;; Nothing to sample from; avoids returning a downsample containing nil.
    []
    (let [shuffled-cases (shuffle training-data)]
      (loop [new-downsample [(first shuffled-cases)]
             cases-to-pick-from (rest shuffled-cases)]
        ;; The exhaustion check must come BEFORE any distance computation:
        ;; with no candidates left, (apply max ...) would be called with zero
        ;; arguments and throw instead of returning the finished downsample.
        (if (empty? cases-to-pick-from)
          new-downsample
          (let [tournament cases-to-pick-from
                ;; Loop-invariant within this iteration, so computed once.
                tournament-indices (map :index tournament)
                ;; NOTE(review): presumably yields, for each tournament case,
                ;; its distance to the nearest case already in the downsample
                ;; -- confirm against metrics/min-of-colls and
                ;; utils/filter-by-index.
                min-case-distances (metrics/min-of-colls
                                    (map (fn [distance-list]
                                           (utils/filter-by-index distance-list tournament-indices))
                                         (map :distances new-downsample)))
                selected-case-index (metrics/argmax min-case-distances)]
            (if (<= (apply max min-case-distances) case-delta)
              new-downsample
              (do
                (if (sequential? (:input1 (first new-downsample)))
                  (prn {:cases-in-ds (map #(first (:input1 %)) new-downsample) :cases-in-tourn (map #(first (:input1 %)) tournament)})
                  (prn {:cases-in-ds (map #(:input1 %) new-downsample) :cases-in-tourn (map #(:input1 %) tournament)}))
                (prn {:min-case-distances min-case-distances :selected-case-index selected-case-index})
                (recur (conj new-downsample (nth tournament selected-case-index))
                       (shuffle (utils/drop-nth selected-case-index tournament)))))))))))
(defn get-distance-between-cases (defn get-distance-between-cases
"returns the distance between two cases given a list of individual error vectors, and the index these "returns the distance between two cases given a list of individual error vectors, and the index these

View File

@ -166,10 +166,33 @@
{:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]} {:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]}
{:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]} {:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]}
{:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]}) {:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]})
{:downsample-rate 0.4 :case-t-size 5})] {:downsample-rate 0.4})]
(prn {:selected selected}) (prn {:selected selected})
(t/is (or (= (:index (first selected)) 1) (= (:index (second selected)) 1)))))) (t/is (or (= (:index (first selected)) 1) (= (:index (second selected)) 1))))))
(t/deftest case-maxmin-adaptive
  ;; Case 1 is at distance 5 from every other case, while all other cases are
  ;; at distance 0 from each other. With a delta of 0 the downsample should
  ;; therefore hold exactly two cases, one of which is case 1.
  (t/testing "case-maxmin-adaptive selects correct downsample simple"
    (let [one-outlier-cases '({:input1 [0] :output1 [10] :index 0 :distances [0 5 0 0 0]}
                              {:input1 [1] :output1 [11] :index 1 :distances [5 0 5 5 5]}
                              {:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]}
                              {:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]}
                              {:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]})
          selected (ds/select-downsample-maxmin-adaptive one-outlier-cases {:case-delta 0})
          first-two-indices (map :index (take 2 selected))]
      (prn {:selected selected})
      (t/is (some #(= 1 %) first-two-indices))
      (t/is (= 2 (count selected)))))
  ;; Every pairwise distance is 0, so nothing beyond the initial case should
  ;; ever be added.
  (t/testing "case-maxmin-adaptive selects correct downsample when all identical"
    (let [identical-cases '({:input1 [0] :output1 [10] :index 0 :distances [0 0 0 0 0]}
                            {:input1 [1] :output1 [11] :index 1 :distances [0 0 0 0 0]}
                            {:input1 [2] :output1 [12] :index 2 :distances [0 0 0 0 0]}
                            {:input1 [3] :output1 [13] :index 3 :distances [0 0 0 0 0]}
                            {:input1 [4] :output1 [14] :index 4 :distances [0 0 0 0 0]})
          selected (ds/select-downsample-maxmin-adaptive identical-cases {:case-delta 0})]
      (prn {:selected selected})
      (t/is (= 1 (count selected))))))
(t/deftest hyperselection-test (t/deftest hyperselection-test
(let [parents1 '({:blah 3 :index 1} {:blah 3 :index 1} (let [parents1 '({:blah 3 :index 1} {:blah 3 :index 1}