From 45b19fbd3e88a537e6665f6962da02b974e4490d Mon Sep 17 00:00:00 2001 From: Ryan Boldi Date: Thu, 3 Mar 2022 21:43:44 -0500 Subject: [PATCH] implement assign-indices-to-data and select-downsample-random and tests --- doc/downsample.md | 5 ++ src/propeller/downsample.cljc | 18 +++++ test/propeller/utils_test.cljc | 125 +++++++++++++++++++++------------ 3 files changed, 102 insertions(+), 46 deletions(-) create mode 100644 doc/downsample.md create mode 100644 src/propeller/downsample.cljc diff --git a/doc/downsample.md b/doc/downsample.md new file mode 100644 index 0000000..0b80ed7 --- /dev/null +++ b/doc/downsample.md @@ -0,0 +1,5 @@ +# Downsampling the Training Data + +```clojure +:downsample-rate +``` \ No newline at end of file diff --git a/src/propeller/downsample.cljc b/src/propeller/downsample.cljc new file mode 100644 index 0000000..8c85ab8 --- /dev/null +++ b/src/propeller/downsample.cljc @@ -0,0 +1,18 @@ +(ns propeller.downsample) + +(defn assign-indices-to-data + "assigns an index to each training case in order to differentiate them when downsampling" + [{:keys [training-data]}] + (map (fn [data-map index] + (let [data-m (if (map? data-map) data-map (assoc {} :data data-map))] ;if data is not in a map, make it one + (assoc data-m :index index))) + training-data (range (count training-data)))) + +(defn select-downsample-random + "Selects a downsample from the training cases and returns it" + [{:keys [downsample-rate training-data]}] + (take (int (* downsample-rate (count training-data))) (shuffle training-data))) + +(defn update-case-data + "updates the case metadata field of argmap, should be called after evaluation of individuals" + [argmap]) \ No newline at end of file diff --git a/test/propeller/utils_test.cljc b/test/propeller/utils_test.cljc index e450a38..1212e24 100644 --- a/test/propeller/utils_test.cljc +++ b/test/propeller/utils_test.cljc @@ -1,7 +1,8 @@ (ns propeller.utils-test (:require [clojure.test :as t] [propeller.utils :as u] - [propeller.simplification :as s])) + [propeller.simplification :as s] + [propeller.downsample :as ds])) (t/deftest first-non-nil-test (t/is (= 1 (u/first-non-nil '(1 2 3)))) @@ -22,57 +23,89 @@ (t/deftest random-instruction-test (t/is - (letfn [(instruct [] 1)] - (let [test (u/random-instruction [instruct 2])] - (if (= 1 test) - true - (= 2 test)))))) + (letfn [(instruct [] 1)] + (let [test (u/random-instruction [instruct 2])] + (if (= 1 test) + true + (= 2 test)))))) (t/deftest count-points-test (t/is (= 6 (u/count-points '(:a :b (:c :d))))) (t/is (= 1 (u/count-points '()))) (t/is (= 2 (u/count-points '(:a))))) -(t/testing "choose-random-k" - (t/testing "should return indices that are a member of the original array" - (t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10)))))) - (t/testing "should return a list of size k" - (t/is (= (count (s/choose-random-k 7 (range 10))) 7)))) +(t/deftest choose-random-k-test + (t/testing "choose-random-k" + (t/testing "should return indices that are a member of the original array" + (t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10)))))) + (t/testing "should return a list of size k" + (t/is (= (count (s/choose-random-k 7 (range 10))) 7))))) -(t/testing "delete-at-indices" - (t/testing "should actually remove indicated items" - (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3))))) - (t/testing "should work with numerical indices" - (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3))))) - (t/testing "should not delete anything for index out of bounds" - (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3)))) - (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3)))) - (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3))))) - (t/testing "should only delete at single index once" - (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3))))) - (t/testing "should return empty list when deleting from empty list" - (t/is (= '() (s/delete-at-indices '(0) '())))) - (t/testing "should be able to delete at arbitrary indices" - (t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7)))) -(t/testing "delete-random-k" - (t/testing "should remove the correct amount of items" - (t/is (= (count (s/delete-k-random 3 (range 10))) 7)) - (t/is (= (count (s/delete-k-random 10 (range 10))) 0)) - (t/is (= (count (s/delete-k-random 0 (range 10))) 10))) - (t/testing "should not fail if k >> size of collection" - (t/is (= (count (s/delete-k-random 300 (range 10))) 0)) - (t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '()))) - (t/testing "should not fail if the collection is empty" - (t/is (= (count (s/delete-k-random 300 '())) 0)) - (t/is (= (count (s/delete-k-random 0 '())) 0))) - (t/testing "should maintain order of the remaining items" - (t/is (apply < (s/delete-k-random 3 (range 10)))))) +(t/deftest delete-at-indices-test + (t/testing "delete-at-indices" + (t/testing "should actually remove indicated items" + (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3))))) + (t/testing "should work with numerical indices" + (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3))))) + (t/testing "should not delete anything for index out of bounds" + (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3)))) + (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3)))) + (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3))))) + (t/testing "should only delete at single index once" + (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3))))) + (t/testing "should return empty list when deleting from empty list" + (t/is (= '() (s/delete-at-indices '(0) '())))) + (t/testing "should be able to delete at arbitrary indices" + (t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7))))) -(t/testing "auto-simplify-plushy" - (t/testing "should handle having an empty plushy" - (t/is (= (s/auto-simplify-plushy {} '() 100 (fn [argmap data plushy] 0) {} 3 false) '()))) - (let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)] - (t/testing "should decrease size of plushy that always has perfect scores" - (t/is (< (count (s/auto-simplify-plushy {} plushy 5 (fn [argmap data plushy] 0) {} 3 false)) (count plushy))) - (t/is (< (count (s/auto-simplify-plushy {} plushy 1 (fn [argmap data plushy] 0) {} 10 false)) (count plushy)))))) \ No newline at end of file +(t/deftest delete-random-k-test + (t/testing "delete-random-k" + (t/testing "should remove the correct amount of items" + (t/is (= (count (s/delete-k-random 3 (range 10))) 7)) + (t/is (= (count (s/delete-k-random 10 (range 10))) 0)) + (t/is (= (count (s/delete-k-random 0 (range 10))) 10))) + (t/testing "should not fail if k >> size of collection" + (t/is (= (count (s/delete-k-random 300 (range 10))) 0)) + (t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '()))) + (t/testing "should not fail if the collection is empty" + (t/is (= (count (s/delete-k-random 300 '())) 0)) + (t/is (= (count (s/delete-k-random 0 '())) 0))) + (t/testing "should maintain order of the remaining items" + (t/is (apply < (s/delete-k-random 3 (range 10))))))) + +(t/deftest auto-simplify-plushy-test + (t/testing "auto-simplify-plushy" + (t/testing "should handle having an empty plushy" + (t/is (= (s/auto-simplify-plushy {} '() 100 (fn [argmap data plushy] 0) {} 3 false) '()))) + (let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)] + (t/testing "should decrease size of plushy that always has perfect scores" + (t/is (< (count (s/auto-simplify-plushy {} plushy 5 (fn [argmap data plushy] 0) {} 3 false)) (count plushy))) + (t/is (< (count (s/auto-simplify-plushy {} plushy 1 (fn [argmap data plushy] 0) {} 10 false)) (count plushy))))))) + +(t/deftest assign-indices-to-data-test + (t/testing "assign-indices-to-data" + (t/testing "should return a map of the same length" + (t/is (= (count (ds/assign-indices-to-data {:training-data (range 10)})) 10)) + (t/is (= (count (ds/assign-indices-to-data {:training-data (range 0)})) 0))) + (t/testing "should return a map where each element has an index key" + (t/is (every? #(:index %) (ds/assign-indices-to-data {:training-data (map #(assoc {} :input %) (range 10))})))) + (t/testing "should return distinct indices" + (t/is (= (map #(:index %) (ds/assign-indices-to-data {:training-data (range 10)})) (range 10)))))) + +(t/deftest select-downsample-random-test + (t/testing "select-downsample-random" + (t/testing "should select the correct amount of elements" + (t/is (= (count (ds/select-downsample-random {:training-data (range 10) :downsample-rate 0.1})) 1)) + (t/is (= (count (ds/select-downsample-random {:training-data (range 10) :downsample-rate 0.2})) 2)) + (t/is (= (count (ds/select-downsample-random {:training-data (range 10) :downsample-rate 0.5})) 5))) + (t/testing "should not return duplicate items (when called with set of numbers)" + (t/is (= (count (set (ds/select-downsample-random {:training-data (range 10) :downsample-rate 0.1}))) 1)) + (t/is (= (count (set (ds/select-downsample-random {:training-data (range 10) :downsample-rate 0.2}))) 2)) + (t/is (= (count (set (ds/select-downsample-random {:training-data (range 10) :downsample-rate 0.5}))) 5))) + (t/testing "should round down the number of elements selected if not whole" + (t/is (= (count (ds/select-downsample-random {:training-data (range 3) :downsample-rate 0.5})) 1)) + (t/is (= (count (ds/select-downsample-random {:training-data (range 1) :downsample-rate 0.5})) 0))) + (t/testing "should not return more elements than available" + (t/is (= (count (ds/select-downsample-random {:training-data (range 10) :downsample-rate 2})) 10)) + (t/is (= (count (ds/select-downsample-random {:training-data (range 10) :downsample-rate 1.5})) 10)))))