implement assign-indices-to-data and select-downsample-random and tests
This commit is contained in:
parent
8c074a4ba8
commit
45b19fbd3e
5
doc/downsample.md
Normal file
5
doc/downsample.md
Normal file
@ -0,0 +1,5 @@
|
||||
# Downsampling the Training Data
|
||||
|
||||
```clojure
|
||||
:downsample-rate
|
||||
```
|
18
src/propeller/downsample.cljc
Normal file
18
src/propeller/downsample.cljc
Normal file
@ -0,0 +1,18 @@
|
||||
(ns propeller.downsample)
|
||||
|
||||
(defn assign-indices-to-data
  "Tags every training case with its position so that cases can be told apart
  when downsampling.

  Takes an argmap containing :training-data (a sequential collection of
  cases). A case that is already a map gets an :index key added (replacing
  any existing :index value); any other value is first wrapped as
  {:data value}. Returns a lazy sequence of the resulting maps, in the
  original order, with indices starting at 0."
  [{:keys [training-data]}]
  ;; map-indexed numbers the cases while walking them once, so the collection
  ;; does not have to be counted (and therefore fully realized) up front.
  (map-indexed (fn [index data-map]
                 (let [data-m (if (map? data-map) data-map {:data data-map})] ; wrap non-map cases
                   (assoc data-m :index index)))
               training-data))
|
||||
|
||||
(defn select-downsample-random
  "Selects a random downsample of the training cases and returns it.

  Takes an argmap with :training-data (a collection of cases) and
  :downsample-rate (the fraction of cases to keep). Returns a sequence of
  floor(downsample-rate * number-of-cases) cases drawn without replacement,
  in shuffled order. A rate above 1 yields every case; a rate of 0 or below
  yields an empty sequence."
  [{:keys [downsample-rate training-data]}]
  (let [cases (vec training-data) ; also turns nil into [] so shuffle always receives a collection
        ;; A product that is mathematically whole can land just below the
        ;; integer in floating point (e.g. (* 0.29 100) is 28.999999999999996),
        ;; so nudge it up slightly before truncating.
        sample-size (int (+ (* downsample-rate (count cases)) 1e-9))]
    (take sample-size (shuffle cases))))
|
||||
|
||||
(defn update-case-data
  "Placeholder for updating the case metadata field of argmap; meant to be
  called after individuals have been evaluated. It currently has no
  implementation and returns nil."
  [argmap]
  ;; TODO: not implemented yet.
  nil)
|
@ -1,7 +1,8 @@
|
||||
(ns propeller.utils-test
|
||||
(:require [clojure.test :as t]
|
||||
[propeller.utils :as u]
|
||||
[propeller.simplification :as s]))
|
||||
[propeller.simplification :as s]
|
||||
[propeller.downsample :as ds]))
|
||||
|
||||
(t/deftest first-non-nil-test
|
||||
(t/is (= 1 (u/first-non-nil '(1 2 3))))
|
||||
@ -22,57 +23,89 @@
|
||||
|
||||
(t/deftest random-instruction-test
|
||||
(t/is
|
||||
(letfn [(instruct [] 1)]
|
||||
(let [test (u/random-instruction [instruct 2])]
|
||||
(if (= 1 test)
|
||||
true
|
||||
(= 2 test))))))
|
||||
(letfn [(instruct [] 1)]
|
||||
(let [test (u/random-instruction [instruct 2])]
|
||||
(if (= 1 test)
|
||||
true
|
||||
(= 2 test))))))
|
||||
|
||||
(t/deftest count-points-test
  ;; Each row pairs an expected count with the (possibly nested) list passed to count-points.
  (t/are [expected tree] (= expected (u/count-points tree))
    6 '(:a :b (:c :d))
    1 '()
    2 '(:a)))
|
||||
|
||||
(t/testing "choose-random-k"
|
||||
(t/testing "should return indices that are a member of the original array"
|
||||
(t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10))))))
|
||||
(t/testing "should return a list of size k"
|
||||
(t/is (= (count (s/choose-random-k 7 (range 10))) 7))))
|
||||
(t/deftest choose-random-k-test
  (t/testing "choose-random-k"
    (let [pool (range 10)]
      (t/testing "should return indices that are a member of the original array"
        (let [allowed (set pool)
              chosen (s/choose-random-k 3 pool)]
          ;; every chosen value must come from the pool it was drawn from
          (t/is (every? #(contains? allowed %) chosen))))
      (t/testing "should return a list of size k"
        (t/is (= 7 (count (s/choose-random-k 7 pool))))))))
|
||||
|
||||
(t/testing "delete-at-indices"
|
||||
(t/testing "should actually remove indicated items"
|
||||
(t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3)))))
|
||||
(t/testing "should work with numerical indices"
|
||||
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3)))))
|
||||
(t/testing "should not delete anything for index out of bounds"
|
||||
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3))))
|
||||
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3))))
|
||||
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3)))))
|
||||
(t/testing "should only delete at single index once"
|
||||
(t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3)))))
|
||||
(t/testing "should return empty list when deleting from empty list"
|
||||
(t/is (= '() (s/delete-at-indices '(0) '()))))
|
||||
(t/testing "should be able to delete at arbitrary indices"
|
||||
(t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7))))
|
||||
|
||||
(t/testing "delete-random-k"
|
||||
(t/testing "should remove the correct amount of items"
|
||||
(t/is (= (count (s/delete-k-random 3 (range 10))) 7))
|
||||
(t/is (= (count (s/delete-k-random 10 (range 10))) 0))
|
||||
(t/is (= (count (s/delete-k-random 0 (range 10))) 10)))
|
||||
(t/testing "should not fail if k >> size of collection"
|
||||
(t/is (= (count (s/delete-k-random 300 (range 10))) 0))
|
||||
(t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '())))
|
||||
(t/testing "should not fail if the collection is empty"
|
||||
(t/is (= (count (s/delete-k-random 300 '())) 0))
|
||||
(t/is (= (count (s/delete-k-random 0 '())) 0)))
|
||||
(t/testing "should maintain order of the remaining items"
|
||||
(t/is (apply < (s/delete-k-random 3 (range 10))))))
|
||||
(t/deftest delete-at-indices-test
  (t/testing "delete-at-indices"
    ;; The four-element list shared by most of the cases below.
    (let [items '(:hi0 :hi1 :hi2 :hi3)]
      (t/testing "should actually remove indicated items"
        (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) items))))
      (t/testing "should work with numerical indices"
        (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) items))))
      (t/testing "should not delete anything for index out of bounds"
        ;; Only index 0 is in range in each row, so only :hi0 should disappear.
        (t/are [indices] (= '(:hi1 :hi2 :hi3) (s/delete-at-indices indices items))
          '(0 10)
          '(0 -10)
          '(-0 -10)))
      (t/testing "should only delete at single index once"
        (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) items))))
      (t/testing "should return empty list when deleting from empty list"
        (t/is (= '() (s/delete-at-indices '(0) '()))))
      (t/testing "should be able to delete at arbitrary indices"
        (let [doomed (s/choose-random-k 3 (range 10))]
          (t/is (= 7 (count (s/delete-at-indices doomed (range 10))))))))))
|
||||
|
||||
(t/testing "auto-simplify-plushy"
|
||||
(t/testing "should handle having an empty plushy"
|
||||
(t/is (= (s/auto-simplify-plushy {} '() 100 (fn [argmap data plushy] 0) {} 3 false) '())))
|
||||
(let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)]
|
||||
(t/testing "should decrease size of plushy that always has perfect scores"
|
||||
(t/is (< (count (s/auto-simplify-plushy {} plushy 5 (fn [argmap data plushy] 0) {} 3 false)) (count plushy)))
|
||||
(t/is (< (count (s/auto-simplify-plushy {} plushy 1 (fn [argmap data plushy] 0) {} 10 false)) (count plushy))))))
|
||||
(t/deftest delete-random-k-test
  (t/testing "delete-random-k"
    (t/testing "should remove the correct amount of items"
      ;; Rows are: how many items to delete from (range 10), how many must remain.
      (t/are [k remaining] (= remaining (count (s/delete-k-random k (range 10))))
        3 7
        10 0
        0 10))
    (t/testing "should not fail if k >> size of collection"
      (t/is (zero? (count (s/delete-k-random 300 (range 10)))))
      (t/is (= '() (s/delete-k-random 300 '(:hi1 :hi2 :hi3)))))
    (t/testing "should not fail if the collection is empty"
      (t/are [k] (zero? (count (s/delete-k-random k '())))
        300
        0))
    (t/testing "should maintain order of the remaining items"
      (let [survivors (s/delete-k-random 3 (range 10))]
        ;; the input was ascending, so the survivors must still be strictly ascending
        (t/is (apply < survivors))))))
|
||||
|
||||
(t/deftest auto-simplify-plushy-test
  (t/testing "auto-simplify-plushy"
    ;; Callback passed as the fourth argument: ignores its inputs and always returns 0.
    (let [always-zero (fn [_argmap _data _plushy] 0)
          plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)
          original-size (count plushy)]
      (t/testing "should handle having an empty plushy"
        (t/is (= '() (s/auto-simplify-plushy {} '() 100 always-zero {} 3 false))))
      (t/testing "should decrease size of plushy that always has perfect scores"
        (t/is (> original-size
                 (count (s/auto-simplify-plushy {} plushy 5 always-zero {} 3 false))))
        (t/is (> original-size
                 (count (s/auto-simplify-plushy {} plushy 1 always-zero {} 10 false))))))))
|
||||
|
||||
(t/deftest assign-indices-to-data-test
  (t/testing "assign-indices-to-data"
    (t/testing "should return a map of the same length"
      (t/is (= (count (ds/assign-indices-to-data {:training-data (range 10)})) 10))
      (t/is (= (count (ds/assign-indices-to-data {:training-data (range 0)})) 0)))
    (t/testing "should return a map where each element has an index key"
      ;; contains? checks for the key itself instead of relying on the index value being truthy
      (t/is (every? #(contains? % :index)
                    (ds/assign-indices-to-data {:training-data (map #(assoc {} :input %) (range 10))}))))
    (t/testing "should return distinct indices"
      (t/is (= (map :index (ds/assign-indices-to-data {:training-data (range 10)})) (range 10))))
    (t/testing "should keep map cases intact and wrap non-map cases under :data"
      ;; exercises both branches of the map?/non-map handling
      (t/is (= [{:input :a :index 0}]
               (ds/assign-indices-to-data {:training-data [{:input :a}]})))
      (t/is (= [{:data :a :index 0} {:data :b :index 1}]
               (ds/assign-indices-to-data {:training-data [:a :b]}))))))
|
||||
|
||||
(t/deftest select-downsample-random-test
  (t/testing "select-downsample-random"
    ;; Small helper so each case only has to state the data and the rate.
    (letfn [(sample [data rate]
              (ds/select-downsample-random {:training-data data :downsample-rate rate}))]
      (t/testing "should select the correct amount of elements"
        (t/are [rate expected] (= expected (count (sample (range 10) rate)))
          0.1 1
          0.2 2
          0.5 5))
      (t/testing "should not return duplicate items (when called with set of numbers)"
        ;; converting to a set drops duplicates, so the size must be unchanged
        (t/are [rate expected] (= expected (count (set (sample (range 10) rate))))
          0.1 1
          0.2 2
          0.5 5))
      (t/testing "should round down the number of elements selected if not whole"
        (t/are [size expected] (= expected (count (sample (range size) 0.5)))
          3 1
          1 0))
      (t/testing "should not return more elements than available"
        (t/are [rate] (= 10 (count (sample (range 10) rate)))
          2
          1.5)))))
|
||||
|
Loading…
x
Reference in New Issue
Block a user