From d9e16aa0285ee6f93615abb6df44db19f798f15e Mon Sep 17 00:00:00 2001 From: Ryan Boldi Date: Sun, 8 Jan 2023 14:26:36 +0100 Subject: [PATCH] implemented soft ids, where being a threshold away from solved still counts as being solved. --- src/propeller/downsample.cljc | 21 ++++++++++++++------- test/propeller/push/downsample_test.cljc | 8 ++++++++ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/propeller/downsample.cljc b/src/propeller/downsample.cljc index ca13cec..a24b62d 100644 --- a/src/propeller/downsample.cljc +++ b/src/propeller/downsample.cljc @@ -96,6 +96,11 @@ (let [m (apply min coll)] (map #(if (= m %) 0 %) coll)))) +(defn replace-close-zero-with-zero + "replaces every value in coll (a flat collection of numbers) that is less than or equal to delta with zero, used for regression problems" + [coll delta] + (map #(if (>= delta %) 0 %) coll)) + (defn convert-to-elite-error "converts a set of errors into a list where all the elite errors are replaced with 0s so that we can use it in the selection of down-samples with elite/not-elite selection" @@ -105,17 +110,19 @@ (defn update-case-distances "updates the case distance field of training-data list, should be called after evaluation of individuals evaluated-pop should be a list of individuals that all have the :errors field with a list of this - individuals performance on the each case in the training-data, in order. ids-type is :elite to use elite/not-elite - or :solved to use solve/not-solved" - - [evaluated-pop ds-data training-data ids-type] - (flush) + individual's performance on each case in the training-data, in order. 
ids-type is :elite to use elite/not-elite, :soft to treat errors at or below solution-threshold as solved, and :solved (or any other value) to use the errors unchanged for solve/not-solved" + ([evaluated-pop ds-data training-data ids-type] + (update-case-distances evaluated-pop ds-data training-data ids-type 0)) ; default solution threshold is 0, only used if ids-type is :soft + ([evaluated-pop ds-data training-data ids-type solution-threshold] (let [ds-indices (map #(:index %) ds-data) errors (map #(:errors %) evaluated-pop) - corr-errors (if (= ids-type :elite) (convert-to-elite-error errors) errors)] ;errors, including elite/not-elite distinction + corr-errors (case ids-type + :elite (convert-to-elite-error errors) + :soft (map #(replace-close-zero-with-zero % solution-threshold) errors) ; errors is one list per individual, so threshold each individual's list separately + errors)] ;errors, adjusted for the chosen ids-type (elite / soft / unchanged) (merge-map-lists-at-index training-data (map-indexed (fn [idx d-case] (update-in d-case [:distances] #(update-at-indices % (map (fn [other] (get-distance-between-cases corr-errors idx other)) - (range (count ds-indices))) ds-indices))) ds-data)))) \ No newline at end of file + (range (count ds-indices))) ds-indices))) ds-data))))) \ No newline at end of file diff --git a/test/propeller/push/downsample_test.cljc b/test/propeller/push/downsample_test.cljc index 6e3c4d0..3beb56b 100644 --- a/test/propeller/push/downsample_test.cljc +++ b/test/propeller/push/downsample_test.cljc @@ -71,6 +71,14 @@ (t/testing "should work when input is a list" (t/is (= (ds/update-at-indices '(6 5 4 0 0) '(2 1) '(1 0)) [1 2 4 0 0]))))) + +(t/deftest ids-types-test + (t/testing "replace-close-zero-with-zero" + (t/testing "should replace the close to zero values with zero" + (t/is (= (ds/replace-close-zero-with-zero '(0.1 2 3 4 0.1 2 3 4) 0.2) '(0 2 3 4 0 2 3 4))) + (t/is (= (ds/replace-close-zero-with-zero '(0.1 0.1) 0.0) '(0.1 0.1))) + (t/is (= (ds/replace-close-zero-with-zero '(100 100 200) 100) '(0 0 200)))))) + (t/deftest update-case-distances-test (t/testing "update-case-distances" (t/testing "should update correctly when 
fewer errors than all"