diff --git a/.gitignore b/.gitignore index f4b58c9..b70ae2e 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,5 @@ node_modules/ # https://github.com/thelmuth/program-synthesis-benchmark-datasets /data **/.DS_Store +*.edn /.cpcache/ diff --git a/project.clj b/project.clj index bd02092..3a74514 100644 --- a/project.clj +++ b/project.clj @@ -6,7 +6,8 @@ :dependencies [[org.clojure/clojure "1.10.0"] [org.clojure/clojurescript "1.9.946"] [org.clojure/test.check "1.1.0"] - [net.clojars.schneau/psb2 "1.1.1"]] + [net.clojars.schneau/psb2 "1.1.1"] + [org.clojure/data.csv "1.0.1"]] :profiles {:profiling {:dependencies [[com.clojure-goes-fast/clj-async-profiler "0.5.1"]]}} :main ^:skip-aot propeller.core :repl-options {:init-ns propeller.core} diff --git a/src/docs_src/Downsampling_training_data.md b/src/docs_src/Downsampling_training_data.md new file mode 100644 index 0000000..79c14b0 --- /dev/null +++ b/src/docs_src/Downsampling_training_data.md @@ -0,0 +1,56 @@ +Downsampling the Training Data += + +Downsampling is a very simple way to improve the efficiency of your evolutionary runs. It might allow for deeper evolutionary searches and a greater success rate. + +Using Downsampled selection with propeller is easy: + +Set the :parent-selection argument to whichever selection strategy you would like, and set the :downsample? argument to true as follows: + +```clojure +lein run -m propeller.problems.simple-regression :parent-selection :lexicase :downsample? true +``` + +The number of evaluations is held constant when comparing to a full training set run, so set the :max-generations to a number of generations that you would have gone to using a **full** sample. + +## Downsample Functions + +In this repository, you have access to 3 different downsampling functions. These are the methods used to take a down-sample from the entire training set. + +To use them, add the argument ```:ds-function``` followed by which function you would like to us + +The list is +- ```:case-maxmin``` - This is the method used for informed down-sampled lexicase selection +- ```:case-maxmin-auto``` - This method automatically determines the downsample size +- ```:case-rand```- Random Sampling + +### Using ```:case-maxmin```: + +In order to use regular informed down-sampled selection, you must specify a few things: +- ```:downsample-rate```- This is the $r$ parameter: what proportion of the full sample should be in the down-sample $\in [0,1]$ +- ```:ds-parent-rate``` - This is the $\rho$ parameter: what proportion of parents are used to evaluate case distances $\in [0,1]$ +- ```:ds-parent-gens``` - This is the $k$ parameter: How many generations in between parent evaluations for distances $\in \{1,2,3, \dots\}$ + +### Using ```:case-maxmin-auto```: + +In order to use automatic informed down-sampled selection, you must specify a few things: +- ```:case-delta ```- This is the $\Delta$ parameter: How close can the farthest case be from its closest case before we stop adding to the down-sample +- ```:ids-type``` - Either ```:elite``` or ```:solved ``` - Specifies whether we are using elite/not-elite or solved/not-solved as our binary-fication of case solve vectors. +- ```:ds-parent-rate``` - This is the $\rho$ parameter: what proportion of parents are used to evaluate case distances $\in [0,1]$ +- ```:ds-parent-gens``` - This is the $k$ parameter: How many generations in between parent evaluations for distances $\in \{1,2,3, \dots\}$ + +### Using ```:case-rand```: + +In order to use regular randomly down-sampled selection, you must specify a few things: +- ```:downsample-rate```- This is the $r$ parameter: what proportion of the full sample should be in the down-sample $\in [0,1]$ + + + + + +Here's an example of running informed downsampled lexicase selection with $r=0.1$, $\rho=0.01$ and $k=100$ on the simple classification problem: + +```clojure +lein run -m propeller.problems.simple-classification :parent-selection :lexicase :downsample? true :ds-function :case-maxmin :downsample-rate 0.1 :max-generations 300 :ds-parent-rate 0.01 :ds-parent-gens 100 +``` + diff --git a/src/propeller/downsample.cljc b/src/propeller/downsample.cljc new file mode 100644 index 0000000..a5117a9 --- /dev/null +++ b/src/propeller/downsample.cljc @@ -0,0 +1,132 @@ +(ns propeller.downsample + (:require [propeller.tools.math :as math] + [propeller.tools.metrics :as metrics] + [propeller.utils :as utils])) + +(defn assign-indices-to-data + "assigns an index to each training case in order to differentiate them when downsampling" + [training-data] + (map (fn [data-map index] + (let [data-m (if (map? data-map) data-map (assoc {} :data data-map))] ;if data is not in a map, make it one + (assoc data-m :index index))) + training-data (range (count training-data)))) + +(defn initialize-case-distances + [{:keys [training-data population-size]}] + (map #(assoc % :distances (vec (repeat (count training-data) population-size))) training-data)) + +(defn select-downsample-random + "Selects a downsample from the training cases and returns it" + [training-data {:keys [downsample-rate]}] + (take (int (* downsample-rate (count training-data))) (shuffle training-data))) + +(defn select-downsample-maxmin + "selects a downsample that has it's cases maximally far away by sequentially + adding cases to the downsample that have their closest case maximally far away" + [training-data {:keys [downsample-rate]}] + (let [shuffled-cases (shuffle training-data) + goal-size (int (* downsample-rate (count training-data)))] + (loop [new-downsample (conj [] (first shuffled-cases)) + cases-to-pick-from (rest shuffled-cases)] + (if (>= (count new-downsample) goal-size) + new-downsample + (let [tournament cases-to-pick-from + min-case-distances (metrics/min-of-colls + (map (fn [distance-list] + (utils/filter-by-index distance-list (map #(:index %) tournament))) + (map #(:distances %) new-downsample))) + selected-case-index (metrics/argmax min-case-distances)] + (recur (conj new-downsample (nth tournament selected-case-index)) + (shuffle (utils/drop-nth selected-case-index tournament)))))))) + +(defn select-downsample-maxmin-adaptive + "selects a downsample that has it's cases maximally far away by sequentially + adding cases to the downsample that have their closest case maximally far away + automatically stops when the maximum minimum distance is below delta" +[training-data {:keys [case-delta]}] +(let [shuffled-cases (shuffle training-data)] + (loop [new-downsample (conj [] (first shuffled-cases)) + cases-to-pick-from (rest shuffled-cases)] + (let [tournament cases-to-pick-from + min-case-distances (metrics/min-of-colls + (map (fn [distance-list] + (utils/filter-by-index distance-list (map #(:index %) tournament))) + (map #(:distances %) new-downsample))) + selected-case-index (metrics/argmax min-case-distances)] + (if (or (= 0 (count tournament)) (<= (apply max min-case-distances) case-delta)) + new-downsample + (recur (conj new-downsample (nth tournament selected-case-index)) + (shuffle (utils/drop-nth selected-case-index tournament)))))))) + +(defn get-distance-between-cases + "returns the distance between two cases given a list of individual error vectors, and the index these + cases exist in the error vector. Only makes the distinction between zero and nonzero errors" + [error-lists case-index-1 case-index-2] + (if (or (< (count (first error-lists)) case-index-1) + (< (count (first error-lists)) case-index-2) + (neg? case-index-1) (neg? case-index-2)) + (count error-lists) ;return the max distance + (let [errors-1 (map #(nth % case-index-1) error-lists) + errors-2 (map #(nth % case-index-2) error-lists)] + ;compute distance between errors-1 and errors-2 + (reduce + (map (fn [e1 e2] (math/abs (- (math/step e1) (math/step e2)))) errors-1 errors-2))))) + +(defn update-at-indices + "merges two vectors at the indices provided by a third vector" + [big-vec small-vec indices] + (->> big-vec + (map-indexed (fn [idx itm] (let [index (.indexOf indices idx)] + (if (not= -1 index) (nth small-vec index) itm)))) + vec)) + +(defn merge-map-lists-at-index + "merges two lists of maps, replacing the maps in the big + list with their corresponding (based on index) maps in the small list" + [big-list small-list] + (map + #(let [corresponding-small (some (fn [c] (when (= (:index %) (:index c)) c)) small-list)] + (if (nil? corresponding-small) % corresponding-small)) + big-list)) + +(defn replace-mins-with-zero + "replaces the minimum value(s) in a list with zero" + [coll] + (if (empty? coll) + '() + (let [m (apply min coll)] + (map #(if (= m %) 0 %) coll)))) + +(defn replace-close-zero-with-zero + "replaces values within a delta of zero with zero, used for regression problems" + [coll delta] + (map #(if (>= delta %) 0 %) coll)) + +(defn convert-to-elite-error + "converts a set of errors into a list where all the elite errors are replaced with 0s so that we can use + it in the selection of down-samples with elite/not-elite selection" + [errors] + (map #(replace-mins-with-zero %) errors)) + +(defn convert-to-soft-error + [errors delta] + (map #(replace-close-zero-with-zero % delta) errors)) + +(defn update-case-distances + "updates the case distance field of training-data list, should be called after evaluation of individuals + evaluated-pop should be a list of individuals that all have the :errors field with a list of this + individuals performance on the each case in the training-data, in order. ids-type is :elite to use elite/not-elite, :soft to consider near solves, and :solved to use solve/not-solved" + ([evaluated-pop ds-data training-data ids-type] + (update-case-distances evaluated-pop ds-data training-data ids-type 0)) ; default solution threshold is 0, only used if ids-type is :soft + ([evaluated-pop ds-data training-data ids-type solution-threshold] + (let [ds-indices (map #(:index %) ds-data) + errors (map #(:errors %) evaluated-pop) + corr-errors (case ids-type + :elite (convert-to-elite-error errors) + :soft (convert-to-soft-error errors solution-threshold) + errors)] ;errors, including elite/not-elite distinction + (merge-map-lists-at-index + training-data (map-indexed + (fn [idx d-case] (update-in d-case + [:distances] #(update-at-indices + % (map (fn [other] (get-distance-between-cases corr-errors idx other)) + (range (count ds-indices))) ds-indices))) ds-data))))) \ No newline at end of file diff --git a/src/propeller/gp.cljc b/src/propeller/gp.cljc index 22a8aa5..107a0ca 100644 --- a/src/propeller/gp.cljc +++ b/src/propeller/gp.cljc @@ -5,6 +5,8 @@ [propeller.genome :as genome] [propeller.simplification :as simplification] [propeller.variation :as variation] + [propeller.downsample :as downsample] + [propeller.hyperselection :as hyperselection] [propeller.push.instructions.bool] [propeller.push.instructions.character] [propeller.push.instructions.code] @@ -17,14 +19,16 @@ [propeller.utils :as utils])) (defn report - "Reports information for each generation." - [pop generation argmap] + "Reports information each generation." + [evaluations pop generation argmap training-data] (let [best (first pop)] (clojure.pprint/pprint (merge {:generation generation :best-plushy (:plushy best) :best-program (genome/plushy->push (:plushy best) argmap) - :best-total-error (:total-error best) + :best-total-error (:total-error best) + :evaluations evaluations + :ds-indices (map #(:index %) training-data) :best-errors (:errors best) :best-behaviors (:behaviors best) :genotypic-diversity (float (/ (count (distinct (map :plushy pop))) (count pop))) @@ -47,59 +51,100 @@ (defn gp "Main GP loop." [{:keys [population-size max-generations error-function instructions - max-initial-plushy-size solution-error-threshold] - :or {solution-error-threshold 0.0} + max-initial-plushy-size solution-error-threshold mapper ds-parent-rate ds-parent-gens dont-end ids-type downsample?] + :or {solution-error-threshold 0.0 + dont-end false + ds-parent-rate 0 + ds-parent-gens 1 + ids-type :solved ; :solved or :elite or :soft + downsample? false + ;; The `mapper` will perform a `map`-like operation to apply a function to every individual + ;; in the population. The default is `map` but other options include `mapv`, or `pmap`. + mapper #?(:clj pmap :cljs map)} :as argmap}] ;; (prn {:starting-args (update (update argmap :error-function str) :instructions str)}) (println) ;; (loop [generation 0 + evaluations 0 population (utils/pmapallv (fn [_] {:plushy (let [plushy (genome/make-random-plushy instructions max-initial-plushy-size)] (if (:diploid argmap) (interleave plushy plushy) - plushy))}) - (range population-size) - argmap)] ;creates population of random plushys - (let [evaluated-pop (sort-by :total-error - (utils/pmapallv - (partial error-function argmap (:training-data argmap)) - population - argmap)) + plushy))}) (range population-size) argmap) + indexed-training-data (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap))] + (let [training-data (if downsample? + (case (:ds-function argmap) + :case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap) + :case-maxmin-auto (downsample/select-downsample-maxmin-adaptive indexed-training-data argmap) + :case-rand (downsample/select-downsample-random indexed-training-data argmap) + (do (prn {:error "Invalid Downsample Function"}) (downsample/select-downsample-random indexed-training-data argmap))) + indexed-training-data) ;defaults to full training set + parent-reps (if + (and downsample? ; if we are down-sampling + (zero? (mod generation ds-parent-gens))) ;every ds-parent-gens generations + (take (* ds-parent-rate (count population)) (shuffle population)) + '()) ;else just empty list + ; parent representatives for down-sampling + rep-evaluated-pop (if downsample? + (sort-by :total-error + (utils/pmapallv + (partial error-function argmap indexed-training-data) + parent-reps + argmap)) + '()) + evaluated-pop (sort-by :total-error + (utils/pmapallv + (partial error-function argmap training-data) + population + argmap)) best-individual (first evaluated-pop) + best-individual-passes-ds (and downsample? (<= (:total-error best-individual) solution-error-threshold)) argmap (if (= (:parent-selection argmap) :epsilon-lexicase) (assoc argmap :epsilons (selection/epsilon-list evaluated-pop)) - argmap)] ;adds :epsilons if using epsilon-lexicase + argmap)] ; epsilons (if (:custom-report argmap) - ((:custom-report argmap) evaluated-pop generation argmap) - (report evaluated-pop generation argmap)) + ((:custom-report argmap) evaluations evaluated-pop generation argmap) + (report evaluations evaluated-pop generation argmap training-data)) + ;;did the indvidual pass all cases in ds? + (when best-individual-passes-ds + (prn {:semi-success-generation generation})) (cond - ;; Success on training cases is verified on testing cases - (<= (:total-error best-individual) solution-error-threshold) - (do (prn {:success-generation generation}) - (prn {:total-test-error - (:total-error (error-function argmap (:testing-data argmap) best-individual))}) - (when (:simplification? argmap) - (let [simplified-plushy (simplification/auto-simplify-plushy - (:plushy best-individual) - error-function argmap)] - (prn {:total-test-error-simplified - (:total-error (error-function argmap - (:testing-data argmap) - (hash-map :plushy simplified-plushy)))}))) - #?(:clj (shutdown-agents))) + ;; If either the best individual on the ds passes all training cases, or best individual on full sample passes all training cases + ;; We verify success on test cases and end evolution + (if (or (and best-individual-passes-ds (<= (:total-error (error-function argmap indexed-training-data best-individual)) solution-error-threshold)) + (and (not downsample?) + (<= (:total-error best-individual) solution-error-threshold))) + (do (prn {:success-generation generation}) + (prn {:total-test-error + (:total-error (error-function argmap (:testing-data argmap) best-individual))}) + (when (:simplification? argmap) + (let [simplified-plushy (simplification/auto-simplify-plushy (:plushy best-individual) error-function argmap)] + (prn {:total-test-error-simplified (:total-error (error-function argmap (:testing-data argmap) (hash-map :plushy simplified-plushy)))}))) + (if dont-end false true)) + false) + nil ;; - (>= generation max-generations) - (do #?(:clj (shutdown-agents))) + (and (not downsample?) (>= generation max-generations)) + nil + ;; + (and downsample? (>= evaluations (* max-generations population-size (count indexed-training-data)))) + nil ;; :else (recur (inc generation) - (if (:elitism argmap) - (conj (utils/pmapallv (fn [_] (variation/new-individual evaluated-pop argmap)) - (range (dec population-size)) - argmap) - (first evaluated-pop)) ;elitism maintains the most-fit individual - (utils/pmapallv (fn [_] (variation/new-individual evaluated-pop argmap)) - (range population-size) - argmap))))))) - + (+ evaluations (* population-size (count training-data)) ;every member evaluated on the current sample + (if (zero? (mod generation ds-parent-gens)) (* (count parent-reps) (- (count indexed-training-data) (count training-data))) 0) ; the parent-reps not evaluted already on down-sample + (if best-individual-passes-ds (- (count indexed-training-data) (count training-data)) 0)) ; if we checked for generalization or not + (let [reindexed-pop (hyperselection/reindex-pop evaluated-pop)] ; give every individual an index for hyperselection loggin + (hyperselection/log-hyperselection-and-ret + (if (:elitism argmap) + (conj (repeatedly (dec population-size) #(variation/new-individual reindexed-pop argmap)) + (first reindexed-pop)) + (repeatedly population-size ;need to count occurance of each parent, and reset IDs + #(variation/new-individual reindexed-pop argmap))))) + (if downsample? + (if (zero? (mod generation ds-parent-gens)) + (downsample/update-case-distances rep-evaluated-pop indexed-training-data indexed-training-data ids-type (/ solution-error-threshold (count indexed-training-data))) ; update distances every ds-parent-gens generations + indexed-training-data) + indexed-training-data)))))) diff --git a/src/propeller/hyperselection.cljc b/src/propeller/hyperselection.cljc new file mode 100644 index 0000000..7d406d8 --- /dev/null +++ b/src/propeller/hyperselection.cljc @@ -0,0 +1,35 @@ +(ns propeller.hyperselection) + +(defn sum-list-map-indices + "sums a list of maps that have the :index property's index multiplicity" + [list-of-maps] + (->> list-of-maps + (map #(:index %)) + frequencies)) + +(defn ordered-freqs + "takes a map from indices to frequencies, and returns a sorted list of the frequences is descencing order" + [freqs] + (->> freqs + vals + (sort >))) + +(defn normalize-list-by-popsize [popsize lst] + (map #(double (/ % popsize)) lst)) + +(defn hyperselection-track + "outputs a normalized list of the hyperselection proportion for each parent" + [new-pop] + (->> new-pop + sum-list-map-indices + ordered-freqs + (normalize-list-by-popsize (count new-pop)))) + +(defn log-hyperselection-and-ret [new-pop] + (prn {:hyperselection (hyperselection-track new-pop)}) + new-pop) + +(defn reindex-pop + "assigns each member of the population a unique index before selection to track hyperselection" + [pop] + (map (fn [indiv index] (assoc indiv :index index)) pop (range (count pop)))) \ No newline at end of file diff --git a/src/propeller/problems/PSB1/count_odds.cljc b/src/propeller/problems/PSB1/count_odds.cljc index 8506eb5..2ab65ea 100644 --- a/src/propeller/problems/PSB1/count_odds.cljc +++ b/src/propeller/problems/PSB1/count_odds.cljc @@ -65,6 +65,9 @@ :error-function error-function :training-data train-data :testing-data test-data + :case-t-size (count train-data) + :ds-parent-rate 0 + :ds-parent-gens 1 :max-generations 300 :population-size 1000 :max-initial-plushy-size 250 diff --git a/src/propeller/problems/PSB1/grade.cljc b/src/propeller/problems/PSB1/grade.cljc index 040cec5..8d26ca3 100644 --- a/src/propeller/problems/PSB1/grade.cljc +++ b/src/propeller/problems/PSB1/grade.cljc @@ -83,6 +83,9 @@ :error-function error-function :training-data train-data :testing-data test-data + :case-t-size (count train-data) + :ds-parent-rate 0 + :ds-parent-gens 1 :max-generations 300 :population-size 1000 :max-initial-plushy-size 250 @@ -92,4 +95,5 @@ :umad-rate 0.1 :variation {:umad 1.0 :crossover 0.0} :elitism false} - (apply hash-map (map #(if (string? %) (read-string %) %) args))))) + (apply hash-map (map #(if (string? %) (read-string %) %) args)))) + (#?(:clj shutdown-agents))) diff --git a/src/propeller/problems/PSB1/scrabble_score.cljc b/src/propeller/problems/PSB1/scrabble_score.cljc index 6ecc7a0..6e7865d 100644 --- a/src/propeller/problems/PSB1/scrabble_score.cljc +++ b/src/propeller/problems/PSB1/scrabble_score.cljc @@ -134,6 +134,9 @@ :error-function error-function :training-data train-data :testing-data test-data + :case-t-size (count train-data) + :ds-parent-rate 0 + :ds-parent-gens 1 :max-generations 300 :population-size 1000 :max-initial-plushy-size 250 diff --git a/src/propeller/problems/PSB1/small_or_large.cljc b/src/propeller/problems/PSB1/small_or_large.cljc index 389811a..6de3547 100644 --- a/src/propeller/problems/PSB1/small_or_large.cljc +++ b/src/propeller/problems/PSB1/small_or_large.cljc @@ -66,6 +66,9 @@ :error-function error-function :training-data train-data :testing-data test-data + :case-t-size (count train-data) + :ds-parent-rate 0 + :ds-parent-gens 1 :max-generations 300 :population-size 1000 :max-initial-plushy-size 250 diff --git a/src/propeller/problems/PSB2/find_pair.cljc b/src/propeller/problems/PSB2/find_pair.cljc new file mode 100644 index 0000000..9dbaacc --- /dev/null +++ b/src/propeller/problems/PSB2/find_pair.cljc @@ -0,0 +1,108 @@ +(ns propeller.problems.PSB2.find-pair + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.problems.data-creation :as dc] + [propeller.utils :as utils] + [propeller.push.instructions :refer [def-instruction get-stack-instructions]] + [propeller.push.state :as state] + [propeller.tools.math :as math] + [propeller.gp :as gp] + #?(:cljs [cljs.reader :refer [read-string]]))) + +(def train-and-test-data (psb2/fetch-examples "data" "find-pair" 200 2000)) +(def train-data (:train train-and-test-data)) +(def test-data (:test train-and-test-data)) + +(defn random-int [] (- (rand-int 201) 100)) + +(defn map-vals-input + "Returns all the input values of a map" + [i] + (vals (select-keys i [:input1 :input2]))) + +(defn map-vals-output + "Returns the output values of a map" + [i] + (vals (select-keys i [:output1 :output2]))) + +(def-instruction :output-one + ^{:stacks #{:integer :output}} + (fn [state] + (if (empty? (:integer state)) + state + (let [top-int (state/peek-stack state :integer)] + (assoc-in state [:output :out1] top-int))))) + +(def-instruction :output-two + ^{:stacks #{:integer :output}} + (fn [state] + (if (empty? (:integer state)) + state + (let [top-int (state/peek-stack state :integer)] + (assoc-in state [:output :out2] top-int))))) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :vector_integer :boolean}) + (list :output-one :output-two) + ;;; input instructions + (list :in1 :in2) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list -1 0 1 2 random-int)))) + +(defn error-function + [argmap data individual] + (let [program (genome/plushy->push (:plushy individual) argmap) + inputs (map (fn [i] (map-vals-input i)) data) + correct-outputs (map (fn [i] (map-vals-output i)) data) + outputs (map (fn [input] + (:output + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 (nth input 0) + :in2 (nth input 1)}) + (:step-limit argmap)))) + inputs) + outputs-1 (map #(:out1 %) outputs) + outputs-2 (map #(:out2 %) outputs) + ;_ (prn {:o1 outputs-1 :o2 outputs-2}) + errors (map (fn [correct-output output-1 output-2] + (if (not (and (number? output-2) (number? output-1))) + 100000 + (+ (math/abs (- (first correct-output) output-1)) + (math/abs (- (second correct-output) output-2))))) + correct-outputs outputs-1 outputs-2)] + (assoc individual + :behavior outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors))))) + +(defn -main + "Runs propel-gp, giving it a map of arguments." + [& args] + (gp/gp + (merge + {:instructions instructions + :error-function error-function + :training-data train-data + :testing-data test-data + :case-t-size (count train-data) + :ds-parent-rate 0 + :ds-parent-gens 1 + :max-generations 300 + :population-size 1000 + :max-initial-plushy-size 250 + :step-limit 2000 + :parent-selection :lexicase + :tournament-size 5 + :umad-rate 0.1 + :variation {:umad 1.0 :crossover 0.0} + :elitism false} + (apply hash-map (map #(if (string? %) (read-string %) %) args)))) + (#?(:clj shutdown-agents))) \ No newline at end of file diff --git a/src/propeller/problems/PSB2/fizz_buzz.cljc b/src/propeller/problems/PSB2/fizz_buzz.cljc index f331eef..b7ece36 100644 --- a/src/propeller/problems/PSB2/fizz_buzz.cljc +++ b/src/propeller/problems/PSB2/fizz_buzz.cljc @@ -10,6 +10,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" (:require [psb2.core :as psb2] [propeller.genome :as genome] [propeller.push.interpreter :as interpreter] + [propeller.problems.data-creation :as dc] [propeller.utils :as utils] [propeller.push.instructions :refer [get-stack-instructions]] [propeller.push.state :as state] @@ -17,7 +18,21 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" [propeller.gp :as gp] #?(:cljs [cljs.reader :refer [read-string]]))) +; =========== PROBLEM DESCRIPTION ========================= +; FIZZ BUZZ from PSB2 +; Given an integer x, return "Fizz" if x is +; divisible by 3, "Buzz" if x is divisible by 5, "FizzBuzz" if x +; is divisible by 3 and 5, and a string version of x if none of +; the above hold. +; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; ============================================================ + + (def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fizz-buzz" 200 2000)) +(def train-data (:train train-and-test-data)) +(def test-data (:test train-and-test-data)) + (def instructions "Stack-specific instructions, input instructions, close, and constants" @@ -71,8 +86,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" (merge {:instructions instructions :error-function error-function - :training-data (:train train-and-test-data) - :testing-data (:test train-and-test-data) + :training-data train-data + :testing-data test-data + :case-t-size (count train-data) + :ds-parent-rate 0 + :ds-parent-gens 1 :max-generations 300 :population-size 1000 :max-initial-plushy-size 250 diff --git a/src/propeller/problems/PSB2/fuel_cost.cljc b/src/propeller/problems/PSB2/fuel_cost.cljc index 54e9d03..ea19214 100644 --- a/src/propeller/problems/PSB2/fuel_cost.cljc +++ b/src/propeller/problems/PSB2/fuel_cost.cljc @@ -11,6 +11,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" (:require [psb2.core :as psb2] [propeller.genome :as genome] [propeller.push.interpreter :as interpreter] + [propeller.problems.data-creation :as dc] [propeller.utils :as utils] [propeller.push.instructions :refer [get-stack-instructions]] [propeller.push.state :as state] @@ -18,7 +19,19 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" [propeller.gp :as gp] #?(:cljs [cljs.reader :refer [read-string]]))) +; =========== PROBLEM DESCRIPTION ========================= +; FUEL COST from PSB2 +; Given a vector of positive integers, divide +; each by 3, round the result down to the nearest integer, and +; subtract 2. Return the sum of all of the new integers in the +; vector +; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; ============================================================ + (def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fuel-cost" 200 2000)) +(def train-data (:train train-and-test-data)) +(def test-data (:test train-and-test-data)) ; Random integer between -100 and 100 (from smallest) (defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100)) @@ -26,15 +39,15 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" (def instructions "Stack-specific instructions, input instructions, close, and constants" (utils/not-lazy - (concat + (concat ;;; stack-specific instructions - (get-stack-instructions #{:exec :integer :boolean :vector_integer :print}) + (get-stack-instructions #{:exec :integer :boolean :vector_integer :print}) ;;; input instructions - (list :in1) + (list :in1) ;;; close - (list 'close) + (list 'close) ;;; ERCs (constants) - (list random-int 0 1 2 3)))) + (list random-int 0 1 2 3)))) (defn error-function "Finds the behaviors and errors of an individual: Error is 0 if the value and @@ -47,11 +60,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" correct-outputs (map (fn [i] (get i :output1)) data) outputs (map (fn [input] (state/peek-stack - (interpreter/interpret-program - program - (assoc state/empty-state :input {:in1 input}) - (:step-limit argmap)) - :integer)) + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :integer)) inputs) errors (map (fn [correct-output output] (if (= output :no-stack-item) @@ -60,10 +73,10 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" correct-outputs outputs)] (assoc individual - :behaviors outputs - :errors errors - :total-error #?(:clj (apply +' errors) - :cljs (apply + errors))))) + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors))))) (defn -main "Runs the top-level genetic programming function, giving it a map of @@ -71,18 +84,21 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" or through a passed map." [& args] (gp/gp - (merge - {:instructions instructions - :error-function error-function - :training-data (:train train-and-test-data) - :testing-data (:test train-and-test-data) - :max-generations 300 - :population-size 1000 - :max-initial-plushy-size 250 - :step-limit 2000 - :parent-selection :lexicase - :tournament-size 5 - :umad-rate 0.1 - :variation {:umad 1.0 :crossover 0.0} - :elitism false} - (apply hash-map (map #(if (string? %) (read-string %) %) args))))) + (merge + {:instructions instructions + :error-function error-function + :training-data train-data + :testing-data test-data + :case-t-size (count train-data) + :ds-parent-rate 0 + :ds-parent-gens 1 + :max-generations 300 + :population-size 1000 + :max-initial-plushy-size 250 + :step-limit 2000 + :parent-selection :lexicase + :tournament-size 5 + :umad-rate 0.1 + :variation {:umad 1.0 :crossover 0.0} + :elitism false} + (apply hash-map (map #(if (string? %) (read-string %) %) args))))) diff --git a/src/propeller/problems/PSB2/gcd.cljc b/src/propeller/problems/PSB2/gcd.cljc index 0ba421b..0581fdd 100644 --- a/src/propeller/problems/PSB2/gcd.cljc +++ b/src/propeller/problems/PSB2/gcd.cljc @@ -9,6 +9,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" (:require [psb2.core :as psb2] [propeller.genome :as genome] [propeller.push.interpreter :as interpreter] + [propeller.problems.data-creation :as dc] [propeller.utils :as utils] [propeller.push.instructions :refer [get-stack-instructions]] [propeller.push.state :as state] @@ -18,6 +19,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" (def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "gcd" 200 2000)) + +(def train-and-test-data (psb2/fetch-examples "data" "gcd" 200 2000)) +(def train-data (:train train-and-test-data)) +(def test-data (:test train-and-test-data)) + (defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100)) (defn map-vals-input @@ -82,8 +88,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf" (merge {:instructions instructions :error-function error-function - :training-data (:train train-and-test-data) - :testing-data (:test train-and-test-data) + :training-data train-data + :testing-data test-data + :case-t-size (count train-data) + :ds-parent-rate 0 + :ds-parent-gens 1 :max-generations 300 :population-size 1000 :max-initial-plushy-size 250 diff --git a/src/propeller/problems/UBall5D.cljc b/src/propeller/problems/UBall5D.cljc new file mode 100644 index 0000000..4334f1b --- /dev/null +++ b/src/propeller/problems/UBall5D.cljc @@ -0,0 +1,128 @@ +(ns propeller.problems.UBall5D + (:require [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.push.state :as state] + [propeller.tools.math :as math] + [propeller.gp :as gp] + #?(:cljs [cljs.reader :refer [read-string]]))) + +(defn- target-function + "Target function: f(x) = 10/(5 + SUM_i=1^5 (x_i - 3)^2)" + [x1 x2 x3 x4 x5] + (->> (list x1 x2 x3 x4 x5) + (map #(* (- % 3) (- % 3))) + (apply +) + (+ 5) + (/ 10))) + +(target-function 3 7 3 3 3) + +(defn map-vals-input + "Returns all the input values of a map" + [i] + (vals (select-keys i [:input1 :input2 :input3 :input4 :input5]))) + + +(defn get-random-input + "returns a random input between two ranges" + [a b] + (->> (rand) + (* (- b a)) + (+ a) + (float))) + +(def train-data + (repeatedly 1024 (fn [] (repeatedly 5 #(get-random-input 0.05 6.05))))) + +(def test-data + (repeatedly 5000 (fn [] (repeatedly 5 #(get-random-input -0.25 6.35))))) + +(def train-and-test-data + (let [train-inputs train-data + test-inputs test-data] + {:train (map (fn [x] {:input1 (first x) + :input2 (nth x 1) + :input3 (nth x 2) + :input4 (nth x 3) + :input5 (nth x 4) + :output1 (apply target-function (map #(nth x %) (range 5)))}) train-inputs) + :test (map (fn [x] {:input1 (first x) + :input2 (nth x 1) + :input3 (nth x 2) + :input4 (nth x 3) + :input5 (nth x 4) + :output1 (apply target-function (map #(nth x %) (range 5)))}) test-inputs)})) + +(def instructions + (list :in1 + :in2 + :in3 + :in4 + :in5 + :float_add + :float_subtract + :float_mult + :float_quot + :float_eq + :exec_dup + :exec_if + 'close + 0.0 + 1.0)) + +(def data (:train train-and-test-data)) + +(defn error-function + "Finds the behaviors and errors of an individual. The error is the absolute + deviation between the target output value and the program's selected behavior, + or 1000000 if no behavior is produced. The behavior is here defined as the + final top item on the FLOAT stack." + ([argmap data individual] + (let [program (genome/plushy->push (:plushy individual) argmap) + inputs (map (fn [i] (map-vals-input i)) data) + correct-outputs (map (fn [x] (:output1 x)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 (nth input 0) + :in2 (nth input 1) + :in3 (nth input 2) + :in4 (nth input 3) + :in5 (nth input 4)}) + (:step-limit argmap)) + :float)) + inputs) + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 1000000 + (math/abs (- correct-output output)))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) + +(defn -main + "Runs propel-gp, giving it a map of arguments." + [& args] + (gp/gp + (merge + {:instructions instructions + :error-function error-function + :training-data (:train train-and-test-data) + :testing-data (:test train-and-test-data) + :max-generations 300 + :population-size 1000 + :max-initial-plushy-size 100 + :step-limit 200 + :parent-selection :lexicase + :tournament-size 5 + :umad-rate 0.1 + :solution-error-threshold 0.1 + :variation {:umad 1.0 :crossover 0.0} + :elitism false} + (apply hash-map (map #(if (string? %) (read-string %) %) args)))) + (#?(:clj shutdown-agents))) diff --git a/src/propeller/problems/complex_regression.cljc b/src/propeller/problems/complex_regression.cljc index 6ce4e1c..f3b1af0 100644 --- a/src/propeller/problems/complex_regression.cljc +++ b/src/propeller/problems/complex_regression.cljc @@ -75,7 +75,11 @@ :population-size 500 :max-initial-plushy-size 100 :step-limit 200 - :parent-selection :lexicase + :parent-selection :ds-lexicase + :ds-function :case-maxmin + :downsample-rate 0.1 + :case-t-size 20 + :tournament-size 5 :umad-rate 0.1 :variation {:umad 1.0 :crossover 0.0} :elitism false} diff --git a/src/propeller/problems/data_creation.cljc b/src/propeller/problems/data_creation.cljc new file mode 100644 index 0000000..84a278d --- /dev/null +++ b/src/propeller/problems/data_creation.cljc @@ -0,0 +1,103 @@ +(ns propeller.problems.data-creation + (:require [psb2.core :as psb2] + [clojure.data.csv :as csv] + [clojure.java.io :as io] + [clojure.string :as s])) + +(defn generate-data [problem train-or-test] + (let [train-and-test-data (psb2/fetch-examples "data" problem 200 1000) + cleaned-data (cons (vector "input1" "output1") (map #(vector (:input1 %) (:output1 %)) ((keyword train-or-test) train-and-test-data)))] + (prn cleaned-data) + (with-open [writer (io/writer (str problem "-" train-or-test ".csv"))] + (csv/write-csv writer + (doall cleaned-data))))) + +(defn generate-data-gcd [train-or-test] + (let [train-and-test-data (psb2/fetch-examples "data" "gcd" 200 1000) + cleaned-data (cons (vector "input1" "input2" "output1") (map #(vector (:input1 %) (:input2 %) (:output1 %)) ((keyword train-or-test) train-and-test-data)))] + (prn cleaned-data) + (with-open [writer (io/writer (str "gcd-" train-or-test ".csv"))] + (csv/write-csv writer + (doall cleaned-data))))) + +(defn generate-data-find-pair [train-or-test] + (let [train-and-test-data (psb2/fetch-examples "data" "find-pair" 200 1000) + cleaned-data (cons (vector "input1" "input2" "output1" "output2") (map #(vector (:input1 %) (:input2 %) (:output1 %) (:output2 %)) ((keyword train-or-test) train-and-test-data)))] + (prn cleaned-data) + (with-open [writer (io/writer (str "find-pair-" train-or-test ".csv"))] + (csv/write-csv writer + (doall cleaned-data))))) + +(defn generate-data-for-problem [problem] + (map (partial generate-data problem) '["test" "train"])) + +(defn generate-data-for-all-problems [] + (map (partial generate-data-for-problem) '["gcd" + "find-pair"])) + + +;--------PSB1 + +(defn read-data [problem qual] + (with-open [reader (io/reader (str "src/propeller/problems/PSB1/" problem "-" qual ".csv"))] + (doall + (csv/read-csv reader)))) + +(defn edge-cases-for-problem [problem] + (read-data problem "edge")) + +(defn training-cases-for-problem [shuffled-data problem] + (let [edge-cases (edge-cases-for-problem problem) + left (- 201 (count edge-cases)) ;because labels is first + random-cases (take left shuffled-data)] + (concat edge-cases random-cases))) + +(defn testing-cases-for-problem [shuffled-data problem] + (take 1000 (drop 500 shuffled-data))) + +(defn save-train-test-data [shuffled-data problem train-or-test] + (with-open [writer (io/writer (str problem "-" train-or-test ".csv"))] + (csv/write-csv writer + (if (= train-or-test "train") + (training-cases-for-problem shuffled-data problem) + (testing-cases-for-problem shuffled-data problem))))) + +(defn save-data-for-problem [problem] + (let [shuffled-data (shuffle (rest (read-data problem "random")))] + (map (partial save-train-test-data shuffled-data problem) '["test" "train"]))) + +(defn save-data-for-all-problems [] + (map (partial save-data-for-problem) '["small-or-large" + "scrabble-score" + "grade" + "count-odds"])) + +(defn read-string-and-convert [elem] + (if (= elem "") + "" + (let [before (read-string elem)] + (if (symbol? before) + elem + before)))) + + +(defn read-data-formatted [problem train-or-test] + (apply list (with-open [reader (io/reader (str "picked/" problem "-" train-or-test ".csv"))] + (let [csv-data (csv/read-csv reader)] + (mapv zipmap + (->> (first csv-data) ;; First row is the header + (map keyword) ;; Drop if you want string keys instead + repeat) + (map (fn [elem] (map #(read-string-and-convert %) elem)) (rest csv-data))))))) + + +;scrabble-score doesn't play nice with read-string, hacky solution below +(defn scrabble-score-read-data-formatted [problem train-or-test] + (apply list (with-open [reader (io/reader (str "picked/" problem "-" train-or-test ".csv"))] + (let [csv-data (csv/read-csv reader)] + (prn {:csv-data csv-data}) + (mapv zipmap + (->> (first csv-data) ;; First row is the header + (map keyword) ;; Drop if you want string keys instead + repeat) + (map (fn [elem] (list (s/replace (first elem) "\\n" "\n") (read-string (second elem)))) (rest csv-data))))))) \ No newline at end of file diff --git a/src/propeller/problems/float_regression.cljc b/src/propeller/problems/float_regression.cljc index 86c7084..fa896ea 100644 --- a/src/propeller/problems/float_regression.cljc +++ b/src/propeller/problems/float_regression.cljc @@ -23,6 +23,10 @@ :float_subtract :float_mult :float_quot + :float_eq + :exec_dup + :exec_if + 'close 0.0 1.0)) diff --git a/src/propeller/problems/integer_regression.cljc b/src/propeller/problems/integer_regression.cljc index 4595c18..297198c 100644 --- a/src/propeller/problems/integer_regression.cljc +++ b/src/propeller/problems/integer_regression.cljc @@ -12,12 +12,15 @@ (+ (* x x x) (* 2 x x) x 3)) (def train-and-test-data + "Training data: Inputs and outputs with -10 <= x < 11 + Test data: Inputs and outputs of -20 <= x < -10 and 11 <= x < 21" (let [train-inputs (range -10 11) test-inputs (concat (range -20 -10) (range 11 21))] {:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs) :test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)})) (def instructions + "stack-specific instructions, input instructions, close, and constants" (list :in1 :integer_add :integer_subtract diff --git a/src/propeller/problems/simple_classification.cljc b/src/propeller/problems/simple_classification.cljc index fab4a50..76e6b34 100644 --- a/src/propeller/problems/simple_classification.cljc +++ b/src/propeller/problems/simple_classification.cljc @@ -87,11 +87,17 @@ :error-function error-function :training-data (:train train-and-test-data) :testing-data (:test train-and-test-data) + :case-t-size (count (:train train-and-test-data)) + :ds-parent-rate 0 + :ds-parent-gens 1 + :ds-function :case-rand :max-generations 500 :population-size 500 :max-initial-plushy-size 100 :step-limit 200 :parent-selection :lexicase + :downsample? false + :tournament-size 5 :umad-rate 0.1 :variation {:umad 1.0 :crossover 0.0} :elitism false} diff --git a/src/propeller/problems/simple_regression.cljc b/src/propeller/problems/simple_regression.cljc deleted file mode 100755 index cc67fc4..0000000 --- a/src/propeller/problems/simple_regression.cljc +++ /dev/null @@ -1,90 +0,0 @@ -(ns propeller.problems.simple-regression - "Simple Regression: - -Given inputs and outputs, find the target function." - {:doc/format :markdown} - (:require [propeller.genome :as genome] - [propeller.push.interpreter :as interpreter] - [propeller.push.state :as state] - [propeller.tools.math :as math] - [propeller.gp :as gp] - #?(:cljs [cljs.reader :refer [read-string]]))) - -(defn- target-function - "Target function: f(x) = x^3 + x + 3" - [x] - (+ (* x x x) x 3)) - -(def train-and-test-data - "Training data: Inputs and outputs with -10 <= x < 11 - - Test data: Inputs and outputs of -20 <= x < -10 and 11 <= x < 21" - (let [train-inputs (range -10 11) - test-inputs (concat (range -20 -10) (range 11 21))] - {:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs) - :test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)})) - -(def instructions - "stack-specific instructions, input instructions, close, and constants" - (list :in1 - :integer_add - :integer_subtract - :integer_mult - :integer_quot - :integer_eq - :exec_dup - :exec_if - 'close - 0 - 1)) - -(defn error-function - "Finds the behaviors and errors of an individual. The error is the absolute - deviation between the target output value and the program's selected behavior, - or 1000000 if no behavior is produced. The behavior is here defined as the - final top item on the INTEGER stack." - ([argmap data individual] - (let [program (genome/plushy->push (:plushy individual) argmap) - inputs (map (fn [x] (first (:input1 x))) data) - correct-outputs (map (fn [x] (first (:output1 x))) data) - outputs (map (fn [input] - (state/peek-stack - (interpreter/interpret-program - program - (assoc state/empty-state :input {:in1 input}) - (:step-limit argmap)) - :integer)) - inputs) - errors (map (fn [correct-output output] - (if (= output :no-stack-item) - 1000000 - (math/abs (- correct-output output)))) - correct-outputs - outputs)] - (assoc individual - :behaviors outputs - :errors errors - :total-error #?(:clj (apply +' errors) - :cljs (apply + errors)))))) - -(defn -main - "Runs the top-level genetic programming function, giving it a map of - arguments with defaults that can be overridden from the command line - or through a passed map." - [& args] - (gp/gp - (merge - {:instructions instructions - :error-function error-function - :training-data (:train train-and-test-data) - :testing-data (:test train-and-test-data) - :max-generations 500 - :population-size 500 - :max-initial-plushy-size 100 - :step-limit 200 - :parent-selection :lexicase - :tournament-size 5 - :umad-rate 0.1 - :variation {:umad 0.5 :crossover 0.5} - :elitism false} - (apply hash-map (map #(if (string? %) (read-string %) %) args))))) diff --git a/src/propeller/problems/string_classification.cljc b/src/propeller/problems/string_classification.cljc index 7174424..46c37f4 100755 --- a/src/propeller/problems/string_classification.cljc +++ b/src/propeller/problems/string_classification.cljc @@ -105,6 +105,6 @@ Given a string, return true if it contains A, C, G, and T. Else return false." :parent-selection :lexicase :tournament-size 5 :umad-rate 0.1 - :variation {:umad 0.5 :crossover 0.5} + :variation {:umad 1.0 :crossover 0.0} :elitism false} (apply hash-map (map #(if (string? %) (read-string %) %) args))))) diff --git a/src/propeller/selection.cljc b/src/propeller/selection.cljc old mode 100755 new mode 100644 index 2bb7710..294d3c9 --- a/src/propeller/selection.cljc +++ b/src/propeller/selection.cljc @@ -30,6 +30,23 @@ survivors) (rest cases)))))) +(defn fitness-proportionate-selection + "Selects an individual from the population using a fitness proportionate selection." + [pop argmap] + (let [pop-fits (->> pop ;convert from error to fitness, where fitness (probability) is (1/ (1+ tot_err)) + (map #(assoc % :fitness (/ 1 (inc (:total-error %)))))) + pop-total-fit (->> pop-fits + (map :fitness) + (reduce +)) + random-num (* (rand) pop-total-fit) + sorted-by-fitness (->> pop-fits + (sort-by :fitness) + (reverse))] + (loop [tot (:fitness (first sorted-by-fitness)) individuals sorted-by-fitness] + (if (< random-num tot) + (first individuals) + (recur (+ tot (:fitness (first (rest individuals)))) (rest individuals)))))) + (defn motley-batch-lexicase-selection "Selects an individual from the population using motley batch lexicase selection. Cases are combined in random collections of max size (:max-batch-size argmap)." @@ -91,5 +108,6 @@ (case (:parent-selection argmap) :tournament (tournament-selection pop argmap) :lexicase (lexicase-selection pop argmap) + :roulette (fitness-proportionate-selection pop argmap) :epsilon-lexicase (epsilon-lexicase-selection pop argmap) :motley-batch-lexicase (motley-batch-lexicase-selection pop argmap))) diff --git a/src/propeller/simplification.cljc b/src/propeller/simplification.cljc index 828ff93..e1c360a 100644 --- a/src/propeller/simplification.cljc +++ b/src/propeller/simplification.cljc @@ -26,8 +26,7 @@ The output with verbose adds the following lines to the output: (:require [propeller.genome :as genome] [propeller.push.interpreter :as interpreter] [propeller.push.state :as state] - [propeller.tools.math :as math] - )) + [propeller.tools.math :as math])) (defn choose-random-k "Takes k random indices" diff --git a/src/propeller/tools/math.cljc b/src/propeller/tools/math.cljc old mode 100755 new mode 100644 index 4780c2e..6102316 --- a/src/propeller/tools/math.cljc +++ b/src/propeller/tools/math.cljc @@ -7,6 +7,11 @@ (defonce ^{:no-doc true :const true} E #?(:clj Math/E :cljs js/Math.PI)) +(defn step + "returns 1 if number is nonzero, 0 otherwise" + [x] + (if (zero? x) 0 1)) + (defn mean "Returns the mean." [coll] @@ -132,3 +137,8 @@ [x] #?(:clj (Math/tan x) :cljs (js/Math.tan x))) + +(defn transpose + "returns a vector containing the transpose of a coll of colls" + [x] + (apply map vector x)) \ No newline at end of file diff --git a/src/propeller/tools/metrics.cljc b/src/propeller/tools/metrics.cljc index 9a051e1..3863e6a 100755 --- a/src/propeller/tools/metrics.cljc +++ b/src/propeller/tools/metrics.cljc @@ -2,11 +2,47 @@ "Functions to measure things." (:require [propeller.tools.math :as math])) +(defn argmins + "returns the indice(s) of the minimum value of a list. Could be more efficient, probably" + [coll] + (if (empty? coll) '() + (let [m (apply min coll)] + (keep-indexed #(when (= m %2) %1) coll)))) + +(defn argmax-last + "returns the index of the maximum value in a list, tiebreaking last" + [coll] + (->> coll + (map-indexed vector) + (apply max-key second) + first)) + +(defn argmax + "returns the index of the maximum value in a list, randomly tiebreaking" + [coll] + (if (zero? (count coll)) + :null + (->> coll + (map-indexed vector) + (filter #(= (apply max coll) (second %))) + rand-nth + first))) + (defn mean "Returns the mean of a collection." [coll] (if (empty? coll) 0 (math/div (apply + coll) (count coll)))) +(defn mean-of-colls + "returns the mean of multiple colls" + [coll] + (map mean (math/transpose coll))) + +(defn min-of-colls + "returns the smallest value of multiple colls" + [coll] + (map #(apply min %) (math/transpose coll))) + (defn median "Returns the median of a collection." [coll] @@ -29,12 +65,12 @@ "computes the next row using the prev-row current-element and the other seq" [prev-row current-element other-seq pred] (reduce - (fn [row [diagonal above other-element]] - (let [update-val (if (pred other-element current-element) + (fn [row [diagonal above other-element]] + (let [update-val (if (pred other-element current-element) ;; if the elements are deemed equivalent according to the predicate ;; pred, then no change has taken place to the string, so we are ;; going to set it the same value as diagonal (which is the previous edit-distance) - diagonal + diagonal ;; in the case where the elements are not considered equivalent, then we are going ;; to figure out if its a substitution (then there is a change of 1 from the previous ;; edit distance) thus the value is diagonal + 1 or if its a deletion, then the value @@ -42,18 +78,18 @@ ;; of last of row + 1 (since we will be using vectors, peek is more efficient) ;; or it could be a case of insertion, then the value is above+1, and we chose ;; the minimum of the three - (inc (min diagonal above (peek row))))] + (inc (min diagonal above (peek row))))] - (conj row update-val))) + (conj row update-val))) ;; we need to initialize the reduce function with the value of a row, since we are ;; constructing this row from the previous one, the row is a vector of 1 element which ;; consists of 1 + the first element in the previous row (edit distance between the prefix so far ;; and an empty string) - [(inc (first prev-row))] + [(inc (first prev-row))] ;; for the reduction to go over, we need to provide it with three values, the diagonal ;; which is the same as prev-row because it starts from 0, the above, which is the next element ;; from the list and finally the element from the other sequence itself. - (map vector prev-row (next prev-row) other-seq))) + (map vector prev-row (next prev-row) other-seq))) (defn levenshtein-distance "Levenshtein Distance - http://en.wikipedia.org/wiki/Levenshtein_distance @@ -66,17 +102,17 @@ (empty? (str a)) (count (str b)) ;; sometimes stack pushes numbers, force (empty? (str b)) (count (str a)) ;; a and b to be strings :else (peek - (reduce + (reduce ;; we use a simple reduction to convert the previous row into the ;; next-row using the compute-next-row which takes a current ;; element, the previous-row computed so far, and the predicate ;; to compare for equality - (fn [prev-row current-element] - (compute-next-row prev-row current-element (str b) p)) + (fn [prev-row current-element] + (compute-next-row prev-row current-element (str b) p)) ;; we need to initialize the prev-row with the edit distance ;; between the various prefixes of b and the empty string - (range (inc (count (str b)))) - (str a))))) + (range (inc (count (str b)))) + (str a))))) (defn sequence-similarity "Returns a number between 0 and 1, indicating how similar the sequences are diff --git a/src/propeller/utils.cljc b/src/propeller/utils.cljc index 43afafd..07202e4 100755 --- a/src/propeller/utils.cljc +++ b/src/propeller/utils.cljc @@ -3,6 +3,20 @@ (:require [clojure.zip :as zip] [clojure.repl :as repl])) +(defn filter-by-index + "filters a collection by a list of indices" + [coll idxs] + ;(prn {:func :filter-by-index :coll coll :idxs idxs}) + (map (partial nth coll) idxs)) + +(defn drop-nth + "drops the nth element from a collection" + [n coll] + ;(prn {:func :drop-nth :n n :coll coll}) + (concat + (take n coll) + (nthrest coll (inc n)))) + (defn first-non-nil "Returns the first non-nil values from the collection, or returns `nil` if the collection is empty or only contains `nil`." diff --git a/src/propeller/variation.cljc b/src/propeller/variation.cljc old mode 100755 new mode 100644 index 6ffbccb..d42d95f --- a/src/propeller/variation.cljc +++ b/src/propeller/variation.cljc @@ -254,6 +254,8 @@ The function `new-individual` returns a new individual produced by selection and "Returns a new individual produced by selection and variation of individuals in the population." [pop argmap] + (let [umad-parent (selection/select-parent pop argmap) + parent-ind (:index umad-parent)] ;this is a hack to log hyperselection, only works for umad {:plushy (let [r (rand) op (loop [accum 0.0 @@ -264,7 +266,7 @@ The function `new-individual` returns a new individual produced by selection and (if (>= (+ accum prob1) r) op1 (recur (+ accum prob1) - (rest ops-probs))))))] + (rest ops-probs))))))] (case op :crossover (crossover @@ -381,4 +383,4 @@ The function `new-individual` returns a new individual produced by selection and :else (throw #?(:clj (Exception. (str "No match in new-individual for " op)) :cljs (js/Error - (str "No match in new-individual for " op))))))}) + (str "No match in new-individual for " op))))))})) \ No newline at end of file diff --git a/test/propeller/push/downsample_test.cljc b/test/propeller/push/downsample_test.cljc new file mode 100644 index 0000000..3beb56b --- /dev/null +++ b/test/propeller/push/downsample_test.cljc @@ -0,0 +1,163 @@ +(ns propeller.push.downsample-test + (:require [clojure.test :as t] + [propeller.utils :as u] + [propeller.simplification :as s] + [propeller.downsample :as ds] + [propeller.hyperselection :as hs])) + + +(t/deftest assign-indices-to-data-test + (t/testing "assign-indices-to-data" + (t/testing "should return a map of the same length" + (t/is (= (count (ds/assign-indices-to-data (range 10))) 10)) + (t/is (= (count (ds/assign-indices-to-data (range 0))) 0))) + (t/testing "should return a map where each element has an index key" + (t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10)))))) + (t/testing "should return distinct indices" + (t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10))) (range 10)))))) + +(t/deftest select-downsample-random-test + (t/testing "select-downsample-random" + (t/testing "should select the correct amount of elements" + (t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.1})) 1)) + (t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.2})) 2)) + (t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.5})) 5))) + (t/testing "should not return duplicate items (when called with set of numbers)" + (t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.1}))) 1)) + (t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.2}))) 2)) + (t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.5}))) 5))) + (t/testing "should round down the number of elements selected if not whole" + (t/is (= (count (ds/select-downsample-random (range 3) {:downsample-rate 0.5})) 1)) + (t/is (= (count (ds/select-downsample-random (range 1) {:downsample-rate 0.5})) 0))) + (t/testing "should not return more elements than available" + (t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 2})) 10)) + (t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 1.5})) 10))))) + +(t/deftest get-distance-between-cases-test + (t/testing "get-distance-between-cases" + (t/testing "should return correct distance" + (t/is (= 3 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 1)))) + (t/testing "should return 0 for the distance of a case to itself" + (t/is (= 0 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 0)))) + (t/testing "should work for non binary values (0 is solved)" + (t/is (= 1 (ds/get-distance-between-cases '((0 2 2) (0 2 2) (1 0 50)) 1 2)))) + (t/testing "should return the max distance if one of the cases does not exist" + (t/is (= 3 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 4)))))) + +(t/deftest merge-map-lists-at-index-test + (t/testing "merge-map-lists-at-index" + (t/testing "works properly" + (t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 2 :b 3}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '({:index 1 :a 2 :b 3}))))) + (t/testing "doesn't change big list if no indices match" + (t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '({:index 3 :a 2 :b 3}))))) + (t/testing "doesn't fail on empty list" + (t/is (= '() (ds/merge-map-lists-at-index '() '())))) + (t/testing "shouldn't fail merging non-empty with empty" + (t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '())))))) + +(t/deftest update-at-indices-test + (t/testing "update-at-indices" + (t/testing "should update at correct indices" + (t/is (= (ds/update-at-indices [1 2 3 4] [5] [0]) [5 2 3 4])) + (t/is (= (ds/update-at-indices [1 2 3 4] [5] [0]) [5 2 3 4]))) + (t/testing "should update nothing if index list is empty" + (t/is (= (ds/update-at-indices [6 5 4 0 0] [] []) [6 5 4 0 0]))) + (t/testing "should update nothing if index list is out of bounds" + (t/is (= (ds/update-at-indices [6 5 4 0 0] [4 5 1] [-1 5 6]) [6 5 4 0 0]))) + (t/testing "should update only when indices are available (length mismatch)" + (t/is (= (ds/update-at-indices [6 5 4 0 0] [1 2 3 4] [0 1]) [1 2 4 0 0]))) + (t/testing "should not care about index order" + (t/is (= (ds/update-at-indices [6 5 4 0 0] [2 1] [1 0]) [1 2 4 0 0]))) + (t/testing "should work when input is a list" + (t/is (= (ds/update-at-indices '(6 5 4 0 0) '(2 1) '(1 0)) [1 2 4 0 0]))))) + + +(t/deftest ids-types-test + (t/testing "replace-close-zero-with-zero" + (t/testing "should replace the close to zero values with zero" + (t/is (= (ds/replace-close-zero-with-zero '(0.1 2 3 4 0.1 2 3 4) 0.2) '(0 2 3 4 0 2 3 4))) + (t/is (= (ds/replace-close-zero-with-zero '(0.1 0.1) 0.0) '(0.1 0.1))) + (t/is (= (ds/replace-close-zero-with-zero '(100 100 200) 100) '(0 0 200)))))) + +(t/deftest update-case-distances-test + (t/testing "update-case-distances" + (t/testing "should update correctly when fewer errors than all" + (t/is (= (ds/update-case-distances '({:errors (0 0)} {:errors (0 0)}) + '({:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]}) + '({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]}) + :solved) + '({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} + {:index 3 :distances [2 2 2 0 0]} {:index 4 :distances [2 2 2 0 0]})))) + (t/testing "should update correctly when same errors as all" + (t/is (= (ds/update-case-distances '({:errors (0 0 0 0 0)} {:errors (0 0 0 0 0)}) + '({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]}) + '({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]}) + :solved) + '({:index 0 :distances [0 0 0 0 0]} {:index 1 :distances [0 0 0 0 0]} {:index 2 :distances [0 0 0 0 0]} + {:index 3 :distances [0 0 0 0 0]} {:index 4 :distances [0 0 0 0 0]})))) + (t/testing "should update correctly for elite/not-elite" + (t/is (= (ds/update-case-distances '({:errors (1 1 1 2 2)} {:errors (2 2 2 1 1)}) + '({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]}) + '({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]}) + :elite) + '({:index 0 :distances [0 0 0 2 2]} {:index 1 :distances [0 0 0 2 2]} {:index 2 :distances [0 0 0 2 2]} + {:index 3 :distances [2 2 2 0 0]} {:index 4 :distances [2 2 2 0 0]}))) + ))) + +(t/deftest case-maxmin-test + (t/testing "case-maxmin selects correct downsample" + (let [selected (ds/select-downsample-maxmin + '({:input1 [0] :output1 [10] :index 0 :distances [0 5 0 0 0]} + {:input1 [1] :output1 [11] :index 1 :distances [5 0 5 5 5]} + {:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]} + {:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]} + {:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]}) + {:downsample-rate 0.4})] + (prn {:selected selected}) + (t/is (or (= (:index (first selected)) 1) (= (:index (second selected)) 1)))))) + +(t/deftest case-maxmin-adaptive + (t/testing "case-maxmin-adaptive selects correct downsample simple" + (let [selected (ds/select-downsample-maxmin-adaptive + '({:input1 [0] :output1 [10] :index 0 :distances [0 5 0 0 0]} + {:input1 [1] :output1 [11] :index 1 :distances [5 0 5 5 5]} + {:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]} + {:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]} + {:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]}) + {:case-delta 0})] + (prn {:selected selected}) + (t/is (or (= (:index (first selected)) 1) (= (:index (second selected)) 1))) + (t/is (= 2 (count selected))))) + (t/testing "case-maxmin-adaptive selects correct downsample when all identical" + (let [selected (ds/select-downsample-maxmin-adaptive + '({:input1 [0] :output1 [10] :index 0 :distances [0 0 0 0 0]} + {:input1 [1] :output1 [11] :index 1 :distances [0 0 0 0 0]} + {:input1 [2] :output1 [12] :index 2 :distances [0 0 0 0 0]} + {:input1 [3] :output1 [13] :index 3 :distances [0 0 0 0 0]} + {:input1 [4] :output1 [14] :index 4 :distances [0 0 0 0 0]}) + {:case-delta 0})] + (prn {:selected selected}) + (t/is (= 1 (count selected)))))) + + +(t/deftest hyperselection-test + (let [parents1 '({:blah 3 :index 1} {:blah 3 :index 1} + {:blah 3 :index 1} {:blah 3 :index 2}) + parents2 '({:plushy 2 :index 0} {:blah 3 :index 2} + {:blah 3 :index 3} {:index 4}) + emptyparents '({:blah 1} {:blah 1} {:blah 1})] + (t/testing "sum-list-map-indices function works correctly" + (t/is (= {1 3, 2 1} (hs/sum-list-map-indices parents1))) + (t/is (= {0 1, 2 1, 3 1, 4 1} (hs/sum-list-map-indices parents2)))) + (t/testing "ordered-freqs function works correctly" + (t/is (= '(3 1) (hs/ordered-freqs (hs/sum-list-map-indices parents1)))) + (t/is (= '(1 1 1 1) (hs/ordered-freqs (hs/sum-list-map-indices parents2))))) + (t/testing "hyperselection-track works correctly" + (t/is (= '(0.75 0.25) (hs/hyperselection-track parents1))) + (t/is (= '(0.25 0.25 0.25 0.25) (hs/hyperselection-track parents2)))) + (t/testing "reindex-pop works correctly" + (t/is (= '({:blah 3 :index 0} {:blah 3 :index 1} + {:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1))) + (t/is (= '({:plushy 2 :index 0} {:blah 3 :index 1} + {:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2))) + (t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents)))))) diff --git a/test/propeller/selection_test.cljc b/test/propeller/selection_test.cljc new file mode 100644 index 0000000..cbb44e2 --- /dev/null +++ b/test/propeller/selection_test.cljc @@ -0,0 +1,23 @@ +(ns propeller.selection-test + (:require [clojure.test :as t] + [propeller.selection :as s])) + + +(t/deftest roulette-selection-test + (t/testing "fitness proportionate selection" + (t/testing "should correctly define the probabilities of selection" + (t/is (let [ret (s/fitness-proportionate-selection '({:index 0 :total-error 0} + {:index 1 :total-error 1} + {:index 2 :total-error 1} + {:index 3 :total-error 1}) {:empty :argmap})] + (case (:index ret) + 0 (= (:fitness ret) 1) ;if we selected index 0, check that fitness is correctly calculated to 1 + (= (float (:fitness ret)) 0.5) + )))) + (t/testing "should always return the same individual if there is only one" + (t/testing "desipte it having bad error" + (t/is (= (:index (s/fitness-proportionate-selection '({:index 99 :total-error 109012390123}) {:empty :argmap})) + 99))) + (t/testing "when it has low error" + (t/is (= (:index (s/fitness-proportionate-selection '({:index 22 :total-error 0}) {:empty :argmap})) + 22)))))) \ No newline at end of file diff --git a/test/propeller/tools/metrics_test.cljc b/test/propeller/tools/metrics_test.cljc index 686856b..011e389 100644 --- a/test/propeller/tools/metrics_test.cljc +++ b/test/propeller/tools/metrics_test.cljc @@ -3,6 +3,22 @@ [propeller.tools.metrics :as m] [propeller.tools.math :as a])) +(t/deftest argmax-test + (t/is (= (m/argmax '(1 2 3 4)) 3)) + (t/is (= 1 (nth '(1 1 1 1) (m/argmax '(1 1 1 1))))) + (t/is (= 3 (nth '(1 3 3 1) (m/argmax '(1 3 3 1))))) + (t/is (= 0 (m/argmax '(1))))) + +(t/deftest mean-of-colls-test + (t/is (= (m/mean-of-colls '((1 2 3 4) (4 3 2 1))) '(2.5 2.5 2.5 2.5))) + (t/is (= (m/mean-of-colls '((1 2 3) (4 3 2 1))) '(2.5 2.5 2.5))) + (t/is (= (m/mean-of-colls '((1))) '(1.0)))) + +(t/deftest min-of-colls-test + (t/is (= (m/min-of-colls '((1 2 3 4) (4 3 2 1))) '(1 2 2 1))) + (t/is (= (m/min-of-colls '((1 2 3) (4 3 2 1))) '(1 2 2))) + (t/is (= (m/min-of-colls '((1))) '(1)))) + (t/deftest mean-test (t/is (= (m/mean '(1 2 3 4)) 2.5)) (t/is (= (m/mean '()) 0))) @@ -13,9 +29,18 @@ ;(t/is (= (m/median '()) 0.0)) ) +(t/deftest argmins-test + (t/is (= (m/argmins '(1 2 3 4)) '(0))) + (t/is (= (m/argmins '(1 1 3 4)) '(0 1))) + (t/is (= (m/argmins '()) '())) + (t/is (= (m/argmins '(3 4 5 6 6 6)) '(0))) + (t/is (= (m/argmins '(6 4 5 6 6 6)) '(1))) + (t/is (= (m/argmins '(0 4 5 0 0 0)) '(0 3 4 5)))) + + (t/deftest levenshtein-distance-test (t/is (= (m/levenshtein-distance "kitten" "sipping") 5)) - (t/is (= (m/levenshtein-distance "" "hello")) 5)) + (t/is (= (m/levenshtein-distance "" "hello") 5))) (t/deftest sequence-similarity-test (t/is (a/approx= (m/sequence-similarity "kitten" "sipping") 0.2857 0.001)) diff --git a/test/propeller/utils_test.cljc b/test/propeller/utils_test.cljc index f8b4567..cc36fec 100644 --- a/test/propeller/utils_test.cljc +++ b/test/propeller/utils_test.cljc @@ -1,7 +1,9 @@ (ns propeller.utils-test (:require [clojure.test :as t] [propeller.utils :as u] - [propeller.simplification :as s])) + [propeller.simplification :as s] + [propeller.downsample :as ds] + [propeller.hyperselection :as hs])) (t/deftest first-non-nil-test (t/is (= 1 (u/first-non-nil '(1 2 3)))) @@ -22,57 +24,62 @@ (t/deftest random-instruction-test (t/is - (letfn [(instruct [] 1)] - (let [test (u/random-instruction [instruct 2])] - (if (= 1 test) - true - (= 2 test)))))) + (letfn [(instruct [] 1)] + (let [test (u/random-instruction [instruct 2])] + (if (= 1 test) + true + (= 2 test)))))) (t/deftest count-points-test (t/is (= 6 (u/count-points '(:a :b (:c :d))))) (t/is (= 1 (u/count-points '()))) (t/is (= 2 (u/count-points '(:a))))) -(t/testing "choose-random-k" - (t/testing "should return indices that are a member of the original array" - (t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10)))))) - (t/testing "should return a list of size k" - (t/is (= (count (s/choose-random-k 7 (range 10))) 7)))) +(t/deftest choose-random-k-test + (t/testing "choose-random-k" + (t/testing "should return indices that are a member of the original array" + (t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10)))))) + (t/testing "should return a list of size k" + (t/is (= (count (s/choose-random-k 7 (range 10))) 7))))) -(t/testing "delete-at-indices" - (t/testing "should actually remove indicated items" - (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3))))) - (t/testing "should work with numerical indices" - (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3))))) - (t/testing "should not delete anything for index out of bounds" - (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3)))) - (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3)))) - (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3))))) - (t/testing "should only delete at single index once" - (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3))))) - (t/testing "should return empty list when deleting from empty list" - (t/is (= '() (s/delete-at-indices '(0) '())))) - (t/testing "should be able to delete at arbitrary indices" - (t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7)))) -(t/testing "delete-random-k" - (t/testing "should remove the correct amount of items" - (t/is (= (count (s/delete-k-random 3 (range 10))) 7)) - (t/is (= (count (s/delete-k-random 10 (range 10))) 0)) - (t/is (= (count (s/delete-k-random 0 (range 10))) 10))) - (t/testing "should not fail if k >> size of collection" - (t/is (= (count (s/delete-k-random 300 (range 10))) 0)) - (t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '()))) - (t/testing "should not fail if the collection is empty" - (t/is (= (count (s/delete-k-random 300 '())) 0)) - (t/is (= (count (s/delete-k-random 0 '())) 0))) - (t/testing "should maintain order of the remaining items" - (t/is (apply < (s/delete-k-random 3 (range 10)))))) +(t/deftest delete-at-indices-test + (t/testing "delete-at-indices" + (t/testing "should actually remove indicated items" + (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3))))) + (t/testing "should work with numerical indices" + (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3))))) + (t/testing "should not delete anything for index out of bounds" + (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3)))) + (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3)))) + (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3))))) + (t/testing "should only delete at single index once" + (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3))))) + (t/testing "should return empty list when deleting from empty list" + (t/is (= '() (s/delete-at-indices '(0) '())))) + (t/testing "should be able to delete at arbitrary indices" + (t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7))))) -(t/testing "auto-simplify-plushy" - (t/testing "should handle having an empty plushy" - (t/is (= (s/auto-simplify-plushy '() (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false}) '()))) - (let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)] - (t/testing "should decrease size of plushy that always has perfect scores" - (t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false})) (count plushy))) - (t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 10 :simplification-verbose? false})) (count plushy)))))) \ No newline at end of file +(t/deftest delete-random-k-test + (t/testing "delete-random-k" + (t/testing "should remove the correct amount of items" + (t/is (= (count (s/delete-k-random 3 (range 10))) 7)) + (t/is (= (count (s/delete-k-random 10 (range 10))) 0)) + (t/is (= (count (s/delete-k-random 0 (range 10))) 10))) + (t/testing "should not fail if k >> size of collection" + (t/is (= (count (s/delete-k-random 300 (range 10))) 0)) + (t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '()))) + (t/testing "should not fail if the collection is empty" + (t/is (= (count (s/delete-k-random 300 '())) 0)) + (t/is (= (count (s/delete-k-random 0 '())) 0))) + (t/testing "should maintain order of the remaining items" + (t/is (apply < (s/delete-k-random 3 (range 10))))))) + +(t/deftest auto-simplify-plushy-test + (t/testing "auto-simplify-plushy" + (t/testing "should handle having an empty plushy" + (t/is (= (s/auto-simplify-plushy '() (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false}) '()))) + (let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)] + (t/testing "should decrease size of plushy that always has perfect scores" + (t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false})) (count plushy))) + (t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 10 :simplification-verbose? false})) (count plushy))))))) \ No newline at end of file