Merge pull request #60 from ryanboldi/main

down-sampling code, autosimplification code
This commit is contained in:
Lee Spector 2023-10-15 10:51:51 -04:00 committed by GitHub
commit e7130da06f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 1120 additions and 231 deletions

1
.gitignore vendored
View File

@ -33,4 +33,5 @@ node_modules/
# https://github.com/thelmuth/program-synthesis-benchmark-datasets # https://github.com/thelmuth/program-synthesis-benchmark-datasets
/data /data
**/.DS_Store **/.DS_Store
*.edn
/.cpcache/ /.cpcache/

View File

@ -6,7 +6,8 @@
:dependencies [[org.clojure/clojure "1.10.0"] :dependencies [[org.clojure/clojure "1.10.0"]
[org.clojure/clojurescript "1.9.946"] [org.clojure/clojurescript "1.9.946"]
[org.clojure/test.check "1.1.0"] [org.clojure/test.check "1.1.0"]
[net.clojars.schneau/psb2 "1.1.1"]] [net.clojars.schneau/psb2 "1.1.1"]
[org.clojure/data.csv "1.0.1"]]
:profiles {:profiling {:dependencies [[com.clojure-goes-fast/clj-async-profiler "0.5.1"]]}} :profiles {:profiling {:dependencies [[com.clojure-goes-fast/clj-async-profiler "0.5.1"]]}}
:main ^:skip-aot propeller.core :main ^:skip-aot propeller.core
:repl-options {:init-ns propeller.core} :repl-options {:init-ns propeller.core}

View File

@ -0,0 +1,56 @@
Downsampling the Training Data
=
Downsampling is a very simple way to improve the efficiency of your evolutionary runs. It might allow for deeper evolutionary searches and a greater success rate.
Using Downsampled selection with propeller is easy:
Set the :parent-selection argument to whichever selection strategy you would like, and set the :downsample? argument to true as follows:
```clojure
lein run -m propeller.problems.simple-regression :parent-selection :lexicase :downsample? true <required downsampling args here>
```
The number of evaluations is held constant when comparing to a full training set run, so set the :max-generations to a number of generations that you would have gone to using a **full** sample.
## Downsample Functions
In this repository, you have access to 3 different downsampling functions. These are the methods used to take a down-sample from the entire training set.
To use them, add the argument ```:ds-function``` followed by which function you would like to us
The list is
- ```:case-maxmin``` - This is the method used for informed down-sampled lexicase selection
- ```:case-maxmin-auto``` - This method automatically determines the downsample size
- ```:case-rand```- Random Sampling
### Using ```:case-maxmin```:
In order to use regular informed down-sampled selection, you must specify a few things:
- ```:downsample-rate```- This is the $r$ parameter: what proportion of the full sample should be in the down-sample $\in [0,1]$
- ```:ds-parent-rate``` - This is the $\rho$ parameter: what proportion of parents are used to evaluate case distances $\in [0,1]$
- ```:ds-parent-gens``` - This is the $k$ parameter: How many generations in between parent evaluations for distances $\in \{1,2,3, \dots\}$
### Using ```:case-maxmin-auto```:
In order to use automatic informed down-sampled selection, you must specify a few things:
- ```:case-delta ```- This is the $\Delta$ parameter: How close can the farthest case be from its closest case before we stop adding to the down-sample
- ```:ids-type``` - Either ```:elite``` or ```:solved ``` - Specifies whether we are using elite/not-elite or solved/not-solved as our binary-fication of case solve vectors.
- ```:ds-parent-rate``` - This is the $\rho$ parameter: what proportion of parents are used to evaluate case distances $\in [0,1]$
- ```:ds-parent-gens``` - This is the $k$ parameter: How many generations in between parent evaluations for distances $\in \{1,2,3, \dots\}$
### Using ```:case-rand```:
In order to use regular randomly down-sampled selection, you must specify a few things:
- ```:downsample-rate```- This is the $r$ parameter: what proportion of the full sample should be in the down-sample $\in [0,1]$
Here's an example of running informed downsampled lexicase selection with $r=0.1$, $\rho=0.01$ and $k=100$ on the simple classification problem:
```clojure
lein run -m propeller.problems.simple-classification :parent-selection :lexicase :downsample? true :ds-function :case-maxmin :downsample-rate 0.1 :max-generations 300 :ds-parent-rate 0.01 :ds-parent-gens 100
```

View File

@ -0,0 +1,132 @@
(ns propeller.downsample
(:require [propeller.tools.math :as math]
[propeller.tools.metrics :as metrics]
[propeller.utils :as utils]))
(defn assign-indices-to-data
"assigns an index to each training case in order to differentiate them when downsampling"
[training-data]
(map (fn [data-map index]
(let [data-m (if (map? data-map) data-map (assoc {} :data data-map))] ;if data is not in a map, make it one
(assoc data-m :index index)))
training-data (range (count training-data))))
(defn initialize-case-distances
[{:keys [training-data population-size]}]
(map #(assoc % :distances (vec (repeat (count training-data) population-size))) training-data))
(defn select-downsample-random
"Selects a downsample from the training cases and returns it"
[training-data {:keys [downsample-rate]}]
(take (int (* downsample-rate (count training-data))) (shuffle training-data)))
(defn select-downsample-maxmin
"selects a downsample that has it's cases maximally far away by sequentially
adding cases to the downsample that have their closest case maximally far away"
[training-data {:keys [downsample-rate]}]
(let [shuffled-cases (shuffle training-data)
goal-size (int (* downsample-rate (count training-data)))]
(loop [new-downsample (conj [] (first shuffled-cases))
cases-to-pick-from (rest shuffled-cases)]
(if (>= (count new-downsample) goal-size)
new-downsample
(let [tournament cases-to-pick-from
min-case-distances (metrics/min-of-colls
(map (fn [distance-list]
(utils/filter-by-index distance-list (map #(:index %) tournament)))
(map #(:distances %) new-downsample)))
selected-case-index (metrics/argmax min-case-distances)]
(recur (conj new-downsample (nth tournament selected-case-index))
(shuffle (utils/drop-nth selected-case-index tournament))))))))
(defn select-downsample-maxmin-adaptive
"selects a downsample that has it's cases maximally far away by sequentially
adding cases to the downsample that have their closest case maximally far away
automatically stops when the maximum minimum distance is below delta"
[training-data {:keys [case-delta]}]
(let [shuffled-cases (shuffle training-data)]
(loop [new-downsample (conj [] (first shuffled-cases))
cases-to-pick-from (rest shuffled-cases)]
(let [tournament cases-to-pick-from
min-case-distances (metrics/min-of-colls
(map (fn [distance-list]
(utils/filter-by-index distance-list (map #(:index %) tournament)))
(map #(:distances %) new-downsample)))
selected-case-index (metrics/argmax min-case-distances)]
(if (or (= 0 (count tournament)) (<= (apply max min-case-distances) case-delta))
new-downsample
(recur (conj new-downsample (nth tournament selected-case-index))
(shuffle (utils/drop-nth selected-case-index tournament))))))))
(defn get-distance-between-cases
"returns the distance between two cases given a list of individual error vectors, and the index these
cases exist in the error vector. Only makes the distinction between zero and nonzero errors"
[error-lists case-index-1 case-index-2]
(if (or (< (count (first error-lists)) case-index-1)
(< (count (first error-lists)) case-index-2)
(neg? case-index-1) (neg? case-index-2))
(count error-lists) ;return the max distance
(let [errors-1 (map #(nth % case-index-1) error-lists)
errors-2 (map #(nth % case-index-2) error-lists)]
;compute distance between errors-1 and errors-2
(reduce + (map (fn [e1 e2] (math/abs (- (math/step e1) (math/step e2)))) errors-1 errors-2)))))
(defn update-at-indices
"merges two vectors at the indices provided by a third vector"
[big-vec small-vec indices]
(->> big-vec
(map-indexed (fn [idx itm] (let [index (.indexOf indices idx)]
(if (not= -1 index) (nth small-vec index) itm))))
vec))
(defn merge-map-lists-at-index
"merges two lists of maps, replacing the maps in the big
list with their corresponding (based on index) maps in the small list"
[big-list small-list]
(map
#(let [corresponding-small (some (fn [c] (when (= (:index %) (:index c)) c)) small-list)]
(if (nil? corresponding-small) % corresponding-small))
big-list))
(defn replace-mins-with-zero
"replaces the minimum value(s) in a list with zero"
[coll]
(if (empty? coll)
'()
(let [m (apply min coll)]
(map #(if (= m %) 0 %) coll))))
(defn replace-close-zero-with-zero
"replaces values within a delta of zero with zero, used for regression problems"
[coll delta]
(map #(if (>= delta %) 0 %) coll))
(defn convert-to-elite-error
"converts a set of errors into a list where all the elite errors are replaced with 0s so that we can use
it in the selection of down-samples with elite/not-elite selection"
[errors]
(map #(replace-mins-with-zero %) errors))
(defn convert-to-soft-error
[errors delta]
(map #(replace-close-zero-with-zero % delta) errors))
(defn update-case-distances
"updates the case distance field of training-data list, should be called after evaluation of individuals
evaluated-pop should be a list of individuals that all have the :errors field with a list of this
individuals performance on the each case in the training-data, in order. ids-type is :elite to use elite/not-elite, :soft to consider near solves, and :solved to use solve/not-solved"
([evaluated-pop ds-data training-data ids-type]
(update-case-distances evaluated-pop ds-data training-data ids-type 0)) ; default solution threshold is 0, only used if ids-type is :soft
([evaluated-pop ds-data training-data ids-type solution-threshold]
(let [ds-indices (map #(:index %) ds-data)
errors (map #(:errors %) evaluated-pop)
corr-errors (case ids-type
:elite (convert-to-elite-error errors)
:soft (convert-to-soft-error errors solution-threshold)
errors)] ;errors, including elite/not-elite distinction
(merge-map-lists-at-index
training-data (map-indexed
(fn [idx d-case] (update-in d-case
[:distances] #(update-at-indices
% (map (fn [other] (get-distance-between-cases corr-errors idx other))
(range (count ds-indices))) ds-indices))) ds-data)))))

View File

@ -5,6 +5,8 @@
[propeller.genome :as genome] [propeller.genome :as genome]
[propeller.simplification :as simplification] [propeller.simplification :as simplification]
[propeller.variation :as variation] [propeller.variation :as variation]
[propeller.downsample :as downsample]
[propeller.hyperselection :as hyperselection]
[propeller.push.instructions.bool] [propeller.push.instructions.bool]
[propeller.push.instructions.character] [propeller.push.instructions.character]
[propeller.push.instructions.code] [propeller.push.instructions.code]
@ -17,14 +19,16 @@
[propeller.utils :as utils])) [propeller.utils :as utils]))
(defn report (defn report
"Reports information for each generation." "Reports information each generation."
[pop generation argmap] [evaluations pop generation argmap training-data]
(let [best (first pop)] (let [best (first pop)]
(clojure.pprint/pprint (clojure.pprint/pprint
(merge {:generation generation (merge {:generation generation
:best-plushy (:plushy best) :best-plushy (:plushy best)
:best-program (genome/plushy->push (:plushy best) argmap) :best-program (genome/plushy->push (:plushy best) argmap)
:best-total-error (:total-error best) :best-total-error (:total-error best)
:evaluations evaluations
:ds-indices (map #(:index %) training-data)
:best-errors (:errors best) :best-errors (:errors best)
:best-behaviors (:behaviors best) :best-behaviors (:behaviors best)
:genotypic-diversity (float (/ (count (distinct (map :plushy pop))) (count pop))) :genotypic-diversity (float (/ (count (distinct (map :plushy pop))) (count pop)))
@ -47,59 +51,100 @@
(defn gp (defn gp
"Main GP loop." "Main GP loop."
[{:keys [population-size max-generations error-function instructions [{:keys [population-size max-generations error-function instructions
max-initial-plushy-size solution-error-threshold] max-initial-plushy-size solution-error-threshold mapper ds-parent-rate ds-parent-gens dont-end ids-type downsample?]
:or {solution-error-threshold 0.0} :or {solution-error-threshold 0.0
dont-end false
ds-parent-rate 0
ds-parent-gens 1
ids-type :solved ; :solved or :elite or :soft
downsample? false
;; The `mapper` will perform a `map`-like operation to apply a function to every individual
;; in the population. The default is `map` but other options include `mapv`, or `pmap`.
mapper #?(:clj pmap :cljs map)}
:as argmap}] :as argmap}]
;; ;;
(prn {:starting-args (update (update argmap :error-function str) :instructions str)}) (prn {:starting-args (update (update argmap :error-function str) :instructions str)})
(println) (println)
;; ;;
(loop [generation 0 (loop [generation 0
evaluations 0
population (utils/pmapallv population (utils/pmapallv
(fn [_] {:plushy (let [plushy (genome/make-random-plushy instructions max-initial-plushy-size)] (fn [_] {:plushy (let [plushy (genome/make-random-plushy instructions max-initial-plushy-size)]
(if (:diploid argmap) (if (:diploid argmap)
(interleave plushy plushy) (interleave plushy plushy)
plushy))}) plushy))}) (range population-size) argmap)
(range population-size) indexed-training-data (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap))]
argmap)] ;creates population of random plushys (let [training-data (if downsample?
(let [evaluated-pop (sort-by :total-error (case (:ds-function argmap)
(utils/pmapallv :case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap)
(partial error-function argmap (:training-data argmap)) :case-maxmin-auto (downsample/select-downsample-maxmin-adaptive indexed-training-data argmap)
population :case-rand (downsample/select-downsample-random indexed-training-data argmap)
argmap)) (do (prn {:error "Invalid Downsample Function"}) (downsample/select-downsample-random indexed-training-data argmap)))
indexed-training-data) ;defaults to full training set
parent-reps (if
(and downsample? ; if we are down-sampling
(zero? (mod generation ds-parent-gens))) ;every ds-parent-gens generations
(take (* ds-parent-rate (count population)) (shuffle population))
'()) ;else just empty list
; parent representatives for down-sampling
rep-evaluated-pop (if downsample?
(sort-by :total-error
(utils/pmapallv
(partial error-function argmap indexed-training-data)
parent-reps
argmap))
'())
evaluated-pop (sort-by :total-error
(utils/pmapallv
(partial error-function argmap training-data)
population
argmap))
best-individual (first evaluated-pop) best-individual (first evaluated-pop)
best-individual-passes-ds (and downsample? (<= (:total-error best-individual) solution-error-threshold))
argmap (if (= (:parent-selection argmap) :epsilon-lexicase) argmap (if (= (:parent-selection argmap) :epsilon-lexicase)
(assoc argmap :epsilons (selection/epsilon-list evaluated-pop)) (assoc argmap :epsilons (selection/epsilon-list evaluated-pop))
argmap)] ;adds :epsilons if using epsilon-lexicase argmap)] ; epsilons
(if (:custom-report argmap) (if (:custom-report argmap)
((:custom-report argmap) evaluated-pop generation argmap) ((:custom-report argmap) evaluations evaluated-pop generation argmap)
(report evaluated-pop generation argmap)) (report evaluations evaluated-pop generation argmap training-data))
;;did the indvidual pass all cases in ds?
(when best-individual-passes-ds
(prn {:semi-success-generation generation}))
(cond (cond
;; Success on training cases is verified on testing cases ;; If either the best individual on the ds passes all training cases, or best individual on full sample passes all training cases
(<= (:total-error best-individual) solution-error-threshold) ;; We verify success on test cases and end evolution
(do (prn {:success-generation generation}) (if (or (and best-individual-passes-ds (<= (:total-error (error-function argmap indexed-training-data best-individual)) solution-error-threshold))
(prn {:total-test-error (and (not downsample?)
(:total-error (error-function argmap (:testing-data argmap) best-individual))}) (<= (:total-error best-individual) solution-error-threshold)))
(when (:simplification? argmap) (do (prn {:success-generation generation})
(let [simplified-plushy (simplification/auto-simplify-plushy (prn {:total-test-error
(:plushy best-individual) (:total-error (error-function argmap (:testing-data argmap) best-individual))})
error-function argmap)] (when (:simplification? argmap)
(prn {:total-test-error-simplified (let [simplified-plushy (simplification/auto-simplify-plushy (:plushy best-individual) error-function argmap)]
(:total-error (error-function argmap (prn {:total-test-error-simplified (:total-error (error-function argmap (:testing-data argmap) (hash-map :plushy simplified-plushy)))})))
(:testing-data argmap) (if dont-end false true))
(hash-map :plushy simplified-plushy)))}))) false)
#?(:clj (shutdown-agents))) nil
;; ;;
(>= generation max-generations) (and (not downsample?) (>= generation max-generations))
(do #?(:clj (shutdown-agents))) nil
;;
(and downsample? (>= evaluations (* max-generations population-size (count indexed-training-data))))
nil
;; ;;
:else (recur (inc generation) :else (recur (inc generation)
(if (:elitism argmap) (+ evaluations (* population-size (count training-data)) ;every member evaluated on the current sample
(conj (utils/pmapallv (fn [_] (variation/new-individual evaluated-pop argmap)) (if (zero? (mod generation ds-parent-gens)) (* (count parent-reps) (- (count indexed-training-data) (count training-data))) 0) ; the parent-reps not evaluted already on down-sample
(range (dec population-size)) (if best-individual-passes-ds (- (count indexed-training-data) (count training-data)) 0)) ; if we checked for generalization or not
argmap) (let [reindexed-pop (hyperselection/reindex-pop evaluated-pop)] ; give every individual an index for hyperselection loggin
(first evaluated-pop)) ;elitism maintains the most-fit individual (hyperselection/log-hyperselection-and-ret
(utils/pmapallv (fn [_] (variation/new-individual evaluated-pop argmap)) (if (:elitism argmap)
(range population-size) (conj (repeatedly (dec population-size) #(variation/new-individual reindexed-pop argmap))
argmap))))))) (first reindexed-pop))
(repeatedly population-size ;need to count occurance of each parent, and reset IDs
#(variation/new-individual reindexed-pop argmap)))))
(if downsample?
(if (zero? (mod generation ds-parent-gens))
(downsample/update-case-distances rep-evaluated-pop indexed-training-data indexed-training-data ids-type (/ solution-error-threshold (count indexed-training-data))) ; update distances every ds-parent-gens generations
indexed-training-data)
indexed-training-data))))))

View File

@ -0,0 +1,35 @@
(ns propeller.hyperselection)
(defn sum-list-map-indices
"sums a list of maps that have the :index property's index multiplicity"
[list-of-maps]
(->> list-of-maps
(map #(:index %))
frequencies))
(defn ordered-freqs
"takes a map from indices to frequencies, and returns a sorted list of the frequences is descencing order"
[freqs]
(->> freqs
vals
(sort >)))
(defn normalize-list-by-popsize [popsize lst]
(map #(double (/ % popsize)) lst))
(defn hyperselection-track
"outputs a normalized list of the hyperselection proportion for each parent"
[new-pop]
(->> new-pop
sum-list-map-indices
ordered-freqs
(normalize-list-by-popsize (count new-pop))))
(defn log-hyperselection-and-ret [new-pop]
(prn {:hyperselection (hyperselection-track new-pop)})
new-pop)
(defn reindex-pop
"assigns each member of the population a unique index before selection to track hyperselection"
[pop]
(map (fn [indiv index] (assoc indiv :index index)) pop (range (count pop))))

View File

@ -65,6 +65,9 @@
:error-function error-function :error-function error-function
:training-data train-data :training-data train-data
:testing-data test-data :testing-data test-data
:case-t-size (count train-data)
:ds-parent-rate 0
:ds-parent-gens 1
:max-generations 300 :max-generations 300
:population-size 1000 :population-size 1000
:max-initial-plushy-size 250 :max-initial-plushy-size 250

View File

@ -83,6 +83,9 @@
:error-function error-function :error-function error-function
:training-data train-data :training-data train-data
:testing-data test-data :testing-data test-data
:case-t-size (count train-data)
:ds-parent-rate 0
:ds-parent-gens 1
:max-generations 300 :max-generations 300
:population-size 1000 :population-size 1000
:max-initial-plushy-size 250 :max-initial-plushy-size 250
@ -92,4 +95,5 @@
:umad-rate 0.1 :umad-rate 0.1
:variation {:umad 1.0 :crossover 0.0} :variation {:umad 1.0 :crossover 0.0}
:elitism false} :elitism false}
(apply hash-map (map #(if (string? %) (read-string %) %) args))))) (apply hash-map (map #(if (string? %) (read-string %) %) args))))
(#?(:clj shutdown-agents)))

View File

@ -134,6 +134,9 @@
:error-function error-function :error-function error-function
:training-data train-data :training-data train-data
:testing-data test-data :testing-data test-data
:case-t-size (count train-data)
:ds-parent-rate 0
:ds-parent-gens 1
:max-generations 300 :max-generations 300
:population-size 1000 :population-size 1000
:max-initial-plushy-size 250 :max-initial-plushy-size 250

View File

@ -66,6 +66,9 @@
:error-function error-function :error-function error-function
:training-data train-data :training-data train-data
:testing-data test-data :testing-data test-data
:case-t-size (count train-data)
:ds-parent-rate 0
:ds-parent-gens 1
:max-generations 300 :max-generations 300
:population-size 1000 :population-size 1000
:max-initial-plushy-size 250 :max-initial-plushy-size 250

View File

@ -0,0 +1,108 @@
(ns propeller.problems.PSB2.find-pair
(:require [psb2.core :as psb2]
[propeller.genome :as genome]
[propeller.push.interpreter :as interpreter]
[propeller.problems.data-creation :as dc]
[propeller.utils :as utils]
[propeller.push.instructions :refer [def-instruction get-stack-instructions]]
[propeller.push.state :as state]
[propeller.tools.math :as math]
[propeller.gp :as gp]
#?(:cljs [cljs.reader :refer [read-string]])))
(def train-and-test-data (psb2/fetch-examples "data" "find-pair" 200 2000))
(def train-data (:train train-and-test-data))
(def test-data (:test train-and-test-data))
(defn random-int [] (- (rand-int 201) 100))
(defn map-vals-input
"Returns all the input values of a map"
[i]
(vals (select-keys i [:input1 :input2])))
(defn map-vals-output
"Returns the output values of a map"
[i]
(vals (select-keys i [:output1 :output2])))
(def-instruction :output-one
^{:stacks #{:integer :output}}
(fn [state]
(if (empty? (:integer state))
state
(let [top-int (state/peek-stack state :integer)]
(assoc-in state [:output :out1] top-int)))))
(def-instruction :output-two
^{:stacks #{:integer :output}}
(fn [state]
(if (empty? (:integer state))
state
(let [top-int (state/peek-stack state :integer)]
(assoc-in state [:output :out2] top-int)))))
(def instructions
(utils/not-lazy
(concat
;;; stack-specific instructions
(get-stack-instructions #{:exec :integer :vector_integer :boolean})
(list :output-one :output-two)
;;; input instructions
(list :in1 :in2)
;;; close
(list 'close)
;;; ERCs (constants)
(list -1 0 1 2 random-int))))
(defn error-function
[argmap data individual]
(let [program (genome/plushy->push (:plushy individual) argmap)
inputs (map (fn [i] (map-vals-input i)) data)
correct-outputs (map (fn [i] (map-vals-output i)) data)
outputs (map (fn [input]
(:output
(interpreter/interpret-program
program
(assoc state/empty-state :input {:in1 (nth input 0)
:in2 (nth input 1)})
(:step-limit argmap))))
inputs)
outputs-1 (map #(:out1 %) outputs)
outputs-2 (map #(:out2 %) outputs)
;_ (prn {:o1 outputs-1 :o2 outputs-2})
errors (map (fn [correct-output output-1 output-2]
(if (not (and (number? output-2) (number? output-1)))
100000
(+ (math/abs (- (first correct-output) output-1))
(math/abs (- (second correct-output) output-2)))))
correct-outputs outputs-1 outputs-2)]
(assoc individual
:behavior outputs
:errors errors
:total-error #?(:clj (apply +' errors)
:cljs (apply + errors)))))
(defn -main
"Runs propel-gp, giving it a map of arguments."
[& args]
(gp/gp
(merge
{:instructions instructions
:error-function error-function
:training-data train-data
:testing-data test-data
:case-t-size (count train-data)
:ds-parent-rate 0
:ds-parent-gens 1
:max-generations 300
:population-size 1000
:max-initial-plushy-size 250
:step-limit 2000
:parent-selection :lexicase
:tournament-size 5
:umad-rate 0.1
:variation {:umad 1.0 :crossover 0.0}
:elitism false}
(apply hash-map (map #(if (string? %) (read-string %) %) args))))
(#?(:clj shutdown-agents)))

View File

@ -10,6 +10,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
(:require [psb2.core :as psb2] (:require [psb2.core :as psb2]
[propeller.genome :as genome] [propeller.genome :as genome]
[propeller.push.interpreter :as interpreter] [propeller.push.interpreter :as interpreter]
[propeller.problems.data-creation :as dc]
[propeller.utils :as utils] [propeller.utils :as utils]
[propeller.push.instructions :refer [get-stack-instructions]] [propeller.push.instructions :refer [get-stack-instructions]]
[propeller.push.state :as state] [propeller.push.state :as state]
@ -17,7 +18,21 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
[propeller.gp :as gp] [propeller.gp :as gp]
#?(:cljs [cljs.reader :refer [read-string]]))) #?(:cljs [cljs.reader :refer [read-string]])))
; =========== PROBLEM DESCRIPTION =========================
; FIZZ BUZZ from PSB2
; Given an integer x, return "Fizz" if x is
; divisible by 3, "Buzz" if x is divisible by 5, "FizzBuzz" if x
; is divisible by 3 and 5, and a string version of x if none of
; the above hold.
;
; Source: https://arxiv.org/pdf/2106.06086.pdf
; ============================================================
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fizz-buzz" 200 2000)) (def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fizz-buzz" 200 2000))
(def train-data (:train train-and-test-data))
(def test-data (:test train-and-test-data))
(def instructions (def instructions
"Stack-specific instructions, input instructions, close, and constants" "Stack-specific instructions, input instructions, close, and constants"
@ -71,8 +86,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
(merge (merge
{:instructions instructions {:instructions instructions
:error-function error-function :error-function error-function
:training-data (:train train-and-test-data) :training-data train-data
:testing-data (:test train-and-test-data) :testing-data test-data
:case-t-size (count train-data)
:ds-parent-rate 0
:ds-parent-gens 1
:max-generations 300 :max-generations 300
:population-size 1000 :population-size 1000
:max-initial-plushy-size 250 :max-initial-plushy-size 250

View File

@ -11,6 +11,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
(:require [psb2.core :as psb2] (:require [psb2.core :as psb2]
[propeller.genome :as genome] [propeller.genome :as genome]
[propeller.push.interpreter :as interpreter] [propeller.push.interpreter :as interpreter]
[propeller.problems.data-creation :as dc]
[propeller.utils :as utils] [propeller.utils :as utils]
[propeller.push.instructions :refer [get-stack-instructions]] [propeller.push.instructions :refer [get-stack-instructions]]
[propeller.push.state :as state] [propeller.push.state :as state]
@ -18,7 +19,19 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
[propeller.gp :as gp] [propeller.gp :as gp]
#?(:cljs [cljs.reader :refer [read-string]]))) #?(:cljs [cljs.reader :refer [read-string]])))
; =========== PROBLEM DESCRIPTION =========================
; FUEL COST from PSB2
; Given a vector of positive integers, divide
; each by 3, round the result down to the nearest integer, and
; subtract 2. Return the sum of all of the new integers in the
; vector
;
; Source: https://arxiv.org/pdf/2106.06086.pdf
; ============================================================
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fuel-cost" 200 2000)) (def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fuel-cost" 200 2000))
(def train-data (:train train-and-test-data))
(def test-data (:test train-and-test-data))
; Random integer between -100 and 100 (from smallest) ; Random integer between -100 and 100 (from smallest)
(defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100)) (defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100))
@ -26,15 +39,15 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
(def instructions (def instructions
"Stack-specific instructions, input instructions, close, and constants" "Stack-specific instructions, input instructions, close, and constants"
(utils/not-lazy (utils/not-lazy
(concat (concat
;;; stack-specific instructions ;;; stack-specific instructions
(get-stack-instructions #{:exec :integer :boolean :vector_integer :print}) (get-stack-instructions #{:exec :integer :boolean :vector_integer :print})
;;; input instructions ;;; input instructions
(list :in1) (list :in1)
;;; close ;;; close
(list 'close) (list 'close)
;;; ERCs (constants) ;;; ERCs (constants)
(list random-int 0 1 2 3)))) (list random-int 0 1 2 3))))
(defn error-function (defn error-function
"Finds the behaviors and errors of an individual: Error is 0 if the value and "Finds the behaviors and errors of an individual: Error is 0 if the value and
@ -47,11 +60,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
correct-outputs (map (fn [i] (get i :output1)) data) correct-outputs (map (fn [i] (get i :output1)) data)
outputs (map (fn [input] outputs (map (fn [input]
(state/peek-stack (state/peek-stack
(interpreter/interpret-program (interpreter/interpret-program
program program
(assoc state/empty-state :input {:in1 input}) (assoc state/empty-state :input {:in1 input})
(:step-limit argmap)) (:step-limit argmap))
:integer)) :integer))
inputs) inputs)
errors (map (fn [correct-output output] errors (map (fn [correct-output output]
(if (= output :no-stack-item) (if (= output :no-stack-item)
@ -60,10 +73,10 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
correct-outputs correct-outputs
outputs)] outputs)]
(assoc individual (assoc individual
:behaviors outputs :behaviors outputs
:errors errors :errors errors
:total-error #?(:clj (apply +' errors) :total-error #?(:clj (apply +' errors)
:cljs (apply + errors))))) :cljs (apply + errors)))))
(defn -main (defn -main
"Runs the top-level genetic programming function, giving it a map of "Runs the top-level genetic programming function, giving it a map of
@ -71,18 +84,21 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
or through a passed map." or through a passed map."
[& args] [& args]
(gp/gp (gp/gp
(merge (merge
{:instructions instructions {:instructions instructions
:error-function error-function :error-function error-function
:training-data (:train train-and-test-data) :training-data train-data
:testing-data (:test train-and-test-data) :testing-data test-data
:max-generations 300 :case-t-size (count train-data)
:population-size 1000 :ds-parent-rate 0
:max-initial-plushy-size 250 :ds-parent-gens 1
:step-limit 2000 :max-generations 300
:parent-selection :lexicase :population-size 1000
:tournament-size 5 :max-initial-plushy-size 250
:umad-rate 0.1 :step-limit 2000
:variation {:umad 1.0 :crossover 0.0} :parent-selection :lexicase
:elitism false} :tournament-size 5
(apply hash-map (map #(if (string? %) (read-string %) %) args))))) :umad-rate 0.1
:variation {:umad 1.0 :crossover 0.0}
:elitism false}
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))

View File

@ -9,6 +9,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
(:require [psb2.core :as psb2] (:require [psb2.core :as psb2]
[propeller.genome :as genome] [propeller.genome :as genome]
[propeller.push.interpreter :as interpreter] [propeller.push.interpreter :as interpreter]
[propeller.problems.data-creation :as dc]
[propeller.utils :as utils] [propeller.utils :as utils]
[propeller.push.instructions :refer [get-stack-instructions]] [propeller.push.instructions :refer [get-stack-instructions]]
[propeller.push.state :as state] [propeller.push.state :as state]
@ -18,6 +19,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "gcd" 200 2000)) (def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "gcd" 200 2000))
(def train-and-test-data (psb2/fetch-examples "data" "gcd" 200 2000))
(def train-data (:train train-and-test-data))
(def test-data (:test train-and-test-data))
(defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100)) (defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100))
(defn map-vals-input (defn map-vals-input
@ -82,8 +88,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
(merge (merge
{:instructions instructions {:instructions instructions
:error-function error-function :error-function error-function
:training-data (:train train-and-test-data) :training-data train-data
:testing-data (:test train-and-test-data) :testing-data test-data
:case-t-size (count train-data)
:ds-parent-rate 0
:ds-parent-gens 1
:max-generations 300 :max-generations 300
:population-size 1000 :population-size 1000
:max-initial-plushy-size 250 :max-initial-plushy-size 250

View File

@ -0,0 +1,128 @@
(ns propeller.problems.UBall5D
(:require [propeller.genome :as genome]
[propeller.push.interpreter :as interpreter]
[propeller.push.state :as state]
[propeller.tools.math :as math]
[propeller.gp :as gp]
#?(:cljs [cljs.reader :refer [read-string]])))
(defn- target-function
"Target function: f(x) = 10/(5 + SUM_i=1^5 (x_i - 3)^2)"
[x1 x2 x3 x4 x5]
(->> (list x1 x2 x3 x4 x5)
(map #(* (- % 3) (- % 3)))
(apply +)
(+ 5)
(/ 10)))
(target-function 3 7 3 3 3)
(defn map-vals-input
"Returns all the input values of a map"
[i]
(vals (select-keys i [:input1 :input2 :input3 :input4 :input5])))
(defn get-random-input
"returns a random input between two ranges"
[a b]
(->> (rand)
(* (- b a))
(+ a)
(float)))
(def train-data
(repeatedly 1024 (fn [] (repeatedly 5 #(get-random-input 0.05 6.05)))))
(def test-data
(repeatedly 5000 (fn [] (repeatedly 5 #(get-random-input -0.25 6.35)))))
(def train-and-test-data
(let [train-inputs train-data
test-inputs test-data]
{:train (map (fn [x] {:input1 (first x)
:input2 (nth x 1)
:input3 (nth x 2)
:input4 (nth x 3)
:input5 (nth x 4)
:output1 (apply target-function (map #(nth x %) (range 5)))}) train-inputs)
:test (map (fn [x] {:input1 (first x)
:input2 (nth x 1)
:input3 (nth x 2)
:input4 (nth x 3)
:input5 (nth x 4)
:output1 (apply target-function (map #(nth x %) (range 5)))}) test-inputs)}))
(def instructions
(list :in1
:in2
:in3
:in4
:in5
:float_add
:float_subtract
:float_mult
:float_quot
:float_eq
:exec_dup
:exec_if
'close
0.0
1.0))
(def data (:train train-and-test-data))
(defn error-function
"Finds the behaviors and errors of an individual. The error is the absolute
deviation between the target output value and the program's selected behavior,
or 1000000 if no behavior is produced. The behavior is here defined as the
final top item on the FLOAT stack."
([argmap data individual]
(let [program (genome/plushy->push (:plushy individual) argmap)
inputs (map (fn [i] (map-vals-input i)) data)
correct-outputs (map (fn [x] (:output1 x)) data)
outputs (map (fn [input]
(state/peek-stack
(interpreter/interpret-program
program
(assoc state/empty-state :input {:in1 (nth input 0)
:in2 (nth input 1)
:in3 (nth input 2)
:in4 (nth input 3)
:in5 (nth input 4)})
(:step-limit argmap))
:float))
inputs)
errors (map (fn [correct-output output]
(if (= output :no-stack-item)
1000000
(math/abs (- correct-output output))))
correct-outputs
outputs)]
(assoc individual
:behaviors outputs
:errors errors
:total-error #?(:clj (apply +' errors)
:cljs (apply + errors))))))
(defn -main
"Runs propel-gp, giving it a map of arguments."
[& args]
(gp/gp
(merge
{:instructions instructions
:error-function error-function
:training-data (:train train-and-test-data)
:testing-data (:test train-and-test-data)
:max-generations 300
:population-size 1000
:max-initial-plushy-size 100
:step-limit 200
:parent-selection :lexicase
:tournament-size 5
:umad-rate 0.1
:solution-error-threshold 0.1
:variation {:umad 1.0 :crossover 0.0}
:elitism false}
(apply hash-map (map #(if (string? %) (read-string %) %) args))))
(#?(:clj shutdown-agents)))

View File

@ -75,7 +75,11 @@
:population-size 500 :population-size 500
:max-initial-plushy-size 100 :max-initial-plushy-size 100
:step-limit 200 :step-limit 200
:parent-selection :lexicase :parent-selection :ds-lexicase
:ds-function :case-maxmin
:downsample-rate 0.1
:case-t-size 20
:tournament-size 5
:umad-rate 0.1 :umad-rate 0.1
:variation {:umad 1.0 :crossover 0.0} :variation {:umad 1.0 :crossover 0.0}
:elitism false} :elitism false}

View File

@ -0,0 +1,103 @@
(ns propeller.problems.data-creation
(:require [psb2.core :as psb2]
[clojure.data.csv :as csv]
[clojure.java.io :as io]
[clojure.string :as s]))
(defn generate-data [problem train-or-test]
(let [train-and-test-data (psb2/fetch-examples "data" problem 200 1000)
cleaned-data (cons (vector "input1" "output1") (map #(vector (:input1 %) (:output1 %)) ((keyword train-or-test) train-and-test-data)))]
(prn cleaned-data)
(with-open [writer (io/writer (str problem "-" train-or-test ".csv"))]
(csv/write-csv writer
(doall cleaned-data)))))
(defn generate-data-gcd [train-or-test]
(let [train-and-test-data (psb2/fetch-examples "data" "gcd" 200 1000)
cleaned-data (cons (vector "input1" "input2" "output1") (map #(vector (:input1 %) (:input2 %) (:output1 %)) ((keyword train-or-test) train-and-test-data)))]
(prn cleaned-data)
(with-open [writer (io/writer (str "gcd-" train-or-test ".csv"))]
(csv/write-csv writer
(doall cleaned-data)))))
(defn generate-data-find-pair [train-or-test]
(let [train-and-test-data (psb2/fetch-examples "data" "find-pair" 200 1000)
cleaned-data (cons (vector "input1" "input2" "output1" "output2") (map #(vector (:input1 %) (:input2 %) (:output1 %) (:output2 %)) ((keyword train-or-test) train-and-test-data)))]
(prn cleaned-data)
(with-open [writer (io/writer (str "find-pair-" train-or-test ".csv"))]
(csv/write-csv writer
(doall cleaned-data)))))
(defn generate-data-for-problem [problem]
(map (partial generate-data problem) '["test" "train"]))
(defn generate-data-for-all-problems []
(map (partial generate-data-for-problem) '["gcd"
"find-pair"]))
;--------PSB1
(defn read-data [problem qual]
(with-open [reader (io/reader (str "src/propeller/problems/PSB1/" problem "-" qual ".csv"))]
(doall
(csv/read-csv reader))))
(defn edge-cases-for-problem [problem]
(read-data problem "edge"))
(defn training-cases-for-problem [shuffled-data problem]
(let [edge-cases (edge-cases-for-problem problem)
left (- 201 (count edge-cases)) ;because labels is first
random-cases (take left shuffled-data)]
(concat edge-cases random-cases)))
(defn testing-cases-for-problem [shuffled-data problem]
(take 1000 (drop 500 shuffled-data)))
(defn save-train-test-data [shuffled-data problem train-or-test]
(with-open [writer (io/writer (str problem "-" train-or-test ".csv"))]
(csv/write-csv writer
(if (= train-or-test "train")
(training-cases-for-problem shuffled-data problem)
(testing-cases-for-problem shuffled-data problem)))))
(defn save-data-for-problem [problem]
(let [shuffled-data (shuffle (rest (read-data problem "random")))]
(map (partial save-train-test-data shuffled-data problem) '["test" "train"])))
(defn save-data-for-all-problems []
(map (partial save-data-for-problem) '["small-or-large"
"scrabble-score"
"grade"
"count-odds"]))
(defn read-string-and-convert [elem]
(if (= elem "")
""
(let [before (read-string elem)]
(if (symbol? before)
elem
before))))
(defn read-data-formatted [problem train-or-test]
(apply list (with-open [reader (io/reader (str "picked/" problem "-" train-or-test ".csv"))]
(let [csv-data (csv/read-csv reader)]
(mapv zipmap
(->> (first csv-data) ;; First row is the header
(map keyword) ;; Drop if you want string keys instead
repeat)
(map (fn [elem] (map #(read-string-and-convert %) elem)) (rest csv-data)))))))
;scrabble-score doesn't play nice with read-string, hacky solution below
(defn scrabble-score-read-data-formatted [problem train-or-test]
(apply list (with-open [reader (io/reader (str "picked/" problem "-" train-or-test ".csv"))]
(let [csv-data (csv/read-csv reader)]
(prn {:csv-data csv-data})
(mapv zipmap
(->> (first csv-data) ;; First row is the header
(map keyword) ;; Drop if you want string keys instead
repeat)
(map (fn [elem] (list (s/replace (first elem) "\\n" "\n") (read-string (second elem)))) (rest csv-data)))))))

View File

@ -23,6 +23,10 @@
:float_subtract :float_subtract
:float_mult :float_mult
:float_quot :float_quot
:float_eq
:exec_dup
:exec_if
'close
0.0 0.0
1.0)) 1.0))

View File

@ -12,12 +12,15 @@
(+ (* x x x) (* 2 x x) x 3)) (+ (* x x x) (* 2 x x) x 3))
(def train-and-test-data (def train-and-test-data
"Training data: Inputs and outputs with -10 <= x < 11
Test data: Inputs and outputs of -20 <= x < -10 and 11 <= x < 21"
(let [train-inputs (range -10 11) (let [train-inputs (range -10 11)
test-inputs (concat (range -20 -10) (range 11 21))] test-inputs (concat (range -20 -10) (range 11 21))]
{:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs) {:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs)
:test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)})) :test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)}))
(def instructions (def instructions
"stack-specific instructions, input instructions, close, and constants"
(list :in1 (list :in1
:integer_add :integer_add
:integer_subtract :integer_subtract

View File

@ -87,11 +87,17 @@
:error-function error-function :error-function error-function
:training-data (:train train-and-test-data) :training-data (:train train-and-test-data)
:testing-data (:test train-and-test-data) :testing-data (:test train-and-test-data)
:case-t-size (count (:train train-and-test-data))
:ds-parent-rate 0
:ds-parent-gens 1
:ds-function :case-rand
:max-generations 500 :max-generations 500
:population-size 500 :population-size 500
:max-initial-plushy-size 100 :max-initial-plushy-size 100
:step-limit 200 :step-limit 200
:parent-selection :lexicase :parent-selection :lexicase
:downsample? false
:tournament-size 5
:umad-rate 0.1 :umad-rate 0.1
:variation {:umad 1.0 :crossover 0.0} :variation {:umad 1.0 :crossover 0.0}
:elitism false} :elitism false}

View File

@ -1,90 +0,0 @@
(ns propeller.problems.simple-regression
"Simple Regression:
Given inputs and outputs, find the target function."
{:doc/format :markdown}
(:require [propeller.genome :as genome]
[propeller.push.interpreter :as interpreter]
[propeller.push.state :as state]
[propeller.tools.math :as math]
[propeller.gp :as gp]
#?(:cljs [cljs.reader :refer [read-string]])))
(defn- target-function
"Target function: f(x) = x^3 + x + 3"
[x]
(+ (* x x x) x 3))
(def train-and-test-data
"Training data: Inputs and outputs with -10 <= x < 11
Test data: Inputs and outputs of -20 <= x < -10 and 11 <= x < 21"
(let [train-inputs (range -10 11)
test-inputs (concat (range -20 -10) (range 11 21))]
{:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs)
:test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)}))
(def instructions
"stack-specific instructions, input instructions, close, and constants"
(list :in1
:integer_add
:integer_subtract
:integer_mult
:integer_quot
:integer_eq
:exec_dup
:exec_if
'close
0
1))
(defn error-function
"Finds the behaviors and errors of an individual. The error is the absolute
deviation between the target output value and the program's selected behavior,
or 1000000 if no behavior is produced. The behavior is here defined as the
final top item on the INTEGER stack."
([argmap data individual]
(let [program (genome/plushy->push (:plushy individual) argmap)
inputs (map (fn [x] (first (:input1 x))) data)
correct-outputs (map (fn [x] (first (:output1 x))) data)
outputs (map (fn [input]
(state/peek-stack
(interpreter/interpret-program
program
(assoc state/empty-state :input {:in1 input})
(:step-limit argmap))
:integer))
inputs)
errors (map (fn [correct-output output]
(if (= output :no-stack-item)
1000000
(math/abs (- correct-output output))))
correct-outputs
outputs)]
(assoc individual
:behaviors outputs
:errors errors
:total-error #?(:clj (apply +' errors)
:cljs (apply + errors))))))
(defn -main
"Runs the top-level genetic programming function, giving it a map of
arguments with defaults that can be overridden from the command line
or through a passed map."
[& args]
(gp/gp
(merge
{:instructions instructions
:error-function error-function
:training-data (:train train-and-test-data)
:testing-data (:test train-and-test-data)
:max-generations 500
:population-size 500
:max-initial-plushy-size 100
:step-limit 200
:parent-selection :lexicase
:tournament-size 5
:umad-rate 0.1
:variation {:umad 0.5 :crossover 0.5}
:elitism false}
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))

View File

@ -105,6 +105,6 @@ Given a string, return true if it contains A, C, G, and T. Else return false."
:parent-selection :lexicase :parent-selection :lexicase
:tournament-size 5 :tournament-size 5
:umad-rate 0.1 :umad-rate 0.1
:variation {:umad 0.5 :crossover 0.5} :variation {:umad 1.0 :crossover 0.0}
:elitism false} :elitism false}
(apply hash-map (map #(if (string? %) (read-string %) %) args))))) (apply hash-map (map #(if (string? %) (read-string %) %) args)))))

18
src/propeller/selection.cljc Executable file → Normal file
View File

@ -30,6 +30,23 @@
survivors) survivors)
(rest cases)))))) (rest cases))))))
(defn fitness-proportionate-selection
"Selects an individual from the population using a fitness proportionate selection."
[pop argmap]
(let [pop-fits (->> pop ;convert from error to fitness, where fitness (probability) is (1/ (1+ tot_err))
(map #(assoc % :fitness (/ 1 (inc (:total-error %))))))
pop-total-fit (->> pop-fits
(map :fitness)
(reduce +))
random-num (* (rand) pop-total-fit)
sorted-by-fitness (->> pop-fits
(sort-by :fitness)
(reverse))]
(loop [tot (:fitness (first sorted-by-fitness)) individuals sorted-by-fitness]
(if (< random-num tot)
(first individuals)
(recur (+ tot (:fitness (first (rest individuals)))) (rest individuals))))))
(defn motley-batch-lexicase-selection (defn motley-batch-lexicase-selection
"Selects an individual from the population using motley batch lexicase selection. "Selects an individual from the population using motley batch lexicase selection.
Cases are combined in random collections of max size (:max-batch-size argmap)." Cases are combined in random collections of max size (:max-batch-size argmap)."
@ -91,5 +108,6 @@
(case (:parent-selection argmap) (case (:parent-selection argmap)
:tournament (tournament-selection pop argmap) :tournament (tournament-selection pop argmap)
:lexicase (lexicase-selection pop argmap) :lexicase (lexicase-selection pop argmap)
:roulette (fitness-proportionate-selection pop argmap)
:epsilon-lexicase (epsilon-lexicase-selection pop argmap) :epsilon-lexicase (epsilon-lexicase-selection pop argmap)
:motley-batch-lexicase (motley-batch-lexicase-selection pop argmap))) :motley-batch-lexicase (motley-batch-lexicase-selection pop argmap)))

View File

@ -26,8 +26,7 @@ The output with verbose adds the following lines to the output:
(:require [propeller.genome :as genome] (:require [propeller.genome :as genome]
[propeller.push.interpreter :as interpreter] [propeller.push.interpreter :as interpreter]
[propeller.push.state :as state] [propeller.push.state :as state]
[propeller.tools.math :as math] [propeller.tools.math :as math]))
))
(defn choose-random-k (defn choose-random-k
"Takes k random indices" "Takes k random indices"

10
src/propeller/tools/math.cljc Executable file → Normal file
View File

@ -7,6 +7,11 @@
(defonce ^{:no-doc true :const true} E #?(:clj Math/E (defonce ^{:no-doc true :const true} E #?(:clj Math/E
:cljs js/Math.PI)) :cljs js/Math.PI))
(defn step
"returns 1 if number is nonzero, 0 otherwise"
[x]
(if (zero? x) 0 1))
(defn mean (defn mean
"Returns the mean." "Returns the mean."
[coll] [coll]
@ -132,3 +137,8 @@
[x] [x]
#?(:clj (Math/tan x) #?(:clj (Math/tan x)
:cljs (js/Math.tan x))) :cljs (js/Math.tan x)))
(defn transpose
"returns a vector containing the transpose of a coll of colls"
[x]
(apply map vector x))

View File

@ -2,11 +2,47 @@
"Functions to measure things." "Functions to measure things."
(:require [propeller.tools.math :as math])) (:require [propeller.tools.math :as math]))
(defn argmins
"returns the indice(s) of the minimum value of a list. Could be more efficient, probably"
[coll]
(if (empty? coll) '()
(let [m (apply min coll)]
(keep-indexed #(when (= m %2) %1) coll))))
(defn argmax-last
"returns the index of the maximum value in a list, tiebreaking last"
[coll]
(->> coll
(map-indexed vector)
(apply max-key second)
first))
(defn argmax
"returns the index of the maximum value in a list, randomly tiebreaking"
[coll]
(if (zero? (count coll))
:null
(->> coll
(map-indexed vector)
(filter #(= (apply max coll) (second %)))
rand-nth
first)))
(defn mean (defn mean
"Returns the mean of a collection." "Returns the mean of a collection."
[coll] [coll]
(if (empty? coll) 0 (math/div (apply + coll) (count coll)))) (if (empty? coll) 0 (math/div (apply + coll) (count coll))))
(defn mean-of-colls
"returns the mean of multiple colls"
[coll]
(map mean (math/transpose coll)))
(defn min-of-colls
"returns the smallest value of multiple colls"
[coll]
(map #(apply min %) (math/transpose coll)))
(defn median (defn median
"Returns the median of a collection." "Returns the median of a collection."
[coll] [coll]
@ -29,12 +65,12 @@
"computes the next row using the prev-row current-element and the other seq" "computes the next row using the prev-row current-element and the other seq"
[prev-row current-element other-seq pred] [prev-row current-element other-seq pred]
(reduce (reduce
(fn [row [diagonal above other-element]] (fn [row [diagonal above other-element]]
(let [update-val (if (pred other-element current-element) (let [update-val (if (pred other-element current-element)
;; if the elements are deemed equivalent according to the predicate ;; if the elements are deemed equivalent according to the predicate
;; pred, then no change has taken place to the string, so we are ;; pred, then no change has taken place to the string, so we are
;; going to set it the same value as diagonal (which is the previous edit-distance) ;; going to set it the same value as diagonal (which is the previous edit-distance)
diagonal diagonal
;; in the case where the elements are not considered equivalent, then we are going ;; in the case where the elements are not considered equivalent, then we are going
;; to figure out if its a substitution (then there is a change of 1 from the previous ;; to figure out if its a substitution (then there is a change of 1 from the previous
;; edit distance) thus the value is diagonal + 1 or if its a deletion, then the value ;; edit distance) thus the value is diagonal + 1 or if its a deletion, then the value
@ -42,18 +78,18 @@
;; of last of row + 1 (since we will be using vectors, peek is more efficient) ;; of last of row + 1 (since we will be using vectors, peek is more efficient)
;; or it could be a case of insertion, then the value is above+1, and we chose ;; or it could be a case of insertion, then the value is above+1, and we chose
;; the minimum of the three ;; the minimum of the three
(inc (min diagonal above (peek row))))] (inc (min diagonal above (peek row))))]
(conj row update-val))) (conj row update-val)))
;; we need to initialize the reduce function with the value of a row, since we are ;; we need to initialize the reduce function with the value of a row, since we are
;; constructing this row from the previous one, the row is a vector of 1 element which ;; constructing this row from the previous one, the row is a vector of 1 element which
;; consists of 1 + the first element in the previous row (edit distance between the prefix so far ;; consists of 1 + the first element in the previous row (edit distance between the prefix so far
;; and an empty string) ;; and an empty string)
[(inc (first prev-row))] [(inc (first prev-row))]
;; for the reduction to go over, we need to provide it with three values, the diagonal ;; for the reduction to go over, we need to provide it with three values, the diagonal
;; which is the same as prev-row because it starts from 0, the above, which is the next element ;; which is the same as prev-row because it starts from 0, the above, which is the next element
;; from the list and finally the element from the other sequence itself. ;; from the list and finally the element from the other sequence itself.
(map vector prev-row (next prev-row) other-seq))) (map vector prev-row (next prev-row) other-seq)))
(defn levenshtein-distance (defn levenshtein-distance
"Levenshtein Distance - http://en.wikipedia.org/wiki/Levenshtein_distance "Levenshtein Distance - http://en.wikipedia.org/wiki/Levenshtein_distance
@ -66,17 +102,17 @@
(empty? (str a)) (count (str b)) ;; sometimes stack pushes numbers, force (empty? (str a)) (count (str b)) ;; sometimes stack pushes numbers, force
(empty? (str b)) (count (str a)) ;; a and b to be strings (empty? (str b)) (count (str a)) ;; a and b to be strings
:else (peek :else (peek
(reduce (reduce
;; we use a simple reduction to convert the previous row into the ;; we use a simple reduction to convert the previous row into the
;; next-row using the compute-next-row which takes a current ;; next-row using the compute-next-row which takes a current
;; element, the previous-row computed so far, and the predicate ;; element, the previous-row computed so far, and the predicate
;; to compare for equality ;; to compare for equality
(fn [prev-row current-element] (fn [prev-row current-element]
(compute-next-row prev-row current-element (str b) p)) (compute-next-row prev-row current-element (str b) p))
;; we need to initialize the prev-row with the edit distance ;; we need to initialize the prev-row with the edit distance
;; between the various prefixes of b and the empty string ;; between the various prefixes of b and the empty string
(range (inc (count (str b)))) (range (inc (count (str b))))
(str a))))) (str a)))))
(defn sequence-similarity (defn sequence-similarity
"Returns a number between 0 and 1, indicating how similar the sequences are "Returns a number between 0 and 1, indicating how similar the sequences are

View File

@ -3,6 +3,20 @@
(:require [clojure.zip :as zip] (:require [clojure.zip :as zip]
[clojure.repl :as repl])) [clojure.repl :as repl]))
(defn filter-by-index
"filters a collection by a list of indices"
[coll idxs]
;(prn {:func :filter-by-index :coll coll :idxs idxs})
(map (partial nth coll) idxs))
(defn drop-nth
"drops the nth element from a collection"
[n coll]
;(prn {:func :drop-nth :n n :coll coll})
(concat
(take n coll)
(nthrest coll (inc n))))
(defn first-non-nil (defn first-non-nil
"Returns the first non-nil values from the collection, or returns `nil` if "Returns the first non-nil values from the collection, or returns `nil` if
the collection is empty or only contains `nil`." the collection is empty or only contains `nil`."

4
src/propeller/variation.cljc Executable file → Normal file
View File

@ -254,6 +254,8 @@ The function `new-individual` returns a new individual produced by selection and
"Returns a new individual produced by selection and variation of "Returns a new individual produced by selection and variation of
individuals in the population." individuals in the population."
[pop argmap] [pop argmap]
(let [umad-parent (selection/select-parent pop argmap)
parent-ind (:index umad-parent)] ;this is a hack to log hyperselection, only works for umad
{:plushy {:plushy
(let [r (rand) (let [r (rand)
op (loop [accum 0.0 op (loop [accum 0.0
@ -381,4 +383,4 @@ The function `new-individual` returns a new individual produced by selection and
:else :else
(throw #?(:clj (Exception. (str "No match in new-individual for " op)) (throw #?(:clj (Exception. (str "No match in new-individual for " op))
:cljs (js/Error :cljs (js/Error
(str "No match in new-individual for " op))))))}) (str "No match in new-individual for " op))))))}))

View File

@ -0,0 +1,163 @@
(ns propeller.push.downsample-test
(:require [clojure.test :as t]
[propeller.utils :as u]
[propeller.simplification :as s]
[propeller.downsample :as ds]
[propeller.hyperselection :as hs]))
(t/deftest assign-indices-to-data-test
(t/testing "assign-indices-to-data"
(t/testing "should return a map of the same length"
(t/is (= (count (ds/assign-indices-to-data (range 10))) 10))
(t/is (= (count (ds/assign-indices-to-data (range 0))) 0)))
(t/testing "should return a map where each element has an index key"
(t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10))))))
(t/testing "should return distinct indices"
(t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10))) (range 10))))))
(t/deftest select-downsample-random-test
(t/testing "select-downsample-random"
(t/testing "should select the correct amount of elements"
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.1})) 1))
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.2})) 2))
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.5})) 5)))
(t/testing "should not return duplicate items (when called with set of numbers)"
(t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.1}))) 1))
(t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.2}))) 2))
(t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.5}))) 5)))
(t/testing "should round down the number of elements selected if not whole"
(t/is (= (count (ds/select-downsample-random (range 3) {:downsample-rate 0.5})) 1))
(t/is (= (count (ds/select-downsample-random (range 1) {:downsample-rate 0.5})) 0)))
(t/testing "should not return more elements than available"
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 2})) 10))
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 1.5})) 10)))))
(t/deftest get-distance-between-cases-test
(t/testing "get-distance-between-cases"
(t/testing "should return correct distance"
(t/is (= 3 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 1))))
(t/testing "should return 0 for the distance of a case to itself"
(t/is (= 0 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 0))))
(t/testing "should work for non binary values (0 is solved)"
(t/is (= 1 (ds/get-distance-between-cases '((0 2 2) (0 2 2) (1 0 50)) 1 2))))
(t/testing "should return the max distance if one of the cases does not exist"
(t/is (= 3 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 4))))))
(t/deftest merge-map-lists-at-index-test
(t/testing "merge-map-lists-at-index"
(t/testing "works properly"
(t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 2 :b 3}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '({:index 1 :a 2 :b 3})))))
(t/testing "doesn't change big list if no indices match"
(t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '({:index 3 :a 2 :b 3})))))
(t/testing "doesn't fail on empty list"
(t/is (= '() (ds/merge-map-lists-at-index '() '()))))
(t/testing "shouldn't fail merging non-empty with empty"
(t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '()))))))
(t/deftest update-at-indices-test
(t/testing "update-at-indices"
(t/testing "should update at correct indices"
(t/is (= (ds/update-at-indices [1 2 3 4] [5] [0]) [5 2 3 4]))
(t/is (= (ds/update-at-indices [1 2 3 4] [5] [0]) [5 2 3 4])))
(t/testing "should update nothing if index list is empty"
(t/is (= (ds/update-at-indices [6 5 4 0 0] [] []) [6 5 4 0 0])))
(t/testing "should update nothing if index list is out of bounds"
(t/is (= (ds/update-at-indices [6 5 4 0 0] [4 5 1] [-1 5 6]) [6 5 4 0 0])))
(t/testing "should update only when indices are available (length mismatch)"
(t/is (= (ds/update-at-indices [6 5 4 0 0] [1 2 3 4] [0 1]) [1 2 4 0 0])))
(t/testing "should not care about index order"
(t/is (= (ds/update-at-indices [6 5 4 0 0] [2 1] [1 0]) [1 2 4 0 0])))
(t/testing "should work when input is a list"
(t/is (= (ds/update-at-indices '(6 5 4 0 0) '(2 1) '(1 0)) [1 2 4 0 0])))))
(t/deftest ids-types-test
(t/testing "replace-close-zero-with-zero"
(t/testing "should replace the close to zero values with zero"
(t/is (= (ds/replace-close-zero-with-zero '(0.1 2 3 4 0.1 2 3 4) 0.2) '(0 2 3 4 0 2 3 4)))
(t/is (= (ds/replace-close-zero-with-zero '(0.1 0.1) 0.0) '(0.1 0.1)))
(t/is (= (ds/replace-close-zero-with-zero '(100 100 200) 100) '(0 0 200))))))
(t/deftest update-case-distances-test
(t/testing "update-case-distances"
(t/testing "should update correctly when fewer errors than all"
(t/is (= (ds/update-case-distances '({:errors (0 0)} {:errors (0 0)})
'({:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
:solved)
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]}
{:index 3 :distances [2 2 2 0 0]} {:index 4 :distances [2 2 2 0 0]}))))
(t/testing "should update correctly when same errors as all"
(t/is (= (ds/update-case-distances '({:errors (0 0 0 0 0)} {:errors (0 0 0 0 0)})
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
:solved)
'({:index 0 :distances [0 0 0 0 0]} {:index 1 :distances [0 0 0 0 0]} {:index 2 :distances [0 0 0 0 0]}
{:index 3 :distances [0 0 0 0 0]} {:index 4 :distances [0 0 0 0 0]}))))
(t/testing "should update correctly for elite/not-elite"
(t/is (= (ds/update-case-distances '({:errors (1 1 1 2 2)} {:errors (2 2 2 1 1)})
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
:elite)
'({:index 0 :distances [0 0 0 2 2]} {:index 1 :distances [0 0 0 2 2]} {:index 2 :distances [0 0 0 2 2]}
{:index 3 :distances [2 2 2 0 0]} {:index 4 :distances [2 2 2 0 0]})))
)))
(t/deftest case-maxmin-test
(t/testing "case-maxmin selects correct downsample"
(let [selected (ds/select-downsample-maxmin
'({:input1 [0] :output1 [10] :index 0 :distances [0 5 0 0 0]}
{:input1 [1] :output1 [11] :index 1 :distances [5 0 5 5 5]}
{:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]}
{:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]}
{:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]})
{:downsample-rate 0.4})]
(prn {:selected selected})
(t/is (or (= (:index (first selected)) 1) (= (:index (second selected)) 1))))))
(t/deftest case-maxmin-adaptive
(t/testing "case-maxmin-adaptive selects correct downsample simple"
(let [selected (ds/select-downsample-maxmin-adaptive
'({:input1 [0] :output1 [10] :index 0 :distances [0 5 0 0 0]}
{:input1 [1] :output1 [11] :index 1 :distances [5 0 5 5 5]}
{:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]}
{:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]}
{:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]})
{:case-delta 0})]
(prn {:selected selected})
(t/is (or (= (:index (first selected)) 1) (= (:index (second selected)) 1)))
(t/is (= 2 (count selected)))))
(t/testing "case-maxmin-adaptive selects correct downsample when all identical"
(let [selected (ds/select-downsample-maxmin-adaptive
'({:input1 [0] :output1 [10] :index 0 :distances [0 0 0 0 0]}
{:input1 [1] :output1 [11] :index 1 :distances [0 0 0 0 0]}
{:input1 [2] :output1 [12] :index 2 :distances [0 0 0 0 0]}
{:input1 [3] :output1 [13] :index 3 :distances [0 0 0 0 0]}
{:input1 [4] :output1 [14] :index 4 :distances [0 0 0 0 0]})
{:case-delta 0})]
(prn {:selected selected})
(t/is (= 1 (count selected))))))
(t/deftest hyperselection-test
(let [parents1 '({:blah 3 :index 1} {:blah 3 :index 1}
{:blah 3 :index 1} {:blah 3 :index 2})
parents2 '({:plushy 2 :index 0} {:blah 3 :index 2}
{:blah 3 :index 3} {:index 4})
emptyparents '({:blah 1} {:blah 1} {:blah 1})]
(t/testing "sum-list-map-indices function works correctly"
(t/is (= {1 3, 2 1} (hs/sum-list-map-indices parents1)))
(t/is (= {0 1, 2 1, 3 1, 4 1} (hs/sum-list-map-indices parents2))))
(t/testing "ordered-freqs function works correctly"
(t/is (= '(3 1) (hs/ordered-freqs (hs/sum-list-map-indices parents1))))
(t/is (= '(1 1 1 1) (hs/ordered-freqs (hs/sum-list-map-indices parents2)))))
(t/testing "hyperselection-track works correctly"
(t/is (= '(0.75 0.25) (hs/hyperselection-track parents1)))
(t/is (= '(0.25 0.25 0.25 0.25) (hs/hyperselection-track parents2))))
(t/testing "reindex-pop works correctly"
(t/is (= '({:blah 3 :index 0} {:blah 3 :index 1}
{:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1)))
(t/is (= '({:plushy 2 :index 0} {:blah 3 :index 1}
{:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2)))
(t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents))))))

View File

@ -0,0 +1,23 @@
(ns propeller.selection-test
(:require [clojure.test :as t]
[propeller.selection :as s]))
(t/deftest roulette-selection-test
(t/testing "fitness proportionate selection"
(t/testing "should correctly define the probabilities of selection"
(t/is (let [ret (s/fitness-proportionate-selection '({:index 0 :total-error 0}
{:index 1 :total-error 1}
{:index 2 :total-error 1}
{:index 3 :total-error 1}) {:empty :argmap})]
(case (:index ret)
0 (= (:fitness ret) 1) ;if we selected index 0, check that fitness is correctly calculated to 1
(= (float (:fitness ret)) 0.5)
))))
(t/testing "should always return the same individual if there is only one"
(t/testing "desipte it having bad error"
(t/is (= (:index (s/fitness-proportionate-selection '({:index 99 :total-error 109012390123}) {:empty :argmap}))
99)))
(t/testing "when it has low error"
(t/is (= (:index (s/fitness-proportionate-selection '({:index 22 :total-error 0}) {:empty :argmap}))
22))))))

View File

@ -3,6 +3,22 @@
[propeller.tools.metrics :as m] [propeller.tools.metrics :as m]
[propeller.tools.math :as a])) [propeller.tools.math :as a]))
(t/deftest argmax-test
(t/is (= (m/argmax '(1 2 3 4)) 3))
(t/is (= 1 (nth '(1 1 1 1) (m/argmax '(1 1 1 1)))))
(t/is (= 3 (nth '(1 3 3 1) (m/argmax '(1 3 3 1)))))
(t/is (= 0 (m/argmax '(1)))))
(t/deftest mean-of-colls-test
(t/is (= (m/mean-of-colls '((1 2 3 4) (4 3 2 1))) '(2.5 2.5 2.5 2.5)))
(t/is (= (m/mean-of-colls '((1 2 3) (4 3 2 1))) '(2.5 2.5 2.5)))
(t/is (= (m/mean-of-colls '((1))) '(1.0))))
(t/deftest min-of-colls-test
(t/is (= (m/min-of-colls '((1 2 3 4) (4 3 2 1))) '(1 2 2 1)))
(t/is (= (m/min-of-colls '((1 2 3) (4 3 2 1))) '(1 2 2)))
(t/is (= (m/min-of-colls '((1))) '(1))))
(t/deftest mean-test (t/deftest mean-test
(t/is (= (m/mean '(1 2 3 4)) 2.5)) (t/is (= (m/mean '(1 2 3 4)) 2.5))
(t/is (= (m/mean '()) 0))) (t/is (= (m/mean '()) 0)))
@ -13,9 +29,18 @@
;(t/is (= (m/median '()) 0.0)) ;(t/is (= (m/median '()) 0.0))
) )
(t/deftest argmins-test
(t/is (= (m/argmins '(1 2 3 4)) '(0)))
(t/is (= (m/argmins '(1 1 3 4)) '(0 1)))
(t/is (= (m/argmins '()) '()))
(t/is (= (m/argmins '(3 4 5 6 6 6)) '(0)))
(t/is (= (m/argmins '(6 4 5 6 6 6)) '(1)))
(t/is (= (m/argmins '(0 4 5 0 0 0)) '(0 3 4 5))))
(t/deftest levenshtein-distance-test (t/deftest levenshtein-distance-test
(t/is (= (m/levenshtein-distance "kitten" "sipping") 5)) (t/is (= (m/levenshtein-distance "kitten" "sipping") 5))
(t/is (= (m/levenshtein-distance "" "hello")) 5)) (t/is (= (m/levenshtein-distance "" "hello") 5)))
(t/deftest sequence-similarity-test (t/deftest sequence-similarity-test
(t/is (a/approx= (m/sequence-similarity "kitten" "sipping") 0.2857 0.001)) (t/is (a/approx= (m/sequence-similarity "kitten" "sipping") 0.2857 0.001))

View File

@ -1,7 +1,9 @@
(ns propeller.utils-test (ns propeller.utils-test
(:require [clojure.test :as t] (:require [clojure.test :as t]
[propeller.utils :as u] [propeller.utils :as u]
[propeller.simplification :as s])) [propeller.simplification :as s]
[propeller.downsample :as ds]
[propeller.hyperselection :as hs]))
(t/deftest first-non-nil-test (t/deftest first-non-nil-test
(t/is (= 1 (u/first-non-nil '(1 2 3)))) (t/is (= 1 (u/first-non-nil '(1 2 3))))
@ -22,57 +24,62 @@
(t/deftest random-instruction-test (t/deftest random-instruction-test
(t/is (t/is
(letfn [(instruct [] 1)] (letfn [(instruct [] 1)]
(let [test (u/random-instruction [instruct 2])] (let [test (u/random-instruction [instruct 2])]
(if (= 1 test) (if (= 1 test)
true true
(= 2 test)))))) (= 2 test))))))
(t/deftest count-points-test (t/deftest count-points-test
(t/is (= 6 (u/count-points '(:a :b (:c :d))))) (t/is (= 6 (u/count-points '(:a :b (:c :d)))))
(t/is (= 1 (u/count-points '()))) (t/is (= 1 (u/count-points '())))
(t/is (= 2 (u/count-points '(:a))))) (t/is (= 2 (u/count-points '(:a)))))
(t/testing "choose-random-k" (t/deftest choose-random-k-test
(t/testing "should return indices that are a member of the original array" (t/testing "choose-random-k"
(t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10)))))) (t/testing "should return indices that are a member of the original array"
(t/testing "should return a list of size k" (t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10))))))
(t/is (= (count (s/choose-random-k 7 (range 10))) 7)))) (t/testing "should return a list of size k"
(t/is (= (count (s/choose-random-k 7 (range 10))) 7)))))
(t/testing "delete-at-indices"
(t/testing "should actually remove indicated items"
(t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3)))))
(t/testing "should work with numerical indices"
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3)))))
(t/testing "should not delete anything for index out of bounds"
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3))))
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3))))
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3)))))
(t/testing "should only delete at single index once"
(t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3)))))
(t/testing "should return empty list when deleting from empty list"
(t/is (= '() (s/delete-at-indices '(0) '()))))
(t/testing "should be able to delete at arbitrary indices"
(t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7))))
(t/testing "delete-random-k" (t/deftest delete-at-indices-test
(t/testing "should remove the correct amount of items" (t/testing "delete-at-indices"
(t/is (= (count (s/delete-k-random 3 (range 10))) 7)) (t/testing "should actually remove indicated items"
(t/is (= (count (s/delete-k-random 10 (range 10))) 0)) (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3)))))
(t/is (= (count (s/delete-k-random 0 (range 10))) 10))) (t/testing "should work with numerical indices"
(t/testing "should not fail if k >> size of collection" (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3)))))
(t/is (= (count (s/delete-k-random 300 (range 10))) 0)) (t/testing "should not delete anything for index out of bounds"
(t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '()))) (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3))))
(t/testing "should not fail if the collection is empty" (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3))))
(t/is (= (count (s/delete-k-random 300 '())) 0)) (t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3)))))
(t/is (= (count (s/delete-k-random 0 '())) 0))) (t/testing "should only delete at single index once"
(t/testing "should maintain order of the remaining items" (t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3)))))
(t/is (apply < (s/delete-k-random 3 (range 10)))))) (t/testing "should return empty list when deleting from empty list"
(t/is (= '() (s/delete-at-indices '(0) '()))))
(t/testing "should be able to delete at arbitrary indices"
(t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7)))))
(t/testing "auto-simplify-plushy" (t/deftest delete-random-k-test
(t/testing "should handle having an empty plushy" (t/testing "delete-random-k"
(t/is (= (s/auto-simplify-plushy '() (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false}) '()))) (t/testing "should remove the correct amount of items"
(let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)] (t/is (= (count (s/delete-k-random 3 (range 10))) 7))
(t/testing "should decrease size of plushy that always has perfect scores" (t/is (= (count (s/delete-k-random 10 (range 10))) 0))
(t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false})) (count plushy))) (t/is (= (count (s/delete-k-random 0 (range 10))) 10)))
(t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 10 :simplification-verbose? false})) (count plushy)))))) (t/testing "should not fail if k >> size of collection"
(t/is (= (count (s/delete-k-random 300 (range 10))) 0))
(t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '())))
(t/testing "should not fail if the collection is empty"
(t/is (= (count (s/delete-k-random 300 '())) 0))
(t/is (= (count (s/delete-k-random 0 '())) 0)))
(t/testing "should maintain order of the remaining items"
(t/is (apply < (s/delete-k-random 3 (range 10)))))))
(t/deftest auto-simplify-plushy-test
(t/testing "auto-simplify-plushy"
(t/testing "should handle having an empty plushy"
(t/is (= (s/auto-simplify-plushy '() (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false}) '())))
(let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)]
(t/testing "should decrease size of plushy that always has perfect scores"
(t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false})) (count plushy)))
(t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 10 :simplification-verbose? false})) (count plushy)))))))