Merge pull request #60 from ryanboldi/main
down-sampling code, autosimplification code
This commit is contained in:
commit
e7130da06f
1
.gitignore
vendored
1
.gitignore
vendored
@ -33,4 +33,5 @@ node_modules/
|
|||||||
# https://github.com/thelmuth/program-synthesis-benchmark-datasets
|
# https://github.com/thelmuth/program-synthesis-benchmark-datasets
|
||||||
/data
|
/data
|
||||||
**/.DS_Store
|
**/.DS_Store
|
||||||
|
*.edn
|
||||||
/.cpcache/
|
/.cpcache/
|
||||||
|
@ -6,7 +6,8 @@
|
|||||||
:dependencies [[org.clojure/clojure "1.10.0"]
|
:dependencies [[org.clojure/clojure "1.10.0"]
|
||||||
[org.clojure/clojurescript "1.9.946"]
|
[org.clojure/clojurescript "1.9.946"]
|
||||||
[org.clojure/test.check "1.1.0"]
|
[org.clojure/test.check "1.1.0"]
|
||||||
[net.clojars.schneau/psb2 "1.1.1"]]
|
[net.clojars.schneau/psb2 "1.1.1"]
|
||||||
|
[org.clojure/data.csv "1.0.1"]]
|
||||||
:profiles {:profiling {:dependencies [[com.clojure-goes-fast/clj-async-profiler "0.5.1"]]}}
|
:profiles {:profiling {:dependencies [[com.clojure-goes-fast/clj-async-profiler "0.5.1"]]}}
|
||||||
:main ^:skip-aot propeller.core
|
:main ^:skip-aot propeller.core
|
||||||
:repl-options {:init-ns propeller.core}
|
:repl-options {:init-ns propeller.core}
|
||||||
|
56
src/docs_src/Downsampling_training_data.md
Normal file
56
src/docs_src/Downsampling_training_data.md
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
Downsampling the Training Data
|
||||||
|
=
|
||||||
|
|
||||||
|
Downsampling is a very simple way to improve the efficiency of your evolutionary runs. It might allow for deeper evolutionary searches and a greater success rate.
|
||||||
|
|
||||||
|
Using Downsampled selection with propeller is easy:
|
||||||
|
|
||||||
|
Set the :parent-selection argument to whichever selection strategy you would like, and set the :downsample? argument to true as follows:
|
||||||
|
|
||||||
|
```clojure
|
||||||
|
lein run -m propeller.problems.simple-regression :parent-selection :lexicase :downsample? true <required downsampling args here>
|
||||||
|
```
|
||||||
|
|
||||||
|
The number of evaluations is held constant when comparing to a full training set run, so set the :max-generations to a number of generations that you would have gone to using a **full** sample.
|
||||||
|
|
||||||
|
## Downsample Functions
|
||||||
|
|
||||||
|
In this repository, you have access to 3 different downsampling functions. These are the methods used to take a down-sample from the entire training set.
|
||||||
|
|
||||||
|
To use them, add the argument ```:ds-function``` followed by which function you would like to us
|
||||||
|
|
||||||
|
The list is
|
||||||
|
- ```:case-maxmin``` - This is the method used for informed down-sampled lexicase selection
|
||||||
|
- ```:case-maxmin-auto``` - This method automatically determines the downsample size
|
||||||
|
- ```:case-rand```- Random Sampling
|
||||||
|
|
||||||
|
### Using ```:case-maxmin```:
|
||||||
|
|
||||||
|
In order to use regular informed down-sampled selection, you must specify a few things:
|
||||||
|
- ```:downsample-rate```- This is the $r$ parameter: what proportion of the full sample should be in the down-sample $\in [0,1]$
|
||||||
|
- ```:ds-parent-rate``` - This is the $\rho$ parameter: what proportion of parents are used to evaluate case distances $\in [0,1]$
|
||||||
|
- ```:ds-parent-gens``` - This is the $k$ parameter: How many generations in between parent evaluations for distances $\in \{1,2,3, \dots\}$
|
||||||
|
|
||||||
|
### Using ```:case-maxmin-auto```:
|
||||||
|
|
||||||
|
In order to use automatic informed down-sampled selection, you must specify a few things:
|
||||||
|
- ```:case-delta ```- This is the $\Delta$ parameter: How close can the farthest case be from its closest case before we stop adding to the down-sample
|
||||||
|
- ```:ids-type``` - Either ```:elite``` or ```:solved ``` - Specifies whether we are using elite/not-elite or solved/not-solved as our binary-fication of case solve vectors.
|
||||||
|
- ```:ds-parent-rate``` - This is the $\rho$ parameter: what proportion of parents are used to evaluate case distances $\in [0,1]$
|
||||||
|
- ```:ds-parent-gens``` - This is the $k$ parameter: How many generations in between parent evaluations for distances $\in \{1,2,3, \dots\}$
|
||||||
|
|
||||||
|
### Using ```:case-rand```:
|
||||||
|
|
||||||
|
In order to use regular randomly down-sampled selection, you must specify a few things:
|
||||||
|
- ```:downsample-rate```- This is the $r$ parameter: what proportion of the full sample should be in the down-sample $\in [0,1]$
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Here's an example of running informed downsampled lexicase selection with $r=0.1$, $\rho=0.01$ and $k=100$ on the simple classification problem:
|
||||||
|
|
||||||
|
```clojure
|
||||||
|
lein run -m propeller.problems.simple-classification :parent-selection :lexicase :downsample? true :ds-function :case-maxmin :downsample-rate 0.1 :max-generations 300 :ds-parent-rate 0.01 :ds-parent-gens 100
|
||||||
|
```
|
||||||
|
|
132
src/propeller/downsample.cljc
Normal file
132
src/propeller/downsample.cljc
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
(ns propeller.downsample
|
||||||
|
(:require [propeller.tools.math :as math]
|
||||||
|
[propeller.tools.metrics :as metrics]
|
||||||
|
[propeller.utils :as utils]))
|
||||||
|
|
||||||
|
(defn assign-indices-to-data
|
||||||
|
"assigns an index to each training case in order to differentiate them when downsampling"
|
||||||
|
[training-data]
|
||||||
|
(map (fn [data-map index]
|
||||||
|
(let [data-m (if (map? data-map) data-map (assoc {} :data data-map))] ;if data is not in a map, make it one
|
||||||
|
(assoc data-m :index index)))
|
||||||
|
training-data (range (count training-data))))
|
||||||
|
|
||||||
|
(defn initialize-case-distances
|
||||||
|
[{:keys [training-data population-size]}]
|
||||||
|
(map #(assoc % :distances (vec (repeat (count training-data) population-size))) training-data))
|
||||||
|
|
||||||
|
(defn select-downsample-random
|
||||||
|
"Selects a downsample from the training cases and returns it"
|
||||||
|
[training-data {:keys [downsample-rate]}]
|
||||||
|
(take (int (* downsample-rate (count training-data))) (shuffle training-data)))
|
||||||
|
|
||||||
|
(defn select-downsample-maxmin
|
||||||
|
"selects a downsample that has it's cases maximally far away by sequentially
|
||||||
|
adding cases to the downsample that have their closest case maximally far away"
|
||||||
|
[training-data {:keys [downsample-rate]}]
|
||||||
|
(let [shuffled-cases (shuffle training-data)
|
||||||
|
goal-size (int (* downsample-rate (count training-data)))]
|
||||||
|
(loop [new-downsample (conj [] (first shuffled-cases))
|
||||||
|
cases-to-pick-from (rest shuffled-cases)]
|
||||||
|
(if (>= (count new-downsample) goal-size)
|
||||||
|
new-downsample
|
||||||
|
(let [tournament cases-to-pick-from
|
||||||
|
min-case-distances (metrics/min-of-colls
|
||||||
|
(map (fn [distance-list]
|
||||||
|
(utils/filter-by-index distance-list (map #(:index %) tournament)))
|
||||||
|
(map #(:distances %) new-downsample)))
|
||||||
|
selected-case-index (metrics/argmax min-case-distances)]
|
||||||
|
(recur (conj new-downsample (nth tournament selected-case-index))
|
||||||
|
(shuffle (utils/drop-nth selected-case-index tournament))))))))
|
||||||
|
|
||||||
|
(defn select-downsample-maxmin-adaptive
|
||||||
|
"selects a downsample that has it's cases maximally far away by sequentially
|
||||||
|
adding cases to the downsample that have their closest case maximally far away
|
||||||
|
automatically stops when the maximum minimum distance is below delta"
|
||||||
|
[training-data {:keys [case-delta]}]
|
||||||
|
(let [shuffled-cases (shuffle training-data)]
|
||||||
|
(loop [new-downsample (conj [] (first shuffled-cases))
|
||||||
|
cases-to-pick-from (rest shuffled-cases)]
|
||||||
|
(let [tournament cases-to-pick-from
|
||||||
|
min-case-distances (metrics/min-of-colls
|
||||||
|
(map (fn [distance-list]
|
||||||
|
(utils/filter-by-index distance-list (map #(:index %) tournament)))
|
||||||
|
(map #(:distances %) new-downsample)))
|
||||||
|
selected-case-index (metrics/argmax min-case-distances)]
|
||||||
|
(if (or (= 0 (count tournament)) (<= (apply max min-case-distances) case-delta))
|
||||||
|
new-downsample
|
||||||
|
(recur (conj new-downsample (nth tournament selected-case-index))
|
||||||
|
(shuffle (utils/drop-nth selected-case-index tournament))))))))
|
||||||
|
|
||||||
|
(defn get-distance-between-cases
|
||||||
|
"returns the distance between two cases given a list of individual error vectors, and the index these
|
||||||
|
cases exist in the error vector. Only makes the distinction between zero and nonzero errors"
|
||||||
|
[error-lists case-index-1 case-index-2]
|
||||||
|
(if (or (< (count (first error-lists)) case-index-1)
|
||||||
|
(< (count (first error-lists)) case-index-2)
|
||||||
|
(neg? case-index-1) (neg? case-index-2))
|
||||||
|
(count error-lists) ;return the max distance
|
||||||
|
(let [errors-1 (map #(nth % case-index-1) error-lists)
|
||||||
|
errors-2 (map #(nth % case-index-2) error-lists)]
|
||||||
|
;compute distance between errors-1 and errors-2
|
||||||
|
(reduce + (map (fn [e1 e2] (math/abs (- (math/step e1) (math/step e2)))) errors-1 errors-2)))))
|
||||||
|
|
||||||
|
(defn update-at-indices
|
||||||
|
"merges two vectors at the indices provided by a third vector"
|
||||||
|
[big-vec small-vec indices]
|
||||||
|
(->> big-vec
|
||||||
|
(map-indexed (fn [idx itm] (let [index (.indexOf indices idx)]
|
||||||
|
(if (not= -1 index) (nth small-vec index) itm))))
|
||||||
|
vec))
|
||||||
|
|
||||||
|
(defn merge-map-lists-at-index
|
||||||
|
"merges two lists of maps, replacing the maps in the big
|
||||||
|
list with their corresponding (based on index) maps in the small list"
|
||||||
|
[big-list small-list]
|
||||||
|
(map
|
||||||
|
#(let [corresponding-small (some (fn [c] (when (= (:index %) (:index c)) c)) small-list)]
|
||||||
|
(if (nil? corresponding-small) % corresponding-small))
|
||||||
|
big-list))
|
||||||
|
|
||||||
|
(defn replace-mins-with-zero
|
||||||
|
"replaces the minimum value(s) in a list with zero"
|
||||||
|
[coll]
|
||||||
|
(if (empty? coll)
|
||||||
|
'()
|
||||||
|
(let [m (apply min coll)]
|
||||||
|
(map #(if (= m %) 0 %) coll))))
|
||||||
|
|
||||||
|
(defn replace-close-zero-with-zero
|
||||||
|
"replaces values within a delta of zero with zero, used for regression problems"
|
||||||
|
[coll delta]
|
||||||
|
(map #(if (>= delta %) 0 %) coll))
|
||||||
|
|
||||||
|
(defn convert-to-elite-error
|
||||||
|
"converts a set of errors into a list where all the elite errors are replaced with 0s so that we can use
|
||||||
|
it in the selection of down-samples with elite/not-elite selection"
|
||||||
|
[errors]
|
||||||
|
(map #(replace-mins-with-zero %) errors))
|
||||||
|
|
||||||
|
(defn convert-to-soft-error
|
||||||
|
[errors delta]
|
||||||
|
(map #(replace-close-zero-with-zero % delta) errors))
|
||||||
|
|
||||||
|
(defn update-case-distances
|
||||||
|
"updates the case distance field of training-data list, should be called after evaluation of individuals
|
||||||
|
evaluated-pop should be a list of individuals that all have the :errors field with a list of this
|
||||||
|
individuals performance on the each case in the training-data, in order. ids-type is :elite to use elite/not-elite, :soft to consider near solves, and :solved to use solve/not-solved"
|
||||||
|
([evaluated-pop ds-data training-data ids-type]
|
||||||
|
(update-case-distances evaluated-pop ds-data training-data ids-type 0)) ; default solution threshold is 0, only used if ids-type is :soft
|
||||||
|
([evaluated-pop ds-data training-data ids-type solution-threshold]
|
||||||
|
(let [ds-indices (map #(:index %) ds-data)
|
||||||
|
errors (map #(:errors %) evaluated-pop)
|
||||||
|
corr-errors (case ids-type
|
||||||
|
:elite (convert-to-elite-error errors)
|
||||||
|
:soft (convert-to-soft-error errors solution-threshold)
|
||||||
|
errors)] ;errors, including elite/not-elite distinction
|
||||||
|
(merge-map-lists-at-index
|
||||||
|
training-data (map-indexed
|
||||||
|
(fn [idx d-case] (update-in d-case
|
||||||
|
[:distances] #(update-at-indices
|
||||||
|
% (map (fn [other] (get-distance-between-cases corr-errors idx other))
|
||||||
|
(range (count ds-indices))) ds-indices))) ds-data)))))
|
@ -5,6 +5,8 @@
|
|||||||
[propeller.genome :as genome]
|
[propeller.genome :as genome]
|
||||||
[propeller.simplification :as simplification]
|
[propeller.simplification :as simplification]
|
||||||
[propeller.variation :as variation]
|
[propeller.variation :as variation]
|
||||||
|
[propeller.downsample :as downsample]
|
||||||
|
[propeller.hyperselection :as hyperselection]
|
||||||
[propeller.push.instructions.bool]
|
[propeller.push.instructions.bool]
|
||||||
[propeller.push.instructions.character]
|
[propeller.push.instructions.character]
|
||||||
[propeller.push.instructions.code]
|
[propeller.push.instructions.code]
|
||||||
@ -17,14 +19,16 @@
|
|||||||
[propeller.utils :as utils]))
|
[propeller.utils :as utils]))
|
||||||
|
|
||||||
(defn report
|
(defn report
|
||||||
"Reports information for each generation."
|
"Reports information each generation."
|
||||||
[pop generation argmap]
|
[evaluations pop generation argmap training-data]
|
||||||
(let [best (first pop)]
|
(let [best (first pop)]
|
||||||
(clojure.pprint/pprint
|
(clojure.pprint/pprint
|
||||||
(merge {:generation generation
|
(merge {:generation generation
|
||||||
:best-plushy (:plushy best)
|
:best-plushy (:plushy best)
|
||||||
:best-program (genome/plushy->push (:plushy best) argmap)
|
:best-program (genome/plushy->push (:plushy best) argmap)
|
||||||
:best-total-error (:total-error best)
|
:best-total-error (:total-error best)
|
||||||
|
:evaluations evaluations
|
||||||
|
:ds-indices (map #(:index %) training-data)
|
||||||
:best-errors (:errors best)
|
:best-errors (:errors best)
|
||||||
:best-behaviors (:behaviors best)
|
:best-behaviors (:behaviors best)
|
||||||
:genotypic-diversity (float (/ (count (distinct (map :plushy pop))) (count pop)))
|
:genotypic-diversity (float (/ (count (distinct (map :plushy pop))) (count pop)))
|
||||||
@ -47,59 +51,100 @@
|
|||||||
(defn gp
|
(defn gp
|
||||||
"Main GP loop."
|
"Main GP loop."
|
||||||
[{:keys [population-size max-generations error-function instructions
|
[{:keys [population-size max-generations error-function instructions
|
||||||
max-initial-plushy-size solution-error-threshold]
|
max-initial-plushy-size solution-error-threshold mapper ds-parent-rate ds-parent-gens dont-end ids-type downsample?]
|
||||||
:or {solution-error-threshold 0.0}
|
:or {solution-error-threshold 0.0
|
||||||
|
dont-end false
|
||||||
|
ds-parent-rate 0
|
||||||
|
ds-parent-gens 1
|
||||||
|
ids-type :solved ; :solved or :elite or :soft
|
||||||
|
downsample? false
|
||||||
|
;; The `mapper` will perform a `map`-like operation to apply a function to every individual
|
||||||
|
;; in the population. The default is `map` but other options include `mapv`, or `pmap`.
|
||||||
|
mapper #?(:clj pmap :cljs map)}
|
||||||
:as argmap}]
|
:as argmap}]
|
||||||
;;
|
;;
|
||||||
(prn {:starting-args (update (update argmap :error-function str) :instructions str)})
|
(prn {:starting-args (update (update argmap :error-function str) :instructions str)})
|
||||||
(println)
|
(println)
|
||||||
;;
|
;;
|
||||||
(loop [generation 0
|
(loop [generation 0
|
||||||
|
evaluations 0
|
||||||
population (utils/pmapallv
|
population (utils/pmapallv
|
||||||
(fn [_] {:plushy (let [plushy (genome/make-random-plushy instructions max-initial-plushy-size)]
|
(fn [_] {:plushy (let [plushy (genome/make-random-plushy instructions max-initial-plushy-size)]
|
||||||
(if (:diploid argmap)
|
(if (:diploid argmap)
|
||||||
(interleave plushy plushy)
|
(interleave plushy plushy)
|
||||||
plushy))})
|
plushy))}) (range population-size) argmap)
|
||||||
(range population-size)
|
indexed-training-data (downsample/assign-indices-to-data (downsample/initialize-case-distances argmap))]
|
||||||
argmap)] ;creates population of random plushys
|
(let [training-data (if downsample?
|
||||||
(let [evaluated-pop (sort-by :total-error
|
(case (:ds-function argmap)
|
||||||
(utils/pmapallv
|
:case-maxmin (downsample/select-downsample-maxmin indexed-training-data argmap)
|
||||||
(partial error-function argmap (:training-data argmap))
|
:case-maxmin-auto (downsample/select-downsample-maxmin-adaptive indexed-training-data argmap)
|
||||||
population
|
:case-rand (downsample/select-downsample-random indexed-training-data argmap)
|
||||||
argmap))
|
(do (prn {:error "Invalid Downsample Function"}) (downsample/select-downsample-random indexed-training-data argmap)))
|
||||||
|
indexed-training-data) ;defaults to full training set
|
||||||
|
parent-reps (if
|
||||||
|
(and downsample? ; if we are down-sampling
|
||||||
|
(zero? (mod generation ds-parent-gens))) ;every ds-parent-gens generations
|
||||||
|
(take (* ds-parent-rate (count population)) (shuffle population))
|
||||||
|
'()) ;else just empty list
|
||||||
|
; parent representatives for down-sampling
|
||||||
|
rep-evaluated-pop (if downsample?
|
||||||
|
(sort-by :total-error
|
||||||
|
(utils/pmapallv
|
||||||
|
(partial error-function argmap indexed-training-data)
|
||||||
|
parent-reps
|
||||||
|
argmap))
|
||||||
|
'())
|
||||||
|
evaluated-pop (sort-by :total-error
|
||||||
|
(utils/pmapallv
|
||||||
|
(partial error-function argmap training-data)
|
||||||
|
population
|
||||||
|
argmap))
|
||||||
best-individual (first evaluated-pop)
|
best-individual (first evaluated-pop)
|
||||||
|
best-individual-passes-ds (and downsample? (<= (:total-error best-individual) solution-error-threshold))
|
||||||
argmap (if (= (:parent-selection argmap) :epsilon-lexicase)
|
argmap (if (= (:parent-selection argmap) :epsilon-lexicase)
|
||||||
(assoc argmap :epsilons (selection/epsilon-list evaluated-pop))
|
(assoc argmap :epsilons (selection/epsilon-list evaluated-pop))
|
||||||
argmap)] ;adds :epsilons if using epsilon-lexicase
|
argmap)] ; epsilons
|
||||||
(if (:custom-report argmap)
|
(if (:custom-report argmap)
|
||||||
((:custom-report argmap) evaluated-pop generation argmap)
|
((:custom-report argmap) evaluations evaluated-pop generation argmap)
|
||||||
(report evaluated-pop generation argmap))
|
(report evaluations evaluated-pop generation argmap training-data))
|
||||||
|
;;did the indvidual pass all cases in ds?
|
||||||
|
(when best-individual-passes-ds
|
||||||
|
(prn {:semi-success-generation generation}))
|
||||||
(cond
|
(cond
|
||||||
;; Success on training cases is verified on testing cases
|
;; If either the best individual on the ds passes all training cases, or best individual on full sample passes all training cases
|
||||||
(<= (:total-error best-individual) solution-error-threshold)
|
;; We verify success on test cases and end evolution
|
||||||
(do (prn {:success-generation generation})
|
(if (or (and best-individual-passes-ds (<= (:total-error (error-function argmap indexed-training-data best-individual)) solution-error-threshold))
|
||||||
(prn {:total-test-error
|
(and (not downsample?)
|
||||||
(:total-error (error-function argmap (:testing-data argmap) best-individual))})
|
(<= (:total-error best-individual) solution-error-threshold)))
|
||||||
(when (:simplification? argmap)
|
(do (prn {:success-generation generation})
|
||||||
(let [simplified-plushy (simplification/auto-simplify-plushy
|
(prn {:total-test-error
|
||||||
(:plushy best-individual)
|
(:total-error (error-function argmap (:testing-data argmap) best-individual))})
|
||||||
error-function argmap)]
|
(when (:simplification? argmap)
|
||||||
(prn {:total-test-error-simplified
|
(let [simplified-plushy (simplification/auto-simplify-plushy (:plushy best-individual) error-function argmap)]
|
||||||
(:total-error (error-function argmap
|
(prn {:total-test-error-simplified (:total-error (error-function argmap (:testing-data argmap) (hash-map :plushy simplified-plushy)))})))
|
||||||
(:testing-data argmap)
|
(if dont-end false true))
|
||||||
(hash-map :plushy simplified-plushy)))})))
|
false)
|
||||||
#?(:clj (shutdown-agents)))
|
nil
|
||||||
;;
|
;;
|
||||||
(>= generation max-generations)
|
(and (not downsample?) (>= generation max-generations))
|
||||||
(do #?(:clj (shutdown-agents)))
|
nil
|
||||||
|
;;
|
||||||
|
(and downsample? (>= evaluations (* max-generations population-size (count indexed-training-data))))
|
||||||
|
nil
|
||||||
;;
|
;;
|
||||||
:else (recur (inc generation)
|
:else (recur (inc generation)
|
||||||
(if (:elitism argmap)
|
(+ evaluations (* population-size (count training-data)) ;every member evaluated on the current sample
|
||||||
(conj (utils/pmapallv (fn [_] (variation/new-individual evaluated-pop argmap))
|
(if (zero? (mod generation ds-parent-gens)) (* (count parent-reps) (- (count indexed-training-data) (count training-data))) 0) ; the parent-reps not evaluted already on down-sample
|
||||||
(range (dec population-size))
|
(if best-individual-passes-ds (- (count indexed-training-data) (count training-data)) 0)) ; if we checked for generalization or not
|
||||||
argmap)
|
(let [reindexed-pop (hyperselection/reindex-pop evaluated-pop)] ; give every individual an index for hyperselection loggin
|
||||||
(first evaluated-pop)) ;elitism maintains the most-fit individual
|
(hyperselection/log-hyperselection-and-ret
|
||||||
(utils/pmapallv (fn [_] (variation/new-individual evaluated-pop argmap))
|
(if (:elitism argmap)
|
||||||
(range population-size)
|
(conj (repeatedly (dec population-size) #(variation/new-individual reindexed-pop argmap))
|
||||||
argmap)))))))
|
(first reindexed-pop))
|
||||||
|
(repeatedly population-size ;need to count occurance of each parent, and reset IDs
|
||||||
|
#(variation/new-individual reindexed-pop argmap)))))
|
||||||
|
(if downsample?
|
||||||
|
(if (zero? (mod generation ds-parent-gens))
|
||||||
|
(downsample/update-case-distances rep-evaluated-pop indexed-training-data indexed-training-data ids-type (/ solution-error-threshold (count indexed-training-data))) ; update distances every ds-parent-gens generations
|
||||||
|
indexed-training-data)
|
||||||
|
indexed-training-data))))))
|
||||||
|
35
src/propeller/hyperselection.cljc
Normal file
35
src/propeller/hyperselection.cljc
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
(ns propeller.hyperselection)
|
||||||
|
|
||||||
|
(defn sum-list-map-indices
|
||||||
|
"sums a list of maps that have the :index property's index multiplicity"
|
||||||
|
[list-of-maps]
|
||||||
|
(->> list-of-maps
|
||||||
|
(map #(:index %))
|
||||||
|
frequencies))
|
||||||
|
|
||||||
|
(defn ordered-freqs
|
||||||
|
"takes a map from indices to frequencies, and returns a sorted list of the frequences is descencing order"
|
||||||
|
[freqs]
|
||||||
|
(->> freqs
|
||||||
|
vals
|
||||||
|
(sort >)))
|
||||||
|
|
||||||
|
(defn normalize-list-by-popsize [popsize lst]
|
||||||
|
(map #(double (/ % popsize)) lst))
|
||||||
|
|
||||||
|
(defn hyperselection-track
|
||||||
|
"outputs a normalized list of the hyperselection proportion for each parent"
|
||||||
|
[new-pop]
|
||||||
|
(->> new-pop
|
||||||
|
sum-list-map-indices
|
||||||
|
ordered-freqs
|
||||||
|
(normalize-list-by-popsize (count new-pop))))
|
||||||
|
|
||||||
|
(defn log-hyperselection-and-ret [new-pop]
|
||||||
|
(prn {:hyperselection (hyperselection-track new-pop)})
|
||||||
|
new-pop)
|
||||||
|
|
||||||
|
(defn reindex-pop
|
||||||
|
"assigns each member of the population a unique index before selection to track hyperselection"
|
||||||
|
[pop]
|
||||||
|
(map (fn [indiv index] (assoc indiv :index index)) pop (range (count pop))))
|
@ -65,6 +65,9 @@
|
|||||||
:error-function error-function
|
:error-function error-function
|
||||||
:training-data train-data
|
:training-data train-data
|
||||||
:testing-data test-data
|
:testing-data test-data
|
||||||
|
:case-t-size (count train-data)
|
||||||
|
:ds-parent-rate 0
|
||||||
|
:ds-parent-gens 1
|
||||||
:max-generations 300
|
:max-generations 300
|
||||||
:population-size 1000
|
:population-size 1000
|
||||||
:max-initial-plushy-size 250
|
:max-initial-plushy-size 250
|
||||||
|
@ -83,6 +83,9 @@
|
|||||||
:error-function error-function
|
:error-function error-function
|
||||||
:training-data train-data
|
:training-data train-data
|
||||||
:testing-data test-data
|
:testing-data test-data
|
||||||
|
:case-t-size (count train-data)
|
||||||
|
:ds-parent-rate 0
|
||||||
|
:ds-parent-gens 1
|
||||||
:max-generations 300
|
:max-generations 300
|
||||||
:population-size 1000
|
:population-size 1000
|
||||||
:max-initial-plushy-size 250
|
:max-initial-plushy-size 250
|
||||||
@ -92,4 +95,5 @@
|
|||||||
:umad-rate 0.1
|
:umad-rate 0.1
|
||||||
:variation {:umad 1.0 :crossover 0.0}
|
:variation {:umad 1.0 :crossover 0.0}
|
||||||
:elitism false}
|
:elitism false}
|
||||||
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))
|
(apply hash-map (map #(if (string? %) (read-string %) %) args))))
|
||||||
|
(#?(:clj shutdown-agents)))
|
||||||
|
@ -134,6 +134,9 @@
|
|||||||
:error-function error-function
|
:error-function error-function
|
||||||
:training-data train-data
|
:training-data train-data
|
||||||
:testing-data test-data
|
:testing-data test-data
|
||||||
|
:case-t-size (count train-data)
|
||||||
|
:ds-parent-rate 0
|
||||||
|
:ds-parent-gens 1
|
||||||
:max-generations 300
|
:max-generations 300
|
||||||
:population-size 1000
|
:population-size 1000
|
||||||
:max-initial-plushy-size 250
|
:max-initial-plushy-size 250
|
||||||
|
@ -66,6 +66,9 @@
|
|||||||
:error-function error-function
|
:error-function error-function
|
||||||
:training-data train-data
|
:training-data train-data
|
||||||
:testing-data test-data
|
:testing-data test-data
|
||||||
|
:case-t-size (count train-data)
|
||||||
|
:ds-parent-rate 0
|
||||||
|
:ds-parent-gens 1
|
||||||
:max-generations 300
|
:max-generations 300
|
||||||
:population-size 1000
|
:population-size 1000
|
||||||
:max-initial-plushy-size 250
|
:max-initial-plushy-size 250
|
||||||
|
108
src/propeller/problems/PSB2/find_pair.cljc
Normal file
108
src/propeller/problems/PSB2/find_pair.cljc
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
(ns propeller.problems.PSB2.find-pair
|
||||||
|
(:require [psb2.core :as psb2]
|
||||||
|
[propeller.genome :as genome]
|
||||||
|
[propeller.push.interpreter :as interpreter]
|
||||||
|
[propeller.problems.data-creation :as dc]
|
||||||
|
[propeller.utils :as utils]
|
||||||
|
[propeller.push.instructions :refer [def-instruction get-stack-instructions]]
|
||||||
|
[propeller.push.state :as state]
|
||||||
|
[propeller.tools.math :as math]
|
||||||
|
[propeller.gp :as gp]
|
||||||
|
#?(:cljs [cljs.reader :refer [read-string]])))
|
||||||
|
|
||||||
|
(def train-and-test-data (psb2/fetch-examples "data" "find-pair" 200 2000))
|
||||||
|
(def train-data (:train train-and-test-data))
|
||||||
|
(def test-data (:test train-and-test-data))
|
||||||
|
|
||||||
|
(defn random-int [] (- (rand-int 201) 100))
|
||||||
|
|
||||||
|
(defn map-vals-input
|
||||||
|
"Returns all the input values of a map"
|
||||||
|
[i]
|
||||||
|
(vals (select-keys i [:input1 :input2])))
|
||||||
|
|
||||||
|
(defn map-vals-output
|
||||||
|
"Returns the output values of a map"
|
||||||
|
[i]
|
||||||
|
(vals (select-keys i [:output1 :output2])))
|
||||||
|
|
||||||
|
(def-instruction :output-one
|
||||||
|
^{:stacks #{:integer :output}}
|
||||||
|
(fn [state]
|
||||||
|
(if (empty? (:integer state))
|
||||||
|
state
|
||||||
|
(let [top-int (state/peek-stack state :integer)]
|
||||||
|
(assoc-in state [:output :out1] top-int)))))
|
||||||
|
|
||||||
|
(def-instruction :output-two
|
||||||
|
^{:stacks #{:integer :output}}
|
||||||
|
(fn [state]
|
||||||
|
(if (empty? (:integer state))
|
||||||
|
state
|
||||||
|
(let [top-int (state/peek-stack state :integer)]
|
||||||
|
(assoc-in state [:output :out2] top-int)))))
|
||||||
|
|
||||||
|
(def instructions
|
||||||
|
(utils/not-lazy
|
||||||
|
(concat
|
||||||
|
;;; stack-specific instructions
|
||||||
|
(get-stack-instructions #{:exec :integer :vector_integer :boolean})
|
||||||
|
(list :output-one :output-two)
|
||||||
|
;;; input instructions
|
||||||
|
(list :in1 :in2)
|
||||||
|
;;; close
|
||||||
|
(list 'close)
|
||||||
|
;;; ERCs (constants)
|
||||||
|
(list -1 0 1 2 random-int))))
|
||||||
|
|
||||||
|
(defn error-function
|
||||||
|
[argmap data individual]
|
||||||
|
(let [program (genome/plushy->push (:plushy individual) argmap)
|
||||||
|
inputs (map (fn [i] (map-vals-input i)) data)
|
||||||
|
correct-outputs (map (fn [i] (map-vals-output i)) data)
|
||||||
|
outputs (map (fn [input]
|
||||||
|
(:output
|
||||||
|
(interpreter/interpret-program
|
||||||
|
program
|
||||||
|
(assoc state/empty-state :input {:in1 (nth input 0)
|
||||||
|
:in2 (nth input 1)})
|
||||||
|
(:step-limit argmap))))
|
||||||
|
inputs)
|
||||||
|
outputs-1 (map #(:out1 %) outputs)
|
||||||
|
outputs-2 (map #(:out2 %) outputs)
|
||||||
|
;_ (prn {:o1 outputs-1 :o2 outputs-2})
|
||||||
|
errors (map (fn [correct-output output-1 output-2]
|
||||||
|
(if (not (and (number? output-2) (number? output-1)))
|
||||||
|
100000
|
||||||
|
(+ (math/abs (- (first correct-output) output-1))
|
||||||
|
(math/abs (- (second correct-output) output-2)))))
|
||||||
|
correct-outputs outputs-1 outputs-2)]
|
||||||
|
(assoc individual
|
||||||
|
:behavior outputs
|
||||||
|
:errors errors
|
||||||
|
:total-error #?(:clj (apply +' errors)
|
||||||
|
:cljs (apply + errors)))))
|
||||||
|
|
||||||
|
(defn -main
|
||||||
|
"Runs propel-gp, giving it a map of arguments."
|
||||||
|
[& args]
|
||||||
|
(gp/gp
|
||||||
|
(merge
|
||||||
|
{:instructions instructions
|
||||||
|
:error-function error-function
|
||||||
|
:training-data train-data
|
||||||
|
:testing-data test-data
|
||||||
|
:case-t-size (count train-data)
|
||||||
|
:ds-parent-rate 0
|
||||||
|
:ds-parent-gens 1
|
||||||
|
:max-generations 300
|
||||||
|
:population-size 1000
|
||||||
|
:max-initial-plushy-size 250
|
||||||
|
:step-limit 2000
|
||||||
|
:parent-selection :lexicase
|
||||||
|
:tournament-size 5
|
||||||
|
:umad-rate 0.1
|
||||||
|
:variation {:umad 1.0 :crossover 0.0}
|
||||||
|
:elitism false}
|
||||||
|
(apply hash-map (map #(if (string? %) (read-string %) %) args))))
|
||||||
|
(#?(:clj shutdown-agents)))
|
@ -10,6 +10,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
(:require [psb2.core :as psb2]
|
(:require [psb2.core :as psb2]
|
||||||
[propeller.genome :as genome]
|
[propeller.genome :as genome]
|
||||||
[propeller.push.interpreter :as interpreter]
|
[propeller.push.interpreter :as interpreter]
|
||||||
|
[propeller.problems.data-creation :as dc]
|
||||||
[propeller.utils :as utils]
|
[propeller.utils :as utils]
|
||||||
[propeller.push.instructions :refer [get-stack-instructions]]
|
[propeller.push.instructions :refer [get-stack-instructions]]
|
||||||
[propeller.push.state :as state]
|
[propeller.push.state :as state]
|
||||||
@ -17,7 +18,21 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
[propeller.gp :as gp]
|
[propeller.gp :as gp]
|
||||||
#?(:cljs [cljs.reader :refer [read-string]])))
|
#?(:cljs [cljs.reader :refer [read-string]])))
|
||||||
|
|
||||||
|
; =========== PROBLEM DESCRIPTION =========================
|
||||||
|
; FIZZ BUZZ from PSB2
|
||||||
|
; Given an integer x, return "Fizz" if x is
|
||||||
|
; divisible by 3, "Buzz" if x is divisible by 5, "FizzBuzz" if x
|
||||||
|
; is divisible by 3 and 5, and a string version of x if none of
|
||||||
|
; the above hold.
|
||||||
|
;
|
||||||
|
; Source: https://arxiv.org/pdf/2106.06086.pdf
|
||||||
|
; ============================================================
|
||||||
|
|
||||||
|
|
||||||
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fizz-buzz" 200 2000))
|
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fizz-buzz" 200 2000))
|
||||||
|
(def train-data (:train train-and-test-data))
|
||||||
|
(def test-data (:test train-and-test-data))
|
||||||
|
|
||||||
|
|
||||||
(def instructions
|
(def instructions
|
||||||
"Stack-specific instructions, input instructions, close, and constants"
|
"Stack-specific instructions, input instructions, close, and constants"
|
||||||
@ -71,8 +86,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
(merge
|
(merge
|
||||||
{:instructions instructions
|
{:instructions instructions
|
||||||
:error-function error-function
|
:error-function error-function
|
||||||
:training-data (:train train-and-test-data)
|
:training-data train-data
|
||||||
:testing-data (:test train-and-test-data)
|
:testing-data test-data
|
||||||
|
:case-t-size (count train-data)
|
||||||
|
:ds-parent-rate 0
|
||||||
|
:ds-parent-gens 1
|
||||||
:max-generations 300
|
:max-generations 300
|
||||||
:population-size 1000
|
:population-size 1000
|
||||||
:max-initial-plushy-size 250
|
:max-initial-plushy-size 250
|
||||||
|
@ -11,6 +11,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
(:require [psb2.core :as psb2]
|
(:require [psb2.core :as psb2]
|
||||||
[propeller.genome :as genome]
|
[propeller.genome :as genome]
|
||||||
[propeller.push.interpreter :as interpreter]
|
[propeller.push.interpreter :as interpreter]
|
||||||
|
[propeller.problems.data-creation :as dc]
|
||||||
[propeller.utils :as utils]
|
[propeller.utils :as utils]
|
||||||
[propeller.push.instructions :refer [get-stack-instructions]]
|
[propeller.push.instructions :refer [get-stack-instructions]]
|
||||||
[propeller.push.state :as state]
|
[propeller.push.state :as state]
|
||||||
@ -18,7 +19,19 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
[propeller.gp :as gp]
|
[propeller.gp :as gp]
|
||||||
#?(:cljs [cljs.reader :refer [read-string]])))
|
#?(:cljs [cljs.reader :refer [read-string]])))
|
||||||
|
|
||||||
|
; =========== PROBLEM DESCRIPTION =========================
|
||||||
|
; FUEL COST from PSB2
|
||||||
|
; Given a vector of positive integers, divide
|
||||||
|
; each by 3, round the result down to the nearest integer, and
|
||||||
|
; subtract 2. Return the sum of all of the new integers in the
|
||||||
|
; vector
|
||||||
|
;
|
||||||
|
; Source: https://arxiv.org/pdf/2106.06086.pdf
|
||||||
|
; ============================================================
|
||||||
|
|
||||||
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fuel-cost" 200 2000))
|
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "fuel-cost" 200 2000))
|
||||||
|
(def train-data (:train train-and-test-data))
|
||||||
|
(def test-data (:test train-and-test-data))
|
||||||
|
|
||||||
; Random integer between -100 and 100 (from smallest)
|
; Random integer between -100 and 100 (from smallest)
|
||||||
(defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100))
|
(defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100))
|
||||||
@ -26,15 +39,15 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
(def instructions
|
(def instructions
|
||||||
"Stack-specific instructions, input instructions, close, and constants"
|
"Stack-specific instructions, input instructions, close, and constants"
|
||||||
(utils/not-lazy
|
(utils/not-lazy
|
||||||
(concat
|
(concat
|
||||||
;;; stack-specific instructions
|
;;; stack-specific instructions
|
||||||
(get-stack-instructions #{:exec :integer :boolean :vector_integer :print})
|
(get-stack-instructions #{:exec :integer :boolean :vector_integer :print})
|
||||||
;;; input instructions
|
;;; input instructions
|
||||||
(list :in1)
|
(list :in1)
|
||||||
;;; close
|
;;; close
|
||||||
(list 'close)
|
(list 'close)
|
||||||
;;; ERCs (constants)
|
;;; ERCs (constants)
|
||||||
(list random-int 0 1 2 3))))
|
(list random-int 0 1 2 3))))
|
||||||
|
|
||||||
(defn error-function
|
(defn error-function
|
||||||
"Finds the behaviors and errors of an individual: Error is 0 if the value and
|
"Finds the behaviors and errors of an individual: Error is 0 if the value and
|
||||||
@ -47,11 +60,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
correct-outputs (map (fn [i] (get i :output1)) data)
|
correct-outputs (map (fn [i] (get i :output1)) data)
|
||||||
outputs (map (fn [input]
|
outputs (map (fn [input]
|
||||||
(state/peek-stack
|
(state/peek-stack
|
||||||
(interpreter/interpret-program
|
(interpreter/interpret-program
|
||||||
program
|
program
|
||||||
(assoc state/empty-state :input {:in1 input})
|
(assoc state/empty-state :input {:in1 input})
|
||||||
(:step-limit argmap))
|
(:step-limit argmap))
|
||||||
:integer))
|
:integer))
|
||||||
inputs)
|
inputs)
|
||||||
errors (map (fn [correct-output output]
|
errors (map (fn [correct-output output]
|
||||||
(if (= output :no-stack-item)
|
(if (= output :no-stack-item)
|
||||||
@ -60,10 +73,10 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
correct-outputs
|
correct-outputs
|
||||||
outputs)]
|
outputs)]
|
||||||
(assoc individual
|
(assoc individual
|
||||||
:behaviors outputs
|
:behaviors outputs
|
||||||
:errors errors
|
:errors errors
|
||||||
:total-error #?(:clj (apply +' errors)
|
:total-error #?(:clj (apply +' errors)
|
||||||
:cljs (apply + errors)))))
|
:cljs (apply + errors)))))
|
||||||
|
|
||||||
(defn -main
|
(defn -main
|
||||||
"Runs the top-level genetic programming function, giving it a map of
|
"Runs the top-level genetic programming function, giving it a map of
|
||||||
@ -71,18 +84,21 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
or through a passed map."
|
or through a passed map."
|
||||||
[& args]
|
[& args]
|
||||||
(gp/gp
|
(gp/gp
|
||||||
(merge
|
(merge
|
||||||
{:instructions instructions
|
{:instructions instructions
|
||||||
:error-function error-function
|
:error-function error-function
|
||||||
:training-data (:train train-and-test-data)
|
:training-data train-data
|
||||||
:testing-data (:test train-and-test-data)
|
:testing-data test-data
|
||||||
:max-generations 300
|
:case-t-size (count train-data)
|
||||||
:population-size 1000
|
:ds-parent-rate 0
|
||||||
:max-initial-plushy-size 250
|
:ds-parent-gens 1
|
||||||
:step-limit 2000
|
:max-generations 300
|
||||||
:parent-selection :lexicase
|
:population-size 1000
|
||||||
:tournament-size 5
|
:max-initial-plushy-size 250
|
||||||
:umad-rate 0.1
|
:step-limit 2000
|
||||||
:variation {:umad 1.0 :crossover 0.0}
|
:parent-selection :lexicase
|
||||||
:elitism false}
|
:tournament-size 5
|
||||||
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))
|
:umad-rate 0.1
|
||||||
|
:variation {:umad 1.0 :crossover 0.0}
|
||||||
|
:elitism false}
|
||||||
|
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))
|
||||||
|
@ -9,6 +9,7 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
(:require [psb2.core :as psb2]
|
(:require [psb2.core :as psb2]
|
||||||
[propeller.genome :as genome]
|
[propeller.genome :as genome]
|
||||||
[propeller.push.interpreter :as interpreter]
|
[propeller.push.interpreter :as interpreter]
|
||||||
|
[propeller.problems.data-creation :as dc]
|
||||||
[propeller.utils :as utils]
|
[propeller.utils :as utils]
|
||||||
[propeller.push.instructions :refer [get-stack-instructions]]
|
[propeller.push.instructions :refer [get-stack-instructions]]
|
||||||
[propeller.push.state :as state]
|
[propeller.push.state :as state]
|
||||||
@ -18,6 +19,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
|
|
||||||
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "gcd" 200 2000))
|
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "gcd" 200 2000))
|
||||||
|
|
||||||
|
|
||||||
|
(def train-and-test-data (psb2/fetch-examples "data" "gcd" 200 2000))
|
||||||
|
(def train-data (:train train-and-test-data))
|
||||||
|
(def test-data (:test train-and-test-data))
|
||||||
|
|
||||||
(defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100))
|
(defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100))
|
||||||
|
|
||||||
(defn map-vals-input
|
(defn map-vals-input
|
||||||
@ -82,8 +88,11 @@ Source: https://arxiv.org/pdf/2106.06086.pdf"
|
|||||||
(merge
|
(merge
|
||||||
{:instructions instructions
|
{:instructions instructions
|
||||||
:error-function error-function
|
:error-function error-function
|
||||||
:training-data (:train train-and-test-data)
|
:training-data train-data
|
||||||
:testing-data (:test train-and-test-data)
|
:testing-data test-data
|
||||||
|
:case-t-size (count train-data)
|
||||||
|
:ds-parent-rate 0
|
||||||
|
:ds-parent-gens 1
|
||||||
:max-generations 300
|
:max-generations 300
|
||||||
:population-size 1000
|
:population-size 1000
|
||||||
:max-initial-plushy-size 250
|
:max-initial-plushy-size 250
|
||||||
|
128
src/propeller/problems/UBall5D.cljc
Normal file
128
src/propeller/problems/UBall5D.cljc
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
(ns propeller.problems.UBall5D
|
||||||
|
(:require [propeller.genome :as genome]
|
||||||
|
[propeller.push.interpreter :as interpreter]
|
||||||
|
[propeller.push.state :as state]
|
||||||
|
[propeller.tools.math :as math]
|
||||||
|
[propeller.gp :as gp]
|
||||||
|
#?(:cljs [cljs.reader :refer [read-string]])))
|
||||||
|
|
||||||
|
(defn- target-function
|
||||||
|
"Target function: f(x) = 10/(5 + SUM_i=1^5 (x_i - 3)^2)"
|
||||||
|
[x1 x2 x3 x4 x5]
|
||||||
|
(->> (list x1 x2 x3 x4 x5)
|
||||||
|
(map #(* (- % 3) (- % 3)))
|
||||||
|
(apply +)
|
||||||
|
(+ 5)
|
||||||
|
(/ 10)))
|
||||||
|
|
||||||
|
(target-function 3 7 3 3 3)
|
||||||
|
|
||||||
|
(defn map-vals-input
|
||||||
|
"Returns all the input values of a map"
|
||||||
|
[i]
|
||||||
|
(vals (select-keys i [:input1 :input2 :input3 :input4 :input5])))
|
||||||
|
|
||||||
|
|
||||||
|
(defn get-random-input
|
||||||
|
"returns a random input between two ranges"
|
||||||
|
[a b]
|
||||||
|
(->> (rand)
|
||||||
|
(* (- b a))
|
||||||
|
(+ a)
|
||||||
|
(float)))
|
||||||
|
|
||||||
|
(def train-data
|
||||||
|
(repeatedly 1024 (fn [] (repeatedly 5 #(get-random-input 0.05 6.05)))))
|
||||||
|
|
||||||
|
(def test-data
|
||||||
|
(repeatedly 5000 (fn [] (repeatedly 5 #(get-random-input -0.25 6.35)))))
|
||||||
|
|
||||||
|
(def train-and-test-data
|
||||||
|
(let [train-inputs train-data
|
||||||
|
test-inputs test-data]
|
||||||
|
{:train (map (fn [x] {:input1 (first x)
|
||||||
|
:input2 (nth x 1)
|
||||||
|
:input3 (nth x 2)
|
||||||
|
:input4 (nth x 3)
|
||||||
|
:input5 (nth x 4)
|
||||||
|
:output1 (apply target-function (map #(nth x %) (range 5)))}) train-inputs)
|
||||||
|
:test (map (fn [x] {:input1 (first x)
|
||||||
|
:input2 (nth x 1)
|
||||||
|
:input3 (nth x 2)
|
||||||
|
:input4 (nth x 3)
|
||||||
|
:input5 (nth x 4)
|
||||||
|
:output1 (apply target-function (map #(nth x %) (range 5)))}) test-inputs)}))
|
||||||
|
|
||||||
|
(def instructions
|
||||||
|
(list :in1
|
||||||
|
:in2
|
||||||
|
:in3
|
||||||
|
:in4
|
||||||
|
:in5
|
||||||
|
:float_add
|
||||||
|
:float_subtract
|
||||||
|
:float_mult
|
||||||
|
:float_quot
|
||||||
|
:float_eq
|
||||||
|
:exec_dup
|
||||||
|
:exec_if
|
||||||
|
'close
|
||||||
|
0.0
|
||||||
|
1.0))
|
||||||
|
|
||||||
|
(def data (:train train-and-test-data))
|
||||||
|
|
||||||
|
(defn error-function
|
||||||
|
"Finds the behaviors and errors of an individual. The error is the absolute
|
||||||
|
deviation between the target output value and the program's selected behavior,
|
||||||
|
or 1000000 if no behavior is produced. The behavior is here defined as the
|
||||||
|
final top item on the FLOAT stack."
|
||||||
|
([argmap data individual]
|
||||||
|
(let [program (genome/plushy->push (:plushy individual) argmap)
|
||||||
|
inputs (map (fn [i] (map-vals-input i)) data)
|
||||||
|
correct-outputs (map (fn [x] (:output1 x)) data)
|
||||||
|
outputs (map (fn [input]
|
||||||
|
(state/peek-stack
|
||||||
|
(interpreter/interpret-program
|
||||||
|
program
|
||||||
|
(assoc state/empty-state :input {:in1 (nth input 0)
|
||||||
|
:in2 (nth input 1)
|
||||||
|
:in3 (nth input 2)
|
||||||
|
:in4 (nth input 3)
|
||||||
|
:in5 (nth input 4)})
|
||||||
|
(:step-limit argmap))
|
||||||
|
:float))
|
||||||
|
inputs)
|
||||||
|
errors (map (fn [correct-output output]
|
||||||
|
(if (= output :no-stack-item)
|
||||||
|
1000000
|
||||||
|
(math/abs (- correct-output output))))
|
||||||
|
correct-outputs
|
||||||
|
outputs)]
|
||||||
|
(assoc individual
|
||||||
|
:behaviors outputs
|
||||||
|
:errors errors
|
||||||
|
:total-error #?(:clj (apply +' errors)
|
||||||
|
:cljs (apply + errors))))))
|
||||||
|
|
||||||
|
(defn -main
|
||||||
|
"Runs propel-gp, giving it a map of arguments."
|
||||||
|
[& args]
|
||||||
|
(gp/gp
|
||||||
|
(merge
|
||||||
|
{:instructions instructions
|
||||||
|
:error-function error-function
|
||||||
|
:training-data (:train train-and-test-data)
|
||||||
|
:testing-data (:test train-and-test-data)
|
||||||
|
:max-generations 300
|
||||||
|
:population-size 1000
|
||||||
|
:max-initial-plushy-size 100
|
||||||
|
:step-limit 200
|
||||||
|
:parent-selection :lexicase
|
||||||
|
:tournament-size 5
|
||||||
|
:umad-rate 0.1
|
||||||
|
:solution-error-threshold 0.1
|
||||||
|
:variation {:umad 1.0 :crossover 0.0}
|
||||||
|
:elitism false}
|
||||||
|
(apply hash-map (map #(if (string? %) (read-string %) %) args))))
|
||||||
|
(#?(:clj shutdown-agents)))
|
@ -75,7 +75,11 @@
|
|||||||
:population-size 500
|
:population-size 500
|
||||||
:max-initial-plushy-size 100
|
:max-initial-plushy-size 100
|
||||||
:step-limit 200
|
:step-limit 200
|
||||||
:parent-selection :lexicase
|
:parent-selection :ds-lexicase
|
||||||
|
:ds-function :case-maxmin
|
||||||
|
:downsample-rate 0.1
|
||||||
|
:case-t-size 20
|
||||||
|
:tournament-size 5
|
||||||
:umad-rate 0.1
|
:umad-rate 0.1
|
||||||
:variation {:umad 1.0 :crossover 0.0}
|
:variation {:umad 1.0 :crossover 0.0}
|
||||||
:elitism false}
|
:elitism false}
|
||||||
|
103
src/propeller/problems/data_creation.cljc
Normal file
103
src/propeller/problems/data_creation.cljc
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
(ns propeller.problems.data-creation
|
||||||
|
(:require [psb2.core :as psb2]
|
||||||
|
[clojure.data.csv :as csv]
|
||||||
|
[clojure.java.io :as io]
|
||||||
|
[clojure.string :as s]))
|
||||||
|
|
||||||
|
(defn generate-data [problem train-or-test]
|
||||||
|
(let [train-and-test-data (psb2/fetch-examples "data" problem 200 1000)
|
||||||
|
cleaned-data (cons (vector "input1" "output1") (map #(vector (:input1 %) (:output1 %)) ((keyword train-or-test) train-and-test-data)))]
|
||||||
|
(prn cleaned-data)
|
||||||
|
(with-open [writer (io/writer (str problem "-" train-or-test ".csv"))]
|
||||||
|
(csv/write-csv writer
|
||||||
|
(doall cleaned-data)))))
|
||||||
|
|
||||||
|
(defn generate-data-gcd [train-or-test]
|
||||||
|
(let [train-and-test-data (psb2/fetch-examples "data" "gcd" 200 1000)
|
||||||
|
cleaned-data (cons (vector "input1" "input2" "output1") (map #(vector (:input1 %) (:input2 %) (:output1 %)) ((keyword train-or-test) train-and-test-data)))]
|
||||||
|
(prn cleaned-data)
|
||||||
|
(with-open [writer (io/writer (str "gcd-" train-or-test ".csv"))]
|
||||||
|
(csv/write-csv writer
|
||||||
|
(doall cleaned-data)))))
|
||||||
|
|
||||||
|
(defn generate-data-find-pair [train-or-test]
|
||||||
|
(let [train-and-test-data (psb2/fetch-examples "data" "find-pair" 200 1000)
|
||||||
|
cleaned-data (cons (vector "input1" "input2" "output1" "output2") (map #(vector (:input1 %) (:input2 %) (:output1 %) (:output2 %)) ((keyword train-or-test) train-and-test-data)))]
|
||||||
|
(prn cleaned-data)
|
||||||
|
(with-open [writer (io/writer (str "find-pair-" train-or-test ".csv"))]
|
||||||
|
(csv/write-csv writer
|
||||||
|
(doall cleaned-data)))))
|
||||||
|
|
||||||
|
(defn generate-data-for-problem [problem]
|
||||||
|
(map (partial generate-data problem) '["test" "train"]))
|
||||||
|
|
||||||
|
(defn generate-data-for-all-problems []
|
||||||
|
(map (partial generate-data-for-problem) '["gcd"
|
||||||
|
"find-pair"]))
|
||||||
|
|
||||||
|
|
||||||
|
;--------PSB1
|
||||||
|
|
||||||
|
(defn read-data [problem qual]
|
||||||
|
(with-open [reader (io/reader (str "src/propeller/problems/PSB1/" problem "-" qual ".csv"))]
|
||||||
|
(doall
|
||||||
|
(csv/read-csv reader))))
|
||||||
|
|
||||||
|
(defn edge-cases-for-problem [problem]
|
||||||
|
(read-data problem "edge"))
|
||||||
|
|
||||||
|
(defn training-cases-for-problem [shuffled-data problem]
|
||||||
|
(let [edge-cases (edge-cases-for-problem problem)
|
||||||
|
left (- 201 (count edge-cases)) ;because labels is first
|
||||||
|
random-cases (take left shuffled-data)]
|
||||||
|
(concat edge-cases random-cases)))
|
||||||
|
|
||||||
|
(defn testing-cases-for-problem [shuffled-data problem]
|
||||||
|
(take 1000 (drop 500 shuffled-data)))
|
||||||
|
|
||||||
|
(defn save-train-test-data [shuffled-data problem train-or-test]
|
||||||
|
(with-open [writer (io/writer (str problem "-" train-or-test ".csv"))]
|
||||||
|
(csv/write-csv writer
|
||||||
|
(if (= train-or-test "train")
|
||||||
|
(training-cases-for-problem shuffled-data problem)
|
||||||
|
(testing-cases-for-problem shuffled-data problem)))))
|
||||||
|
|
||||||
|
(defn save-data-for-problem [problem]
|
||||||
|
(let [shuffled-data (shuffle (rest (read-data problem "random")))]
|
||||||
|
(map (partial save-train-test-data shuffled-data problem) '["test" "train"])))
|
||||||
|
|
||||||
|
(defn save-data-for-all-problems []
|
||||||
|
(map (partial save-data-for-problem) '["small-or-large"
|
||||||
|
"scrabble-score"
|
||||||
|
"grade"
|
||||||
|
"count-odds"]))
|
||||||
|
|
||||||
|
(defn read-string-and-convert [elem]
|
||||||
|
(if (= elem "")
|
||||||
|
""
|
||||||
|
(let [before (read-string elem)]
|
||||||
|
(if (symbol? before)
|
||||||
|
elem
|
||||||
|
before))))
|
||||||
|
|
||||||
|
|
||||||
|
(defn read-data-formatted [problem train-or-test]
|
||||||
|
(apply list (with-open [reader (io/reader (str "picked/" problem "-" train-or-test ".csv"))]
|
||||||
|
(let [csv-data (csv/read-csv reader)]
|
||||||
|
(mapv zipmap
|
||||||
|
(->> (first csv-data) ;; First row is the header
|
||||||
|
(map keyword) ;; Drop if you want string keys instead
|
||||||
|
repeat)
|
||||||
|
(map (fn [elem] (map #(read-string-and-convert %) elem)) (rest csv-data)))))))
|
||||||
|
|
||||||
|
|
||||||
|
;scrabble-score doesn't play nice with read-string, hacky solution below
|
||||||
|
(defn scrabble-score-read-data-formatted [problem train-or-test]
|
||||||
|
(apply list (with-open [reader (io/reader (str "picked/" problem "-" train-or-test ".csv"))]
|
||||||
|
(let [csv-data (csv/read-csv reader)]
|
||||||
|
(prn {:csv-data csv-data})
|
||||||
|
(mapv zipmap
|
||||||
|
(->> (first csv-data) ;; First row is the header
|
||||||
|
(map keyword) ;; Drop if you want string keys instead
|
||||||
|
repeat)
|
||||||
|
(map (fn [elem] (list (s/replace (first elem) "\\n" "\n") (read-string (second elem)))) (rest csv-data)))))))
|
@ -23,6 +23,10 @@
|
|||||||
:float_subtract
|
:float_subtract
|
||||||
:float_mult
|
:float_mult
|
||||||
:float_quot
|
:float_quot
|
||||||
|
:float_eq
|
||||||
|
:exec_dup
|
||||||
|
:exec_if
|
||||||
|
'close
|
||||||
0.0
|
0.0
|
||||||
1.0))
|
1.0))
|
||||||
|
|
||||||
|
@ -12,12 +12,15 @@
|
|||||||
(+ (* x x x) (* 2 x x) x 3))
|
(+ (* x x x) (* 2 x x) x 3))
|
||||||
|
|
||||||
(def train-and-test-data
|
(def train-and-test-data
|
||||||
|
"Training data: Inputs and outputs with -10 <= x < 11
|
||||||
|
Test data: Inputs and outputs of -20 <= x < -10 and 11 <= x < 21"
|
||||||
(let [train-inputs (range -10 11)
|
(let [train-inputs (range -10 11)
|
||||||
test-inputs (concat (range -20 -10) (range 11 21))]
|
test-inputs (concat (range -20 -10) (range 11 21))]
|
||||||
{:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs)
|
{:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs)
|
||||||
:test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)}))
|
:test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)}))
|
||||||
|
|
||||||
(def instructions
|
(def instructions
|
||||||
|
"stack-specific instructions, input instructions, close, and constants"
|
||||||
(list :in1
|
(list :in1
|
||||||
:integer_add
|
:integer_add
|
||||||
:integer_subtract
|
:integer_subtract
|
||||||
|
@ -87,11 +87,17 @@
|
|||||||
:error-function error-function
|
:error-function error-function
|
||||||
:training-data (:train train-and-test-data)
|
:training-data (:train train-and-test-data)
|
||||||
:testing-data (:test train-and-test-data)
|
:testing-data (:test train-and-test-data)
|
||||||
|
:case-t-size (count (:train train-and-test-data))
|
||||||
|
:ds-parent-rate 0
|
||||||
|
:ds-parent-gens 1
|
||||||
|
:ds-function :case-rand
|
||||||
:max-generations 500
|
:max-generations 500
|
||||||
:population-size 500
|
:population-size 500
|
||||||
:max-initial-plushy-size 100
|
:max-initial-plushy-size 100
|
||||||
:step-limit 200
|
:step-limit 200
|
||||||
:parent-selection :lexicase
|
:parent-selection :lexicase
|
||||||
|
:downsample? false
|
||||||
|
:tournament-size 5
|
||||||
:umad-rate 0.1
|
:umad-rate 0.1
|
||||||
:variation {:umad 1.0 :crossover 0.0}
|
:variation {:umad 1.0 :crossover 0.0}
|
||||||
:elitism false}
|
:elitism false}
|
||||||
|
@ -1,90 +0,0 @@
|
|||||||
(ns propeller.problems.simple-regression
|
|
||||||
"Simple Regression:
|
|
||||||
|
|
||||||
Given inputs and outputs, find the target function."
|
|
||||||
{:doc/format :markdown}
|
|
||||||
(:require [propeller.genome :as genome]
|
|
||||||
[propeller.push.interpreter :as interpreter]
|
|
||||||
[propeller.push.state :as state]
|
|
||||||
[propeller.tools.math :as math]
|
|
||||||
[propeller.gp :as gp]
|
|
||||||
#?(:cljs [cljs.reader :refer [read-string]])))
|
|
||||||
|
|
||||||
(defn- target-function
|
|
||||||
"Target function: f(x) = x^3 + x + 3"
|
|
||||||
[x]
|
|
||||||
(+ (* x x x) x 3))
|
|
||||||
|
|
||||||
(def train-and-test-data
|
|
||||||
"Training data: Inputs and outputs with -10 <= x < 11
|
|
||||||
|
|
||||||
Test data: Inputs and outputs of -20 <= x < -10 and 11 <= x < 21"
|
|
||||||
(let [train-inputs (range -10 11)
|
|
||||||
test-inputs (concat (range -20 -10) (range 11 21))]
|
|
||||||
{:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs)
|
|
||||||
:test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)}))
|
|
||||||
|
|
||||||
(def instructions
|
|
||||||
"stack-specific instructions, input instructions, close, and constants"
|
|
||||||
(list :in1
|
|
||||||
:integer_add
|
|
||||||
:integer_subtract
|
|
||||||
:integer_mult
|
|
||||||
:integer_quot
|
|
||||||
:integer_eq
|
|
||||||
:exec_dup
|
|
||||||
:exec_if
|
|
||||||
'close
|
|
||||||
0
|
|
||||||
1))
|
|
||||||
|
|
||||||
(defn error-function
|
|
||||||
"Finds the behaviors and errors of an individual. The error is the absolute
|
|
||||||
deviation between the target output value and the program's selected behavior,
|
|
||||||
or 1000000 if no behavior is produced. The behavior is here defined as the
|
|
||||||
final top item on the INTEGER stack."
|
|
||||||
([argmap data individual]
|
|
||||||
(let [program (genome/plushy->push (:plushy individual) argmap)
|
|
||||||
inputs (map (fn [x] (first (:input1 x))) data)
|
|
||||||
correct-outputs (map (fn [x] (first (:output1 x))) data)
|
|
||||||
outputs (map (fn [input]
|
|
||||||
(state/peek-stack
|
|
||||||
(interpreter/interpret-program
|
|
||||||
program
|
|
||||||
(assoc state/empty-state :input {:in1 input})
|
|
||||||
(:step-limit argmap))
|
|
||||||
:integer))
|
|
||||||
inputs)
|
|
||||||
errors (map (fn [correct-output output]
|
|
||||||
(if (= output :no-stack-item)
|
|
||||||
1000000
|
|
||||||
(math/abs (- correct-output output))))
|
|
||||||
correct-outputs
|
|
||||||
outputs)]
|
|
||||||
(assoc individual
|
|
||||||
:behaviors outputs
|
|
||||||
:errors errors
|
|
||||||
:total-error #?(:clj (apply +' errors)
|
|
||||||
:cljs (apply + errors))))))
|
|
||||||
|
|
||||||
(defn -main
|
|
||||||
"Runs the top-level genetic programming function, giving it a map of
|
|
||||||
arguments with defaults that can be overridden from the command line
|
|
||||||
or through a passed map."
|
|
||||||
[& args]
|
|
||||||
(gp/gp
|
|
||||||
(merge
|
|
||||||
{:instructions instructions
|
|
||||||
:error-function error-function
|
|
||||||
:training-data (:train train-and-test-data)
|
|
||||||
:testing-data (:test train-and-test-data)
|
|
||||||
:max-generations 500
|
|
||||||
:population-size 500
|
|
||||||
:max-initial-plushy-size 100
|
|
||||||
:step-limit 200
|
|
||||||
:parent-selection :lexicase
|
|
||||||
:tournament-size 5
|
|
||||||
:umad-rate 0.1
|
|
||||||
:variation {:umad 0.5 :crossover 0.5}
|
|
||||||
:elitism false}
|
|
||||||
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))
|
|
@ -105,6 +105,6 @@ Given a string, return true if it contains A, C, G, and T. Else return false."
|
|||||||
:parent-selection :lexicase
|
:parent-selection :lexicase
|
||||||
:tournament-size 5
|
:tournament-size 5
|
||||||
:umad-rate 0.1
|
:umad-rate 0.1
|
||||||
:variation {:umad 0.5 :crossover 0.5}
|
:variation {:umad 1.0 :crossover 0.0}
|
||||||
:elitism false}
|
:elitism false}
|
||||||
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))
|
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))
|
||||||
|
18
src/propeller/selection.cljc
Executable file → Normal file
18
src/propeller/selection.cljc
Executable file → Normal file
@ -30,6 +30,23 @@
|
|||||||
survivors)
|
survivors)
|
||||||
(rest cases))))))
|
(rest cases))))))
|
||||||
|
|
||||||
|
(defn fitness-proportionate-selection
|
||||||
|
"Selects an individual from the population using a fitness proportionate selection."
|
||||||
|
[pop argmap]
|
||||||
|
(let [pop-fits (->> pop ;convert from error to fitness, where fitness (probability) is (1/ (1+ tot_err))
|
||||||
|
(map #(assoc % :fitness (/ 1 (inc (:total-error %))))))
|
||||||
|
pop-total-fit (->> pop-fits
|
||||||
|
(map :fitness)
|
||||||
|
(reduce +))
|
||||||
|
random-num (* (rand) pop-total-fit)
|
||||||
|
sorted-by-fitness (->> pop-fits
|
||||||
|
(sort-by :fitness)
|
||||||
|
(reverse))]
|
||||||
|
(loop [tot (:fitness (first sorted-by-fitness)) individuals sorted-by-fitness]
|
||||||
|
(if (< random-num tot)
|
||||||
|
(first individuals)
|
||||||
|
(recur (+ tot (:fitness (first (rest individuals)))) (rest individuals))))))
|
||||||
|
|
||||||
(defn motley-batch-lexicase-selection
|
(defn motley-batch-lexicase-selection
|
||||||
"Selects an individual from the population using motley batch lexicase selection.
|
"Selects an individual from the population using motley batch lexicase selection.
|
||||||
Cases are combined in random collections of max size (:max-batch-size argmap)."
|
Cases are combined in random collections of max size (:max-batch-size argmap)."
|
||||||
@ -91,5 +108,6 @@
|
|||||||
(case (:parent-selection argmap)
|
(case (:parent-selection argmap)
|
||||||
:tournament (tournament-selection pop argmap)
|
:tournament (tournament-selection pop argmap)
|
||||||
:lexicase (lexicase-selection pop argmap)
|
:lexicase (lexicase-selection pop argmap)
|
||||||
|
:roulette (fitness-proportionate-selection pop argmap)
|
||||||
:epsilon-lexicase (epsilon-lexicase-selection pop argmap)
|
:epsilon-lexicase (epsilon-lexicase-selection pop argmap)
|
||||||
:motley-batch-lexicase (motley-batch-lexicase-selection pop argmap)))
|
:motley-batch-lexicase (motley-batch-lexicase-selection pop argmap)))
|
||||||
|
@ -26,8 +26,7 @@ The output with verbose adds the following lines to the output:
|
|||||||
(:require [propeller.genome :as genome]
|
(:require [propeller.genome :as genome]
|
||||||
[propeller.push.interpreter :as interpreter]
|
[propeller.push.interpreter :as interpreter]
|
||||||
[propeller.push.state :as state]
|
[propeller.push.state :as state]
|
||||||
[propeller.tools.math :as math]
|
[propeller.tools.math :as math]))
|
||||||
))
|
|
||||||
|
|
||||||
(defn choose-random-k
|
(defn choose-random-k
|
||||||
"Takes k random indices"
|
"Takes k random indices"
|
||||||
|
10
src/propeller/tools/math.cljc
Executable file → Normal file
10
src/propeller/tools/math.cljc
Executable file → Normal file
@ -7,6 +7,11 @@
|
|||||||
(defonce ^{:no-doc true :const true} E #?(:clj Math/E
|
(defonce ^{:no-doc true :const true} E #?(:clj Math/E
|
||||||
:cljs js/Math.PI))
|
:cljs js/Math.PI))
|
||||||
|
|
||||||
|
(defn step
|
||||||
|
"returns 1 if number is nonzero, 0 otherwise"
|
||||||
|
[x]
|
||||||
|
(if (zero? x) 0 1))
|
||||||
|
|
||||||
(defn mean
|
(defn mean
|
||||||
"Returns the mean."
|
"Returns the mean."
|
||||||
[coll]
|
[coll]
|
||||||
@ -132,3 +137,8 @@
|
|||||||
[x]
|
[x]
|
||||||
#?(:clj (Math/tan x)
|
#?(:clj (Math/tan x)
|
||||||
:cljs (js/Math.tan x)))
|
:cljs (js/Math.tan x)))
|
||||||
|
|
||||||
|
(defn transpose
|
||||||
|
"returns a vector containing the transpose of a coll of colls"
|
||||||
|
[x]
|
||||||
|
(apply map vector x))
|
@ -2,11 +2,47 @@
|
|||||||
"Functions to measure things."
|
"Functions to measure things."
|
||||||
(:require [propeller.tools.math :as math]))
|
(:require [propeller.tools.math :as math]))
|
||||||
|
|
||||||
|
(defn argmins
|
||||||
|
"returns the indice(s) of the minimum value of a list. Could be more efficient, probably"
|
||||||
|
[coll]
|
||||||
|
(if (empty? coll) '()
|
||||||
|
(let [m (apply min coll)]
|
||||||
|
(keep-indexed #(when (= m %2) %1) coll))))
|
||||||
|
|
||||||
|
(defn argmax-last
|
||||||
|
"returns the index of the maximum value in a list, tiebreaking last"
|
||||||
|
[coll]
|
||||||
|
(->> coll
|
||||||
|
(map-indexed vector)
|
||||||
|
(apply max-key second)
|
||||||
|
first))
|
||||||
|
|
||||||
|
(defn argmax
|
||||||
|
"returns the index of the maximum value in a list, randomly tiebreaking"
|
||||||
|
[coll]
|
||||||
|
(if (zero? (count coll))
|
||||||
|
:null
|
||||||
|
(->> coll
|
||||||
|
(map-indexed vector)
|
||||||
|
(filter #(= (apply max coll) (second %)))
|
||||||
|
rand-nth
|
||||||
|
first)))
|
||||||
|
|
||||||
(defn mean
|
(defn mean
|
||||||
"Returns the mean of a collection."
|
"Returns the mean of a collection."
|
||||||
[coll]
|
[coll]
|
||||||
(if (empty? coll) 0 (math/div (apply + coll) (count coll))))
|
(if (empty? coll) 0 (math/div (apply + coll) (count coll))))
|
||||||
|
|
||||||
|
(defn mean-of-colls
|
||||||
|
"returns the mean of multiple colls"
|
||||||
|
[coll]
|
||||||
|
(map mean (math/transpose coll)))
|
||||||
|
|
||||||
|
(defn min-of-colls
|
||||||
|
"returns the smallest value of multiple colls"
|
||||||
|
[coll]
|
||||||
|
(map #(apply min %) (math/transpose coll)))
|
||||||
|
|
||||||
(defn median
|
(defn median
|
||||||
"Returns the median of a collection."
|
"Returns the median of a collection."
|
||||||
[coll]
|
[coll]
|
||||||
@ -29,12 +65,12 @@
|
|||||||
"computes the next row using the prev-row current-element and the other seq"
|
"computes the next row using the prev-row current-element and the other seq"
|
||||||
[prev-row current-element other-seq pred]
|
[prev-row current-element other-seq pred]
|
||||||
(reduce
|
(reduce
|
||||||
(fn [row [diagonal above other-element]]
|
(fn [row [diagonal above other-element]]
|
||||||
(let [update-val (if (pred other-element current-element)
|
(let [update-val (if (pred other-element current-element)
|
||||||
;; if the elements are deemed equivalent according to the predicate
|
;; if the elements are deemed equivalent according to the predicate
|
||||||
;; pred, then no change has taken place to the string, so we are
|
;; pred, then no change has taken place to the string, so we are
|
||||||
;; going to set it the same value as diagonal (which is the previous edit-distance)
|
;; going to set it the same value as diagonal (which is the previous edit-distance)
|
||||||
diagonal
|
diagonal
|
||||||
;; in the case where the elements are not considered equivalent, then we are going
|
;; in the case where the elements are not considered equivalent, then we are going
|
||||||
;; to figure out if its a substitution (then there is a change of 1 from the previous
|
;; to figure out if its a substitution (then there is a change of 1 from the previous
|
||||||
;; edit distance) thus the value is diagonal + 1 or if its a deletion, then the value
|
;; edit distance) thus the value is diagonal + 1 or if its a deletion, then the value
|
||||||
@ -42,18 +78,18 @@
|
|||||||
;; of last of row + 1 (since we will be using vectors, peek is more efficient)
|
;; of last of row + 1 (since we will be using vectors, peek is more efficient)
|
||||||
;; or it could be a case of insertion, then the value is above+1, and we chose
|
;; or it could be a case of insertion, then the value is above+1, and we chose
|
||||||
;; the minimum of the three
|
;; the minimum of the three
|
||||||
(inc (min diagonal above (peek row))))]
|
(inc (min diagonal above (peek row))))]
|
||||||
|
|
||||||
(conj row update-val)))
|
(conj row update-val)))
|
||||||
;; we need to initialize the reduce function with the value of a row, since we are
|
;; we need to initialize the reduce function with the value of a row, since we are
|
||||||
;; constructing this row from the previous one, the row is a vector of 1 element which
|
;; constructing this row from the previous one, the row is a vector of 1 element which
|
||||||
;; consists of 1 + the first element in the previous row (edit distance between the prefix so far
|
;; consists of 1 + the first element in the previous row (edit distance between the prefix so far
|
||||||
;; and an empty string)
|
;; and an empty string)
|
||||||
[(inc (first prev-row))]
|
[(inc (first prev-row))]
|
||||||
;; for the reduction to go over, we need to provide it with three values, the diagonal
|
;; for the reduction to go over, we need to provide it with three values, the diagonal
|
||||||
;; which is the same as prev-row because it starts from 0, the above, which is the next element
|
;; which is the same as prev-row because it starts from 0, the above, which is the next element
|
||||||
;; from the list and finally the element from the other sequence itself.
|
;; from the list and finally the element from the other sequence itself.
|
||||||
(map vector prev-row (next prev-row) other-seq)))
|
(map vector prev-row (next prev-row) other-seq)))
|
||||||
|
|
||||||
(defn levenshtein-distance
|
(defn levenshtein-distance
|
||||||
"Levenshtein Distance - http://en.wikipedia.org/wiki/Levenshtein_distance
|
"Levenshtein Distance - http://en.wikipedia.org/wiki/Levenshtein_distance
|
||||||
@ -66,17 +102,17 @@
|
|||||||
(empty? (str a)) (count (str b)) ;; sometimes stack pushes numbers, force
|
(empty? (str a)) (count (str b)) ;; sometimes stack pushes numbers, force
|
||||||
(empty? (str b)) (count (str a)) ;; a and b to be strings
|
(empty? (str b)) (count (str a)) ;; a and b to be strings
|
||||||
:else (peek
|
:else (peek
|
||||||
(reduce
|
(reduce
|
||||||
;; we use a simple reduction to convert the previous row into the
|
;; we use a simple reduction to convert the previous row into the
|
||||||
;; next-row using the compute-next-row which takes a current
|
;; next-row using the compute-next-row which takes a current
|
||||||
;; element, the previous-row computed so far, and the predicate
|
;; element, the previous-row computed so far, and the predicate
|
||||||
;; to compare for equality
|
;; to compare for equality
|
||||||
(fn [prev-row current-element]
|
(fn [prev-row current-element]
|
||||||
(compute-next-row prev-row current-element (str b) p))
|
(compute-next-row prev-row current-element (str b) p))
|
||||||
;; we need to initialize the prev-row with the edit distance
|
;; we need to initialize the prev-row with the edit distance
|
||||||
;; between the various prefixes of b and the empty string
|
;; between the various prefixes of b and the empty string
|
||||||
(range (inc (count (str b))))
|
(range (inc (count (str b))))
|
||||||
(str a)))))
|
(str a)))))
|
||||||
|
|
||||||
(defn sequence-similarity
|
(defn sequence-similarity
|
||||||
"Returns a number between 0 and 1, indicating how similar the sequences are
|
"Returns a number between 0 and 1, indicating how similar the sequences are
|
||||||
|
@ -3,6 +3,20 @@
|
|||||||
(:require [clojure.zip :as zip]
|
(:require [clojure.zip :as zip]
|
||||||
[clojure.repl :as repl]))
|
[clojure.repl :as repl]))
|
||||||
|
|
||||||
|
(defn filter-by-index
|
||||||
|
"filters a collection by a list of indices"
|
||||||
|
[coll idxs]
|
||||||
|
;(prn {:func :filter-by-index :coll coll :idxs idxs})
|
||||||
|
(map (partial nth coll) idxs))
|
||||||
|
|
||||||
|
(defn drop-nth
|
||||||
|
"drops the nth element from a collection"
|
||||||
|
[n coll]
|
||||||
|
;(prn {:func :drop-nth :n n :coll coll})
|
||||||
|
(concat
|
||||||
|
(take n coll)
|
||||||
|
(nthrest coll (inc n))))
|
||||||
|
|
||||||
(defn first-non-nil
|
(defn first-non-nil
|
||||||
"Returns the first non-nil values from the collection, or returns `nil` if
|
"Returns the first non-nil values from the collection, or returns `nil` if
|
||||||
the collection is empty or only contains `nil`."
|
the collection is empty or only contains `nil`."
|
||||||
|
4
src/propeller/variation.cljc
Executable file → Normal file
4
src/propeller/variation.cljc
Executable file → Normal file
@ -254,6 +254,8 @@ The function `new-individual` returns a new individual produced by selection and
|
|||||||
"Returns a new individual produced by selection and variation of
|
"Returns a new individual produced by selection and variation of
|
||||||
individuals in the population."
|
individuals in the population."
|
||||||
[pop argmap]
|
[pop argmap]
|
||||||
|
(let [umad-parent (selection/select-parent pop argmap)
|
||||||
|
parent-ind (:index umad-parent)] ;this is a hack to log hyperselection, only works for umad
|
||||||
{:plushy
|
{:plushy
|
||||||
(let [r (rand)
|
(let [r (rand)
|
||||||
op (loop [accum 0.0
|
op (loop [accum 0.0
|
||||||
@ -381,4 +383,4 @@ The function `new-individual` returns a new individual produced by selection and
|
|||||||
:else
|
:else
|
||||||
(throw #?(:clj (Exception. (str "No match in new-individual for " op))
|
(throw #?(:clj (Exception. (str "No match in new-individual for " op))
|
||||||
:cljs (js/Error
|
:cljs (js/Error
|
||||||
(str "No match in new-individual for " op))))))})
|
(str "No match in new-individual for " op))))))}))
|
163
test/propeller/push/downsample_test.cljc
Normal file
163
test/propeller/push/downsample_test.cljc
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
(ns propeller.push.downsample-test
|
||||||
|
(:require [clojure.test :as t]
|
||||||
|
[propeller.utils :as u]
|
||||||
|
[propeller.simplification :as s]
|
||||||
|
[propeller.downsample :as ds]
|
||||||
|
[propeller.hyperselection :as hs]))
|
||||||
|
|
||||||
|
|
||||||
|
(t/deftest assign-indices-to-data-test
|
||||||
|
(t/testing "assign-indices-to-data"
|
||||||
|
(t/testing "should return a map of the same length"
|
||||||
|
(t/is (= (count (ds/assign-indices-to-data (range 10))) 10))
|
||||||
|
(t/is (= (count (ds/assign-indices-to-data (range 0))) 0)))
|
||||||
|
(t/testing "should return a map where each element has an index key"
|
||||||
|
(t/is (every? #(:index %) (ds/assign-indices-to-data (map #(assoc {} :input %) (range 10))))))
|
||||||
|
(t/testing "should return distinct indices"
|
||||||
|
(t/is (= (map #(:index %) (ds/assign-indices-to-data (range 10))) (range 10))))))
|
||||||
|
|
||||||
|
(t/deftest select-downsample-random-test
|
||||||
|
(t/testing "select-downsample-random"
|
||||||
|
(t/testing "should select the correct amount of elements"
|
||||||
|
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.1})) 1))
|
||||||
|
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.2})) 2))
|
||||||
|
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 0.5})) 5)))
|
||||||
|
(t/testing "should not return duplicate items (when called with set of numbers)"
|
||||||
|
(t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.1}))) 1))
|
||||||
|
(t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.2}))) 2))
|
||||||
|
(t/is (= (count (set (ds/select-downsample-random (range 10) {:downsample-rate 0.5}))) 5)))
|
||||||
|
(t/testing "should round down the number of elements selected if not whole"
|
||||||
|
(t/is (= (count (ds/select-downsample-random (range 3) {:downsample-rate 0.5})) 1))
|
||||||
|
(t/is (= (count (ds/select-downsample-random (range 1) {:downsample-rate 0.5})) 0)))
|
||||||
|
(t/testing "should not return more elements than available"
|
||||||
|
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 2})) 10))
|
||||||
|
(t/is (= (count (ds/select-downsample-random (range 10) {:downsample-rate 1.5})) 10)))))
|
||||||
|
|
||||||
|
(t/deftest get-distance-between-cases-test
|
||||||
|
(t/testing "get-distance-between-cases"
|
||||||
|
(t/testing "should return correct distance"
|
||||||
|
(t/is (= 3 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 1))))
|
||||||
|
(t/testing "should return 0 for the distance of a case to itself"
|
||||||
|
(t/is (= 0 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 0))))
|
||||||
|
(t/testing "should work for non binary values (0 is solved)"
|
||||||
|
(t/is (= 1 (ds/get-distance-between-cases '((0 2 2) (0 2 2) (1 0 50)) 1 2))))
|
||||||
|
(t/testing "should return the max distance if one of the cases does not exist"
|
||||||
|
(t/is (= 3 (ds/get-distance-between-cases '((0 1 1) (0 1 1) (1 0 1)) 0 4))))))
|
||||||
|
|
||||||
|
(t/deftest merge-map-lists-at-index-test
|
||||||
|
(t/testing "merge-map-lists-at-index"
|
||||||
|
(t/testing "works properly"
|
||||||
|
(t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 2 :b 3}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '({:index 1 :a 2 :b 3})))))
|
||||||
|
(t/testing "doesn't change big list if no indices match"
|
||||||
|
(t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '({:index 3 :a 2 :b 3})))))
|
||||||
|
(t/testing "doesn't fail on empty list"
|
||||||
|
(t/is (= '() (ds/merge-map-lists-at-index '() '()))))
|
||||||
|
(t/testing "shouldn't fail merging non-empty with empty"
|
||||||
|
(t/is (= '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) (ds/merge-map-lists-at-index '({:index 0 :a 3 :b 2} {:index 1 :a 1 :b 2}) '()))))))
|
||||||
|
|
||||||
|
(t/deftest update-at-indices-test
|
||||||
|
(t/testing "update-at-indices"
|
||||||
|
(t/testing "should update at correct indices"
|
||||||
|
(t/is (= (ds/update-at-indices [1 2 3 4] [5] [0]) [5 2 3 4]))
|
||||||
|
(t/is (= (ds/update-at-indices [1 2 3 4] [5] [0]) [5 2 3 4])))
|
||||||
|
(t/testing "should update nothing if index list is empty"
|
||||||
|
(t/is (= (ds/update-at-indices [6 5 4 0 0] [] []) [6 5 4 0 0])))
|
||||||
|
(t/testing "should update nothing if index list is out of bounds"
|
||||||
|
(t/is (= (ds/update-at-indices [6 5 4 0 0] [4 5 1] [-1 5 6]) [6 5 4 0 0])))
|
||||||
|
(t/testing "should update only when indices are available (length mismatch)"
|
||||||
|
(t/is (= (ds/update-at-indices [6 5 4 0 0] [1 2 3 4] [0 1]) [1 2 4 0 0])))
|
||||||
|
(t/testing "should not care about index order"
|
||||||
|
(t/is (= (ds/update-at-indices [6 5 4 0 0] [2 1] [1 0]) [1 2 4 0 0])))
|
||||||
|
(t/testing "should work when input is a list"
|
||||||
|
(t/is (= (ds/update-at-indices '(6 5 4 0 0) '(2 1) '(1 0)) [1 2 4 0 0])))))
|
||||||
|
|
||||||
|
|
||||||
|
(t/deftest ids-types-test
|
||||||
|
(t/testing "replace-close-zero-with-zero"
|
||||||
|
(t/testing "should replace the close to zero values with zero"
|
||||||
|
(t/is (= (ds/replace-close-zero-with-zero '(0.1 2 3 4 0.1 2 3 4) 0.2) '(0 2 3 4 0 2 3 4)))
|
||||||
|
(t/is (= (ds/replace-close-zero-with-zero '(0.1 0.1) 0.0) '(0.1 0.1)))
|
||||||
|
(t/is (= (ds/replace-close-zero-with-zero '(100 100 200) 100) '(0 0 200))))))
|
||||||
|
|
||||||
|
(t/deftest update-case-distances-test
|
||||||
|
(t/testing "update-case-distances"
|
||||||
|
(t/testing "should update correctly when fewer errors than all"
|
||||||
|
(t/is (= (ds/update-case-distances '({:errors (0 0)} {:errors (0 0)})
|
||||||
|
'({:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
|
||||||
|
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
|
||||||
|
:solved)
|
||||||
|
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]}
|
||||||
|
{:index 3 :distances [2 2 2 0 0]} {:index 4 :distances [2 2 2 0 0]}))))
|
||||||
|
(t/testing "should update correctly when same errors as all"
|
||||||
|
(t/is (= (ds/update-case-distances '({:errors (0 0 0 0 0)} {:errors (0 0 0 0 0)})
|
||||||
|
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
|
||||||
|
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
|
||||||
|
:solved)
|
||||||
|
'({:index 0 :distances [0 0 0 0 0]} {:index 1 :distances [0 0 0 0 0]} {:index 2 :distances [0 0 0 0 0]}
|
||||||
|
{:index 3 :distances [0 0 0 0 0]} {:index 4 :distances [0 0 0 0 0]}))))
|
||||||
|
(t/testing "should update correctly for elite/not-elite"
|
||||||
|
(t/is (= (ds/update-case-distances '({:errors (1 1 1 2 2)} {:errors (2 2 2 1 1)})
|
||||||
|
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
|
||||||
|
'({:index 0 :distances [2 2 2 2 2]} {:index 1 :distances [2 2 2 2 2]} {:index 2 :distances [2 2 2 2 2]} {:index 3 :distances [2 2 2 2 2]} {:index 4 :distances [2 2 2 2 2]})
|
||||||
|
:elite)
|
||||||
|
'({:index 0 :distances [0 0 0 2 2]} {:index 1 :distances [0 0 0 2 2]} {:index 2 :distances [0 0 0 2 2]}
|
||||||
|
{:index 3 :distances [2 2 2 0 0]} {:index 4 :distances [2 2 2 0 0]})))
|
||||||
|
)))
|
||||||
|
|
||||||
|
(t/deftest case-maxmin-test
|
||||||
|
(t/testing "case-maxmin selects correct downsample"
|
||||||
|
(let [selected (ds/select-downsample-maxmin
|
||||||
|
'({:input1 [0] :output1 [10] :index 0 :distances [0 5 0 0 0]}
|
||||||
|
{:input1 [1] :output1 [11] :index 1 :distances [5 0 5 5 5]}
|
||||||
|
{:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]}
|
||||||
|
{:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]}
|
||||||
|
{:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]})
|
||||||
|
{:downsample-rate 0.4})]
|
||||||
|
(prn {:selected selected})
|
||||||
|
(t/is (or (= (:index (first selected)) 1) (= (:index (second selected)) 1))))))
|
||||||
|
|
||||||
|
(t/deftest case-maxmin-adaptive
|
||||||
|
(t/testing "case-maxmin-adaptive selects correct downsample simple"
|
||||||
|
(let [selected (ds/select-downsample-maxmin-adaptive
|
||||||
|
'({:input1 [0] :output1 [10] :index 0 :distances [0 5 0 0 0]}
|
||||||
|
{:input1 [1] :output1 [11] :index 1 :distances [5 0 5 5 5]}
|
||||||
|
{:input1 [2] :output1 [12] :index 2 :distances [0 5 0 0 0]}
|
||||||
|
{:input1 [3] :output1 [13] :index 3 :distances [0 5 0 0 0]}
|
||||||
|
{:input1 [4] :output1 [14] :index 4 :distances [0 5 0 0 0]})
|
||||||
|
{:case-delta 0})]
|
||||||
|
(prn {:selected selected})
|
||||||
|
(t/is (or (= (:index (first selected)) 1) (= (:index (second selected)) 1)))
|
||||||
|
(t/is (= 2 (count selected)))))
|
||||||
|
(t/testing "case-maxmin-adaptive selects correct downsample when all identical"
|
||||||
|
(let [selected (ds/select-downsample-maxmin-adaptive
|
||||||
|
'({:input1 [0] :output1 [10] :index 0 :distances [0 0 0 0 0]}
|
||||||
|
{:input1 [1] :output1 [11] :index 1 :distances [0 0 0 0 0]}
|
||||||
|
{:input1 [2] :output1 [12] :index 2 :distances [0 0 0 0 0]}
|
||||||
|
{:input1 [3] :output1 [13] :index 3 :distances [0 0 0 0 0]}
|
||||||
|
{:input1 [4] :output1 [14] :index 4 :distances [0 0 0 0 0]})
|
||||||
|
{:case-delta 0})]
|
||||||
|
(prn {:selected selected})
|
||||||
|
(t/is (= 1 (count selected))))))
|
||||||
|
|
||||||
|
|
||||||
|
(t/deftest hyperselection-test
|
||||||
|
(let [parents1 '({:blah 3 :index 1} {:blah 3 :index 1}
|
||||||
|
{:blah 3 :index 1} {:blah 3 :index 2})
|
||||||
|
parents2 '({:plushy 2 :index 0} {:blah 3 :index 2}
|
||||||
|
{:blah 3 :index 3} {:index 4})
|
||||||
|
emptyparents '({:blah 1} {:blah 1} {:blah 1})]
|
||||||
|
(t/testing "sum-list-map-indices function works correctly"
|
||||||
|
(t/is (= {1 3, 2 1} (hs/sum-list-map-indices parents1)))
|
||||||
|
(t/is (= {0 1, 2 1, 3 1, 4 1} (hs/sum-list-map-indices parents2))))
|
||||||
|
(t/testing "ordered-freqs function works correctly"
|
||||||
|
(t/is (= '(3 1) (hs/ordered-freqs (hs/sum-list-map-indices parents1))))
|
||||||
|
(t/is (= '(1 1 1 1) (hs/ordered-freqs (hs/sum-list-map-indices parents2)))))
|
||||||
|
(t/testing "hyperselection-track works correctly"
|
||||||
|
(t/is (= '(0.75 0.25) (hs/hyperselection-track parents1)))
|
||||||
|
(t/is (= '(0.25 0.25 0.25 0.25) (hs/hyperselection-track parents2))))
|
||||||
|
(t/testing "reindex-pop works correctly"
|
||||||
|
(t/is (= '({:blah 3 :index 0} {:blah 3 :index 1}
|
||||||
|
{:blah 3 :index 2} {:blah 3 :index 3}) (hs/reindex-pop parents1)))
|
||||||
|
(t/is (= '({:plushy 2 :index 0} {:blah 3 :index 1}
|
||||||
|
{:blah 3 :index 2} {:index 3}) (hs/reindex-pop parents2)))
|
||||||
|
(t/is (= '({:blah 1 :index 0} {:blah 1 :index 1} {:blah 1 :index 2}) (hs/reindex-pop emptyparents))))))
|
23
test/propeller/selection_test.cljc
Normal file
23
test/propeller/selection_test.cljc
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
(ns propeller.selection-test
|
||||||
|
(:require [clojure.test :as t]
|
||||||
|
[propeller.selection :as s]))
|
||||||
|
|
||||||
|
|
||||||
|
(t/deftest roulette-selection-test
|
||||||
|
(t/testing "fitness proportionate selection"
|
||||||
|
(t/testing "should correctly define the probabilities of selection"
|
||||||
|
(t/is (let [ret (s/fitness-proportionate-selection '({:index 0 :total-error 0}
|
||||||
|
{:index 1 :total-error 1}
|
||||||
|
{:index 2 :total-error 1}
|
||||||
|
{:index 3 :total-error 1}) {:empty :argmap})]
|
||||||
|
(case (:index ret)
|
||||||
|
0 (= (:fitness ret) 1) ;if we selected index 0, check that fitness is correctly calculated to 1
|
||||||
|
(= (float (:fitness ret)) 0.5)
|
||||||
|
))))
|
||||||
|
(t/testing "should always return the same individual if there is only one"
|
||||||
|
(t/testing "desipte it having bad error"
|
||||||
|
(t/is (= (:index (s/fitness-proportionate-selection '({:index 99 :total-error 109012390123}) {:empty :argmap}))
|
||||||
|
99)))
|
||||||
|
(t/testing "when it has low error"
|
||||||
|
(t/is (= (:index (s/fitness-proportionate-selection '({:index 22 :total-error 0}) {:empty :argmap}))
|
||||||
|
22))))))
|
@ -3,6 +3,22 @@
|
|||||||
[propeller.tools.metrics :as m]
|
[propeller.tools.metrics :as m]
|
||||||
[propeller.tools.math :as a]))
|
[propeller.tools.math :as a]))
|
||||||
|
|
||||||
|
(t/deftest argmax-test
|
||||||
|
(t/is (= (m/argmax '(1 2 3 4)) 3))
|
||||||
|
(t/is (= 1 (nth '(1 1 1 1) (m/argmax '(1 1 1 1)))))
|
||||||
|
(t/is (= 3 (nth '(1 3 3 1) (m/argmax '(1 3 3 1)))))
|
||||||
|
(t/is (= 0 (m/argmax '(1)))))
|
||||||
|
|
||||||
|
(t/deftest mean-of-colls-test
|
||||||
|
(t/is (= (m/mean-of-colls '((1 2 3 4) (4 3 2 1))) '(2.5 2.5 2.5 2.5)))
|
||||||
|
(t/is (= (m/mean-of-colls '((1 2 3) (4 3 2 1))) '(2.5 2.5 2.5)))
|
||||||
|
(t/is (= (m/mean-of-colls '((1))) '(1.0))))
|
||||||
|
|
||||||
|
(t/deftest min-of-colls-test
|
||||||
|
(t/is (= (m/min-of-colls '((1 2 3 4) (4 3 2 1))) '(1 2 2 1)))
|
||||||
|
(t/is (= (m/min-of-colls '((1 2 3) (4 3 2 1))) '(1 2 2)))
|
||||||
|
(t/is (= (m/min-of-colls '((1))) '(1))))
|
||||||
|
|
||||||
(t/deftest mean-test
|
(t/deftest mean-test
|
||||||
(t/is (= (m/mean '(1 2 3 4)) 2.5))
|
(t/is (= (m/mean '(1 2 3 4)) 2.5))
|
||||||
(t/is (= (m/mean '()) 0)))
|
(t/is (= (m/mean '()) 0)))
|
||||||
@ -13,9 +29,18 @@
|
|||||||
;(t/is (= (m/median '()) 0.0))
|
;(t/is (= (m/median '()) 0.0))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
(t/deftest argmins-test
|
||||||
|
(t/is (= (m/argmins '(1 2 3 4)) '(0)))
|
||||||
|
(t/is (= (m/argmins '(1 1 3 4)) '(0 1)))
|
||||||
|
(t/is (= (m/argmins '()) '()))
|
||||||
|
(t/is (= (m/argmins '(3 4 5 6 6 6)) '(0)))
|
||||||
|
(t/is (= (m/argmins '(6 4 5 6 6 6)) '(1)))
|
||||||
|
(t/is (= (m/argmins '(0 4 5 0 0 0)) '(0 3 4 5))))
|
||||||
|
|
||||||
|
|
||||||
(t/deftest levenshtein-distance-test
|
(t/deftest levenshtein-distance-test
|
||||||
(t/is (= (m/levenshtein-distance "kitten" "sipping") 5))
|
(t/is (= (m/levenshtein-distance "kitten" "sipping") 5))
|
||||||
(t/is (= (m/levenshtein-distance "" "hello")) 5))
|
(t/is (= (m/levenshtein-distance "" "hello") 5)))
|
||||||
|
|
||||||
(t/deftest sequence-similarity-test
|
(t/deftest sequence-similarity-test
|
||||||
(t/is (a/approx= (m/sequence-similarity "kitten" "sipping") 0.2857 0.001))
|
(t/is (a/approx= (m/sequence-similarity "kitten" "sipping") 0.2857 0.001))
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
(ns propeller.utils-test
|
(ns propeller.utils-test
|
||||||
(:require [clojure.test :as t]
|
(:require [clojure.test :as t]
|
||||||
[propeller.utils :as u]
|
[propeller.utils :as u]
|
||||||
[propeller.simplification :as s]))
|
[propeller.simplification :as s]
|
||||||
|
[propeller.downsample :as ds]
|
||||||
|
[propeller.hyperselection :as hs]))
|
||||||
|
|
||||||
(t/deftest first-non-nil-test
|
(t/deftest first-non-nil-test
|
||||||
(t/is (= 1 (u/first-non-nil '(1 2 3))))
|
(t/is (= 1 (u/first-non-nil '(1 2 3))))
|
||||||
@ -22,57 +24,62 @@
|
|||||||
|
|
||||||
(t/deftest random-instruction-test
|
(t/deftest random-instruction-test
|
||||||
(t/is
|
(t/is
|
||||||
(letfn [(instruct [] 1)]
|
(letfn [(instruct [] 1)]
|
||||||
(let [test (u/random-instruction [instruct 2])]
|
(let [test (u/random-instruction [instruct 2])]
|
||||||
(if (= 1 test)
|
(if (= 1 test)
|
||||||
true
|
true
|
||||||
(= 2 test))))))
|
(= 2 test))))))
|
||||||
|
|
||||||
(t/deftest count-points-test
|
(t/deftest count-points-test
|
||||||
(t/is (= 6 (u/count-points '(:a :b (:c :d)))))
|
(t/is (= 6 (u/count-points '(:a :b (:c :d)))))
|
||||||
(t/is (= 1 (u/count-points '())))
|
(t/is (= 1 (u/count-points '())))
|
||||||
(t/is (= 2 (u/count-points '(:a)))))
|
(t/is (= 2 (u/count-points '(:a)))))
|
||||||
|
|
||||||
(t/testing "choose-random-k"
|
(t/deftest choose-random-k-test
|
||||||
(t/testing "should return indices that are a member of the original array"
|
(t/testing "choose-random-k"
|
||||||
(t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10))))))
|
(t/testing "should return indices that are a member of the original array"
|
||||||
(t/testing "should return a list of size k"
|
(t/is (every? identity (map #(contains? (set (range 10)) %) (s/choose-random-k 3 (range 10))))))
|
||||||
(t/is (= (count (s/choose-random-k 7 (range 10))) 7))))
|
(t/testing "should return a list of size k"
|
||||||
|
(t/is (= (count (s/choose-random-k 7 (range 10))) 7)))))
|
||||||
|
|
||||||
(t/testing "delete-at-indices"
|
|
||||||
(t/testing "should actually remove indicated items"
|
|
||||||
(t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3)))))
|
|
||||||
(t/testing "should work with numerical indices"
|
|
||||||
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3)))))
|
|
||||||
(t/testing "should not delete anything for index out of bounds"
|
|
||||||
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3))))
|
|
||||||
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3))))
|
|
||||||
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3)))))
|
|
||||||
(t/testing "should only delete at single index once"
|
|
||||||
(t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3)))))
|
|
||||||
(t/testing "should return empty list when deleting from empty list"
|
|
||||||
(t/is (= '() (s/delete-at-indices '(0) '()))))
|
|
||||||
(t/testing "should be able to delete at arbitrary indices"
|
|
||||||
(t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7))))
|
|
||||||
|
|
||||||
(t/testing "delete-random-k"
|
(t/deftest delete-at-indices-test
|
||||||
(t/testing "should remove the correct amount of items"
|
(t/testing "delete-at-indices"
|
||||||
(t/is (= (count (s/delete-k-random 3 (range 10))) 7))
|
(t/testing "should actually remove indicated items"
|
||||||
(t/is (= (count (s/delete-k-random 10 (range 10))) 0))
|
(t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 3) '(:hi0 :hi1 :hi2 :hi3)))))
|
||||||
(t/is (= (count (s/delete-k-random 0 (range 10))) 10)))
|
(t/testing "should work with numerical indices"
|
||||||
(t/testing "should not fail if k >> size of collection"
|
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0) '(:hi0 :hi1 :hi2 :hi3)))))
|
||||||
(t/is (= (count (s/delete-k-random 300 (range 10))) 0))
|
(t/testing "should not delete anything for index out of bounds"
|
||||||
(t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '())))
|
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 10) '(:hi0 :hi1 :hi2 :hi3))))
|
||||||
(t/testing "should not fail if the collection is empty"
|
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(0 -10) '(:hi0 :hi1 :hi2 :hi3))))
|
||||||
(t/is (= (count (s/delete-k-random 300 '())) 0))
|
(t/is (= '(:hi1 :hi2 :hi3) (s/delete-at-indices '(-0 -10) '(:hi0 :hi1 :hi2 :hi3)))))
|
||||||
(t/is (= (count (s/delete-k-random 0 '())) 0)))
|
(t/testing "should only delete at single index once"
|
||||||
(t/testing "should maintain order of the remaining items"
|
(t/is (= '(:hi1 :hi2) (s/delete-at-indices '(0 0 0 0 3 3 3) '(:hi0 :hi1 :hi2 :hi3)))))
|
||||||
(t/is (apply < (s/delete-k-random 3 (range 10))))))
|
(t/testing "should return empty list when deleting from empty list"
|
||||||
|
(t/is (= '() (s/delete-at-indices '(0) '()))))
|
||||||
|
(t/testing "should be able to delete at arbitrary indices"
|
||||||
|
(t/is (= (count (s/delete-at-indices (s/choose-random-k 3 (range 10)) (range 10))) 7)))))
|
||||||
|
|
||||||
(t/testing "auto-simplify-plushy"
|
(t/deftest delete-random-k-test
|
||||||
(t/testing "should handle having an empty plushy"
|
(t/testing "delete-random-k"
|
||||||
(t/is (= (s/auto-simplify-plushy '() (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false}) '())))
|
(t/testing "should remove the correct amount of items"
|
||||||
(let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)]
|
(t/is (= (count (s/delete-k-random 3 (range 10))) 7))
|
||||||
(t/testing "should decrease size of plushy that always has perfect scores"
|
(t/is (= (count (s/delete-k-random 10 (range 10))) 0))
|
||||||
(t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false})) (count plushy)))
|
(t/is (= (count (s/delete-k-random 0 (range 10))) 10)))
|
||||||
(t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 10 :simplification-verbose? false})) (count plushy))))))
|
(t/testing "should not fail if k >> size of collection"
|
||||||
|
(t/is (= (count (s/delete-k-random 300 (range 10))) 0))
|
||||||
|
(t/is (= (s/delete-k-random 300 '(:hi1 :hi2 :hi3)) '())))
|
||||||
|
(t/testing "should not fail if the collection is empty"
|
||||||
|
(t/is (= (count (s/delete-k-random 300 '())) 0))
|
||||||
|
(t/is (= (count (s/delete-k-random 0 '())) 0)))
|
||||||
|
(t/testing "should maintain order of the remaining items"
|
||||||
|
(t/is (apply < (s/delete-k-random 3 (range 10)))))))
|
||||||
|
|
||||||
|
(t/deftest auto-simplify-plushy-test
|
||||||
|
(t/testing "auto-simplify-plushy"
|
||||||
|
(t/testing "should handle having an empty plushy"
|
||||||
|
(t/is (= (s/auto-simplify-plushy '() (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false}) '())))
|
||||||
|
(let [plushy '(:exec_dup 1 :integer_add close :in1 :integer_add 0 :in1 :in1 :integer_mult :integer_add)]
|
||||||
|
(t/testing "should decrease size of plushy that always has perfect scores"
|
||||||
|
(t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 4 :simplification-verbose? false})) (count plushy)))
|
||||||
|
(t/is (< (count (s/auto-simplify-plushy plushy (fn [argmap data plushy] 0) {:simplification-steps 100 :simplification-k 10 :simplification-verbose? false})) (count plushy)))))))
|
Loading…
x
Reference in New Issue
Block a user