From 7ce624d4bac810396aaa7710ba124f74f10d9674 Mon Sep 17 00:00:00 2001 From: mcgirjau Date: Wed, 1 Jul 2020 13:07:13 -0400 Subject: [PATCH] Add Number IO and Smallest problems --- deps.edn | 1 + src/propeller/core.cljc | 10 +- src/propeller/gp.cljc | 18 +- .../problems/software/number_io.cljc | 87 ++++++- src/propeller/problems/software/smallest.cljc | 92 ++++++++ src/propeller/push/instructions/code.cljc | 218 +++++++++++++----- .../push/instructions/polymorphic.cljc | 18 +- src/propeller/push/state.cljc | 4 +- src/propeller/selection.cljc | 2 +- 9 files changed, 365 insertions(+), 85 deletions(-) create mode 100644 deps.edn create mode 100644 src/propeller/problems/software/smallest.cljc diff --git a/deps.edn b/deps.edn new file mode 100644 index 0000000..dedbbfa --- /dev/null +++ b/deps.edn @@ -0,0 +1 @@ +{:deps {org.clojure/clojurescript {:mvn/version "1.10.758"}}} \ No newline at end of file diff --git a/src/propeller/core.cljc b/src/propeller/core.cljc index 6279fb3..9489fac 100644 --- a/src/propeller/core.cljc +++ b/src/propeller/core.cljc @@ -3,6 +3,8 @@ (:require [propeller.gp :as gp] [propeller.problems.simple-regression :as regression] [propeller.problems.string-classification :as string-classif] + [propeller.problems.software.number-io :as number-io] + [propeller.problems.software.smallest :as smallest] #?(:cljs [cljs.reader :refer [read-string]]))) (defn -main @@ -11,12 +13,12 @@ (gp/gp (update-in (merge - {:instructions regression/instructions - :error-function regression/error-function + {:instructions smallest/instructions + :error-function smallest/error-function :max-generations 500 :population-size 500 - :max-initial-plushy-size 50 - :step-limit 100 + :max-initial-plushy-size 100 + :step-limit 200 :parent-selection :lexicase :tournament-size 5 :umad-rate 0.1 diff --git a/src/propeller/gp.cljc b/src/propeller/gp.cljc index f5f9088..158365f 100644 --- a/src/propeller/gp.cljc +++ b/src/propeller/gp.cljc @@ -1,5 +1,6 @@ (ns propeller.gp - (:require [propeller.genome :as genome] + (:require [clojure.string] + [propeller.genome :as genome] [propeller.variation :as variation] [propeller.push.core :as push] [propeller.push.instructions.bool] @@ -23,7 +24,7 @@ (print "Best program: ") (prn (genome/plushy->push (:plushy best))) (println "Best total error:" (:total-error best)) (println "Best errors:" (:errors best)) - (println "Best behaviors:" (:behaviors best)) + (println "Best behaviors:" (map clojure.string/trim (:behaviors best))) (println "Genotypic diversity:" (float (/ (count (distinct (map :plushy pop))) (count pop)))) (println "Average genome length:" @@ -45,11 +46,18 @@ instructions max-initial-plushy-size)))] (let [evaluated-pop (sort-by :total-error - (map (partial error-function argmap) - population))] + (map (partial error-function argmap) population)) + best-individual (first evaluated-pop)] (report evaluated-pop generation) (cond - (zero? (:total-error (first evaluated-pop))) (println "SUCCESS") + ;; Success on training cases is verified on testing cases + (zero? (:total-error best-individual)) + (do (println "SUCCESS at generation" generation) + (print "Checking program on test cases... ") + (if (zero? (:total-error (error-function argmap best-individual :test))) + (println "Test cases passed.") + (println "Test cases failed."))) + ;; (>= generation max-generations) nil :else (recur (inc generation) (if (:elitism argmap) diff --git a/src/propeller/problems/software/number_io.cljc b/src/propeller/problems/software/number_io.cljc index d21dba7..ddb1e90 100644 --- a/src/propeller/problems/software/number_io.cljc +++ b/src/propeller/problems/software/number_io.cljc @@ -1,3 +1,88 @@ -(ns propeller.problems.software.number-io) +(ns propeller.problems.software.number-io + (:require [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.push.state :as state] + [propeller.push.utils :refer [get-stack-instructions]] + [propeller.utils :as utils] + [propeller.push.state :as state] + [propeller.tools.math :as math])) +;; ============================================================================= +;; Tom Helmuth, thelmuth@cs.umass.edu +;; +;; NUMBER IO PROBLEM +;; +;; This problem file defines the following problem: +;; There are two inputs, a float and an int. The program must read them in, find +;; their sum as a float, and print the result as a float. +;; +;; Problem Source: iJava (http://ijava.cs.umass.edu/) +;; +;; NOTE: input stack: in1 (float), +;; in2 (int) +;; output stack: printed output +;; ============================================================================= +;; ============================================================================= +;; DATA DOMAINS +;; +;; A list of data domains. Each domain is a map containing a "set" of inputs +;; and two integers representing how many cases from the set should be used as +;; training and testing cases respectively. Each "set" of inputs is either a +;; list or a function that, when called, will create a random element of the set +;; ============================================================================= + +;; Random float between -100.0 and 100.0 +(defn random-float [] (- (* (rand) 200) 100.0)) + +; Random integer between -100 and 100 +(defn random-int [] (- (rand-int 201) 100.0)) + +(def instructions + (utils/not-lazy + (concat + ;; stack-specific instructions + (get-stack-instructions #{:float :integer :print}) + ;; input instructions + (list :in1 :in2) + ;; ERCs (constants) + (list random-float random-int)))) + +(def train-and-test-data + (let [inputs (vec (repeatedly 1025 #(vector (random-int) (random-float)))) + outputs (mapv #(apply + %) inputs) + train-set {:inputs (take 25 inputs) + :outputs (take 25 outputs)} + test-set {:inputs (drop 25 inputs) + :outputs (drop 25 outputs)}] + {:train train-set + :test test-set})) + +(defn error-function + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual)) + data (get train-and-test-data subset) + inputs (:inputs data) + correct-outputs (:outputs data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 (first input) + :in2 (last input)} + :output '("")) + (:step-limit argmap)) + :output)) + inputs) + errors (map (fn [correct-output output] + (let [parsed-output (try (read-string output) + (catch Exception e 1000.0))] + (min 1000.0 (math/abs (- correct-output parsed-output))))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error (apply +' errors))))) diff --git a/src/propeller/problems/software/smallest.cljc b/src/propeller/problems/software/smallest.cljc new file mode 100644 index 0000000..7c91feb --- /dev/null +++ b/src/propeller/problems/software/smallest.cljc @@ -0,0 +1,92 @@ +(ns propeller.problems.software.smallest + (:require [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.push.state :as state] + [propeller.push.utils :refer [get-stack-instructions]] + [propeller.utils :as utils] + [propeller.push.state :as state])) + +;; ============================================================================= +;; Tom Helmuth, thelmuth@cs.umass.edu +;; +;; SMALLEST PROBLEM +;; +;; This problem file defines the following problem: +;; There are two inputs, a float and an int. The program must read them in, find +;; their sum as a float, and print the result as a float. +;; +;; Problem Source: C. Le Goues et al., "The ManyBugs and IntroClass Benchmarks +;; for Automated Repair of C Programs," in IEEE Transactions on Software +;; Engineering, vol. 41, no. 12, pp. 1236-1256, Dec. 1 2015. +;; doi: 10.1109/TSE.2015.2454513 +;; +;; NOTE: input stack: in1 (int), +;; in2 (int), +;; in3 (int), +;; in4 (int), +;; output stack: printed output +;; ============================================================================= + +;; ============================================================================= +;; DATA DOMAINS +;; +;; A list of data domains. Each domain is a map containing a "set" of inputs +;; and two integers representing how many cases from the set should be used as +;; training and testing cases respectively. Each "set" of inputs is either a +;; list or a function that, when called, will create a random element of the set +;; ============================================================================= + +; Random integer between -100 and 100 +(defn random-int [] (- (rand-int 201) 100.0)) + +(def instructions + (utils/not-lazy + (concat + ;; stack-specific instructions + (get-stack-instructions #{:boolean :exec :integer :print}) + ;; input instructions + (list :in1 :in2 :in3 :in4) + ;; ERCs (constants) + (list random-int)))) + +(def train-and-test-data + (let [inputs (vec (repeatedly 1100 #(vector (random-int) (random-int) + (random-int) (random-int)))) + outputs (mapv #(apply min %) inputs) + train-set {:inputs (take 100 inputs) + :outputs (take 100 outputs)} + test-set {:inputs (drop 100 inputs) + :outputs (drop 100 outputs)}] + {:train train-set + :test test-set})) + +(defn error-function + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual)) + data (get train-and-test-data subset) + inputs (:inputs data) + correct-outputs (:outputs data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 (get input 0) + :in2 (get input 1) + :in3 (get input 2) + :in4 (get input 3)} + :output '("")) + (:step-limit argmap)) + :output)) + inputs) + errors (map (fn [correct-output output] + (let [parsed-output (try (read-string output) + (catch Exception e 1))] + (if (= correct-output parsed-output) 0 1))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error (apply +' errors))))) diff --git a/src/propeller/push/instructions/code.cljc b/src/propeller/push/instructions/code.cljc index 1cfea9c..7cf513d 100644 --- a/src/propeller/push/instructions/code.cljc +++ b/src/propeller/push/instructions/code.cljc @@ -16,22 +16,7 @@ ^{:stacks #{}} (fn [stack state] state)) -(def _do*range - ^{:stacks #{:exec :integer}} - (fn [stack state] state)) - -(def _noop - ^{:stacks #{}} - (fn [stack state] state)) - -(def _noop - ^{:stacks #{}} - (fn [stack state] state)) - -(generate-instructions - [:exec :code] - [_noop]) - +(generate-instructions [:exec :code] [_noop]) ;; ============================================================================= ;; CODE Instructions @@ -50,62 +35,173 @@ [:code :code] :code))) -(def-instruction - :code_atom - ^{:stacks #{:code}} - (fn [state] - ())) - -(def-instruction - :code_car - ^{:stacks #{:code}} - (fn [state] - ())) - -(def-instruction - :code_cdr - ^{:stacks #{:code}} - (fn [state] - ())) - -(def-instruction - :code_cons - ^{:stacks #{:code}} - (fn [state] - ())) - -(def-instruction - :code_do - ^{:stacks #{:code}} - (fn [state] - ())) - -(def-instruction - :code_do* - ^{:stacks #{:code}} - (fn [state] - ())) - -(def-instruction - :code_append - ^{:stacks #{:code}} - (fn [state] - ())) +;; Unfinished... ;; ============================================================================= ;; EXEC Instructions ;; ============================================================================= +;; Executes the top EXEC instruction (i.e. loops) a number of times determined +;; by the top two INTEGERs, while also pushing the loop counter onto the INTEGER +;; stack. The top INTEGER is the "destination index" and the second INTEGER is +;; the "current index". If the integers are equal, then the current index is +;; pushed onto the INTEGER stack and the code (which is the "body" of the loop) +;; is pushed onto the EXEC stack for subsequent execution. If the integers are +;; not equal, then the current index will still be pushed onto the INTEGER stack +;; but two items will be pushed onto the EXEC stack - first a recursive call to +;; :exec_do*range (with the same code and destination index, but with a current +;; index that has been either incremented or decremented by 1 to be closer to +;; the destination index) and then the body code. Note that the range is +;; inclusive of both endpoints; a call with integer arguments 3 and 5 will cause +;; its body to be executed 3 times, with the loop counter having the values 3, +;; 4, and 5. Note also that one can specify a loop that "counts down" by +;; providing a destination index that is less than the specified current index. (def-instruction - :exec_dup - ^{:stacks #{:exec}} + :exec_do*range + ^{:stacks #{:exec :integer}} (fn [state] - (if (state/empty-stack? state :exec) + (if (or (state/empty-stack? state :exec) + (< (count (:integer state)) 2)) state - (state/push-to-stack state :exec (first (:exec state)))))) + (let [to-do (state/peek-stack state :exec) + destination-index (state/peek-stack state :integer) + current-index (state/peek-stack + (state/pop-stack state :integer) :integer) + popped-state (state/pop-stack + (state/pop-stack + (state/pop-stack state :exec) :integer) :integer) + increment (cond (< current-index destination-index) 1 + (> current-index destination-index) -1 + :else 0) + continuation (if (zero? increment) + popped-state + (state/push-to-stack popped-state + :exec + (list (+' current-index increment) + destination-index + :exec_do*range + to-do)))] + (state/push-to-stack + (state/push-to-stack continuation :integer current-index) :exec to-do))))) +;; Executes the top EXEC instruction (i.e. loops) a number of times determined +;; by the top INTEGER, pushing an index (which runs from 0 to one less than the +;; total number of iterations) onto the INTEGER stack prior to each execution +;; of the loop body. If the top INTEGER argument is <= 0, this becomes a NOOP +(def-instruction + :exec_do*count + ^{:stacks #{:exec :integer}} + (fn [state] + (if (or (state/empty-stack? state :integer) + (state/empty-stack? state :exec) + (< (state/peek-stack state :integer) 1)) + state + (let [to-do (state/peek-stack state :exec) + index (state/peek-stack state :integer) + popped-state (state/pop-stack (state/pop-stack state :exec) :integer)] + (state/push-to-stack popped-state :exec (list 0 + (dec index) + :exec_do*range + to-do)))))) + +;; Like :exec_do*count, but does not push the loop counter onto the INTEGER stack +(def-instruction + :exec_do*times + ^{:stacks #{:exec :integer}} + (fn [state] + (if (or (state/empty-stack? state :integer) + (state/empty-stack? state :exec) + (< (state/peek-stack state :integer) 1)) + state + (let [to-do (state/peek-stack state :exec) + to-do-with-pop (cons :integer_pop (utils/ensure-list to-do)) + index (state/peek-stack state :integer) + popped-state (state/pop-stack (state/pop-stack state :exec) :integer)] + (state/push-to-stack popped-state :exec (list 0 + (dec index) + :exec_do*range + to-do-with-pop)))))) + +;; If the top BOOLEAN is TRUE, removes the the second item on the EXEC stack, +;; leaving the first item to be executed. Otherwise, removes the first item, +;; leaving the second to be executed. Acts as a NOOP unless there are at least +;; two items on the EXEC stack and one item on the BOOLEAN stack (def-instruction :exec_if ^{:stacks #{:boolean :exec}} (fn [state] (make-instruction state #(if %1 %3 %2) [:boolean :exec :exec] :exec))) + +;; If the top BOOLEAN is TRUE, leaves the first item on the EXEC stack to be +;; executed. Otherwise, it removes it. Acts as a NOOP unless there is at least +;; one item on the EXEC stack and one item on the BOOLEAN stack +(def-instruction + :exec_when + ^{:stacks #{:boolean :exec}} + (fn [state] + (make-instruction state #(when %1 %2) [:boolean :exec] :exec))) + +;; Keeps executing the top instruction on the EXEC stack while the top item on +;; the BOOLEAN stack is true +(def-instruction + :exec_while + ^{:stacks #{:boolean :exec}} + (fn [state] + (if (state/empty-stack? state :exec) + state + (if (state/empty-stack? state :boolean) + (state/pop-stack state :exec) + (if (state/peek-stack state :boolean) + (let [to-do (state/peek-stack state :exec) + popped-state (state/pop-stack state :boolean)] + (state/push-to-stack + (state/push-to-stack popped-state :exec :exec_while) :exec to-do)) + (state/pop-stack (state/pop-stack state :boolean) :exec)))))) + +;; Keeps executing the top instruction on the EXEC stack while the top item on +;; the BOOLEAN stack is true. Differs from :exec_while in that it executes +;; the top instruction at least once +(def-instruction + :exec_do*while + ^{:stacks #{:boolean :exec}} + (fn [state] + (if (state/empty-stack? state :exec) + state + (let [to-do (state/peek-stack state :exec)] + (state/push-to-stack + (state/push-to-stack state :exec :exec_while) :exec to-do))))) + +;; The "K combinator" - removes the second item on the EXEC stack +(def-instruction + :exec_k + ^{:stacks #{:exec}} + (fn [state] + (make-instruction state (fn [_ first] first) [:exec :exec] :exec))) + +;; The "S combinator" - pops 3 items from the EXEC stack, which we will call A, +;; B, and C (with A being the first one popped), and then pushes a list +;; containing B and C back onto the EXEC stack, followed by another instance of +;; C, followed by another instance of A +(def-instruction + :exec_s + ^{:stacks #{:exec}} + (fn [state] + (if (< (count (:exec state)) 3) + state + (let [[a b c] (state/peek-stack-multiple state :exec 3) + popped-state (state/pop-stack-multiple state :exec 3) + to-push-back (list a c (list b c))] + (state/push-to-stack-multiple popped-state :exec to-push-back))))) + +;; The "Y combinator" - inserts beneath the top item of the EXEC stack a new +;; item of the form "(:exec_y TOP_ITEM)" +(def-instruction + :exec_y + ^{:stacks #{:exec}} + (fn [state] + (if (state/empty-stack? state :exec) + state + (let [top-item (state/peek-stack state :exec) + popped-state (state/pop-stack state :exec) + to-push-back (list top-item (list :exec_y top-item))] + (state/push-to-stack-multiple popped-state :exec to-push-back))))) diff --git a/src/propeller/push/instructions/polymorphic.cljc b/src/propeller/push/instructions/polymorphic.cljc index b690d00..9301f5e 100644 --- a/src/propeller/push/instructions/polymorphic.cljc +++ b/src/propeller/push/instructions/polymorphic.cljc @@ -109,7 +109,7 @@ popped-state (state/pop-stack state :integer) top-item (state/peek-stack popped-state stack) popped-state (state/pop-stack popped-state stack) - index (max 0 (min index-raw (count (get popped-state stack))))] + index (max 0 (min index-raw (dec (count (get popped-state stack)))))] (update popped-state stack #(utils/not-lazy (concat (take index %) (list top-item) (drop index %))))) @@ -144,7 +144,7 @@ (not (state/empty-stack? state stack)))) (let [index-raw (state/peek-stack state :integer) popped-state (state/pop-stack state :integer) - index (max 0 (min index-raw (count (get popped-state stack)))) + index (max 0 (min index-raw (dec (count (get popped-state stack))))) indexed-item (nth (get popped-state stack) index)] (update popped-state stack #(utils/not-lazy (concat (list indexed-item) @@ -164,20 +164,14 @@ (not (state/empty-stack? state stack)))) (let [index-raw (state/peek-stack state :integer) popped-state (state/pop-stack state :integer) - index (max 0 (min index-raw (count (get popped-state stack)))) + index (max 0 (min index-raw (dec (count (get popped-state stack))))) indexed-item (nth (get popped-state stack) index)] (state/push-to-stack popped-state stack indexed-item)) state))) -;; 9 types x 1 functions = 9 instructions -(generate-instructions - [:boolean :char :float :integer :string - :vector_boolean :vector_float :vector_integer :vector_string] - [_eq]) - -;; 11 types x 12 functions = 132 instructions +;; 11 types x 13 functions = 143 instructions (generate-instructions [:boolean :char :code :exec :float :integer :string :vector_boolean :vector_float :vector_integer :vector_string] - [_dup _duptimes _dupitems _empty _flush _pop _rot _shove _stackdepth - _swap _yank _yankdup]) + [_dup _duptimes _dupitems _empty _eq _flush _pop _rot _shove + _stackdepth _swap _yank _yankdup]) diff --git a/src/propeller/push/state.cljc b/src/propeller/push/state.cljc index 53f6da9..a53b64d 100644 --- a/src/propeller/push/state.cljc +++ b/src/propeller/push/state.cljc @@ -55,7 +55,9 @@ (defn push-to-stack "Pushes an item onto a stack." [state stack item] - (update state stack conj item)) + (if (nil? item) + state + (update state stack conj item))) (defn push-to-stack-multiple "Pushes a list of items onto a stack, leaving them in the order they are in." diff --git a/src/propeller/selection.cljc b/src/propeller/selection.cljc index d6739c2..487ecf2 100644 --- a/src/propeller/selection.cljc +++ b/src/propeller/selection.cljc @@ -10,7 +10,7 @@ (defn lexicase-selection "Selects an individual from the population using lexicase selection." [pop argmap] - (loop [survivors pop + (loop [survivors (map rand-nth (vals (group-by :errors pop))) cases (shuffle (range (count (:errors (first pop)))))] (if (or (empty? cases) (empty? (rest survivors)))