diff --git a/project.clj b/project.clj index 35aa7f7..10c6607 100644 --- a/project.clj +++ b/project.clj @@ -6,6 +6,8 @@ :dependencies [[org.clojure/clojure "1.10.0"] [org.clojure/clojurescript "1.9.946"] [org.clojure/test.check "1.1.0"] - [net.clojars.schneau/psb2 "1.0.0"]] + [net.clojars.schneau/psb2 "1.1.0"]] :main ^:skip-aot propeller.core - :repl-options {:init-ns propeller.core}) + :repl-options {:init-ns propeller.core} + :jvm-opts ^:replace []) + diff --git a/propeller.iml b/propeller.iml index f67558b..17abdb3 100644 --- a/propeller.iml +++ b/propeller.iml @@ -6,8 +6,8 @@ - + @@ -23,7 +23,7 @@ - + @@ -36,4 +36,4 @@ - \ No newline at end of file + diff --git a/src/propeller/core.cljc b/src/propeller/core.cljc index d24adb8..db08439 100755 --- a/src/propeller/core.cljc +++ b/src/propeller/core.cljc @@ -3,6 +3,7 @@ (:require [propeller.gp :as gp] [propeller.problems.simple-regression :as regression] [propeller.problems.string-classification :as string-classif] + [clojure.string :as string] #?(:cljs [cljs.reader :refer [read-string]]))) (defn eval-problem-var @@ -12,11 +13,14 @@ (defn -main "Runs propel-gp, giving it a map of arguments." [& args] + ;; Exception for when no args were passed (when (empty? args) (println "You must specify a problem to run.") (println "Try, for example:") (println " lein run software.smallest") (System/exit 1)) + + ;; Creates problems (require (symbol (str "propeller.problems." (first args)))) (gp/gp (update-in @@ -31,9 +35,11 @@ :tournament-size 5 :umad-rate 0.1 :variation {:umad 0.5 :crossover 0.5} - :elitism false} + :elitism false + :PSB2-path "" + :PSB2-problem (clojure.string/replace (first args) #"PSB2." "")} (apply hash-map - (map #(if (string? %) (read-string %) %) + (map #(if (and (string? 
%) (not (.contains % "/"))) (read-string %) %) (rest args)))) [:error-function] - identity))) + identity))) \ No newline at end of file diff --git a/src/propeller/gp.cljc b/src/propeller/gp.cljc index a849a93..47d8d82 100755 --- a/src/propeller/gp.cljc +++ b/src/propeller/gp.cljc @@ -1,16 +1,17 @@ (ns propeller.gp - (:require [clojure.string] - [clojure.pprint] - [propeller.genome :as genome] - [propeller.variation :as variation] - [propeller.push.instructions.bool] - [propeller.push.instructions.character] - [propeller.push.instructions.code] - [propeller.push.instructions.input-output] - [propeller.push.instructions.numeric] - [propeller.push.instructions.polymorphic] - [propeller.push.instructions.string] - [propeller.push.instructions.vector])) + (:require [clojure.string] + [clojure.pprint] + [propeller.genome :as genome] + [propeller.variation :as variation] + [propeller.push.instructions.bool] + [propeller.push.instructions.character] + [propeller.push.instructions.code] + [propeller.push.instructions.input-output] + [propeller.push.instructions.numeric] + [propeller.push.instructions.polymorphic] + [propeller.push.instructions.string] + [propeller.push.instructions.vector] + [psb2.core :as psb2])) (defn report "Reports information each generation." @@ -31,39 +32,44 @@ (defn gp "Main GP loop." 
[{:keys [population-size max-generations error-function instructions - max-initial-plushy-size] + max-initial-plushy-size PSB2-path PSB2-problem] :as argmap}] ;; - (println {:starting-args argmap}) + (prn {:starting-args (update (update argmap :error-function str) :instructions str)}) (println) ;; - (loop [generation 0 - population (repeatedly - population-size - #(hash-map :plushy (genome/make-random-plushy - instructions - max-initial-plushy-size)))] - (let [evaluated-pop (sort-by :total-error - (#?(:clj pmap - :cljs map) - (partial error-function argmap) population)) - best-individual (first evaluated-pop)] - (report evaluated-pop generation argmap) - (cond - ;; Success on training cases is verified on testing cases - (zero? (:total-error best-individual)) - (do (println {:success-generation generation}) - (println {:total-test-error (:total-error (error-function argmap best-individual :test))}) - (#?(:clj shutdown-agents)) - ) - ;; - (>= generation max-generations) - nil - ;; - :else (recur (inc generation) - (if (:elitism argmap) - (conj (repeatedly (dec population-size) - #(variation/new-individual evaluated-pop argmap)) - (first evaluated-pop)) - (repeatedly population-size - #(variation/new-individual evaluated-pop argmap)))))))) + (let [PSB2-data (if (= PSB2-path "") + #{} + (psb2/fetch-examples PSB2-path PSB2-problem 200 2000)) + argmap (assoc argmap :train-and-test-data PSB2-data)] + + (loop [generation 0 + population (repeatedly + population-size + #(hash-map :plushy (genome/make-random-plushy + instructions + max-initial-plushy-size)))] + (let [evaluated-pop (sort-by :total-error + (#?(:clj pmap + :cljs map) + (partial error-function argmap) population)) + best-individual (first evaluated-pop)] + (report evaluated-pop generation argmap) + (cond + ;; Success on training cases is verified on testing cases + (zero? 
(:total-error best-individual)) + (do (prn {:success-generation generation}) + (prn {:total-test-error (:total-error (error-function argmap best-individual :test))}) + (#?(:clj shutdown-agents)) + ) + ;; + (>= generation max-generations) + nil + ;; + :else (recur (inc generation) + (if (:elitism argmap) + (conj (repeatedly (dec population-size) + #(variation/new-individual evaluated-pop argmap)) + (first evaluated-pop)) + (repeatedly population-size + #(variation/new-individual evaluated-pop argmap))))))))) diff --git a/src/propeller/problems/PSB2/basement.cljc b/src/propeller/problems/PSB2/basement.cljc new file mode 100644 index 0000000..643189b --- /dev/null +++ b/src/propeller/problems/PSB2/basement.cljc @@ -0,0 +1,65 @@ +(ns propeller.problems.PSB2.basement + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer [get-stack-instructions]] + [propeller.push.state :as state] + [clojure.pprint :as pprint] + [propeller.tools.math :as math])) + +; =========== PROBLEM DESCRIPTION ============================ +; BASEMENT from PSB2 +; Given a vector of integers, return the first +; index such that the sum of all integers from the start of the +; vector to that index (inclusive) is negative. 
+; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; =============================================================== + + +; Random integer between -100 and 100 (from smallest) +(defn random-int [] (- (rand-int 201) 100)) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :boolean :vector_integer :print}) + ;;; input instructions + (list :in1) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list random-int -1 0 1 [])))) + +(defn error-function + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (get i :input1)) data) + correct-outputs (map (fn [i] (get i :output1)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :integer)) + inputs) + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 1000000 + (min 1000.0 (math/abs (- correct-output output))))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) + + + diff --git a/src/propeller/problems/PSB2/bouncing_balls.cljc b/src/propeller/problems/PSB2/bouncing_balls.cljc new file mode 100644 index 0000000..dcf395e --- /dev/null +++ b/src/propeller/problems/PSB2/bouncing_balls.cljc @@ -0,0 +1,72 @@ +(ns propeller.problems.PSB2.bouncing-balls + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer [get-stack-instructions]] + [propeller.push.state :as state] + [clojure.pprint :as pprint] + [propeller.tools.math :as math])) + +; =========== PROBLEM DESCRIPTION =============================== +; 
BOUNCING BALLS from PSB2 +; Given a starting height and a height after the first bounce of a +; dropped ball, calculate the bounciness index +; (height of first bounce / starting height). Then, given a number +; of bounces, use the bounciness index to calculate the total +; distance that the ball travels across those bounces. +; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; ================================================================== + +(defn map-vals-input + "Returns all the input values of a map (specific helper method for bouncing-balls)" + [i] + (vals (select-keys i [:input1 :input2 :input3]))) + +(defn map-vals-output + "Returns the output values of a map (specific helper method for bouncing-balls)" + [i] + (get i :output1)) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :float :boolean :print}) + ;;; input instructions + (list :in1 :in2 :in3) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list 0.0 1.0 2.0)))) + +(defn error-function + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (map-vals-input i)) data) + correct-outputs (map (fn [i] (map-vals-output i)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 (nth input 0) + :in2 (nth input 1) + :in3 (nth input 2)}) + (:step-limit argmap)) + :float)) + inputs) + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 1000000.0 + (min 1000.0 (math/abs (- correct-output output))))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) diff --git a/src/propeller/problems/PSB2/bowling.cljc 
b/src/propeller/problems/PSB2/bowling.cljc new file mode 100644 index 0000000..75f22d3 --- /dev/null +++ b/src/propeller/problems/PSB2/bowling.cljc @@ -0,0 +1,60 @@ +(ns propeller.problems.PSB2.bowling + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer [get-stack-instructions]] + [propeller.push.state :as state] + [clojure.pprint :as pprint] + [propeller.tools.math :as math])) + +; =========== PROBLEM DESCRIPTION ====================== +; BOWLING from PSB2 +; Given a string representing the individual +; bowls in a 10-frame round of 10 pin bowling, return the +; score of that round. +; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; ========================================================= + +(defn random-int [] (- (rand-int 201) 100)) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :boolean :char :string :print}) + ;;; input instructions + (list :in1) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list \- \X \/ \1 \2 \3 \4 \5 \6 \7 \8 \9 10 random-int)))) + +(defn error-function + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (get i :input1)) data) + correct-outputs (map (fn [i] (get i :output1)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :integer)) + inputs) + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 1000000 + (min 1000.0 (math/abs (- correct-output output))))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + 
errors)))))) diff --git a/src/propeller/problems/PSB2/camel_case.cljc b/src/propeller/problems/PSB2/camel_case.cljc new file mode 100644 index 0000000..542b62e --- /dev/null +++ b/src/propeller/problems/PSB2/camel_case.cljc @@ -0,0 +1,98 @@ +(ns propeller.problems.PSB2.camel-case + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer [get-stack-instructions]] + [propeller.push.state :as state] + [propeller.tools.math :as math] + [propeller.tools.metrics :as metrics])) + +; =========== PROBLEM DESCRIPTION ===================================== +; CAMEL CASE from PSB2 +; Take a string in kebab-case and convert all of the words to camelCase. +; Each group of words to convert is delimited by "-", and each grouping +; is separated by a space. For example: "camel-case example-test-string" +; → "camelCase exampleTestString" +; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; ======================================================================= + +; Lowercase letter ERC: a through z inclusive (range excludes its end, hence the inc) +(defn random-char + [] + (rand-nth (map char (range (int \a) (inc (int \z)))))) + +; Word generator for string ERC +(defn word-generator + [] + (let [chars-between #(map char (range (int %1) (inc (int %2)))) + chars (chars-between \a \z) + word-len (inc (rand-int 5))] + (apply str (repeatedly word-len #(rand-nth chars))))) + +(defn cleanup-length + [string len] + (let [result (take len string)] + (if (or (= (last result) \space) + (= (last result) \-)) + (apply str (butlast result)) + (apply str result)))) + +; String ERC +(defn random-input + [len] + (loop [result-string (word-generator)] + (if (>= (count result-string) len) + (cleanup-length result-string len) + (recur (str result-string + (if (< (rand) 0.66) \- \space) + (word-generator)))))) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :boolean :char :string 
:print}) + ;;; input instructions + (list :in1) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list \- \space random-char (fn [] (random-input 21)))))) + + +(defn error-function + "Scores individual on the subset (:train by default) of (:train-and-test-data argmap): one error per case, + the Levenshtein distance to the expected string, or 10000 when peek-stack yields :no-stack-item." + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (get i :input1)) data) + correct-outputs (map (fn [i] (get i :output1)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :string)) + inputs) + ;; Compare the raw :string outputs. Passing them through read-string kept only the + ;; first form of the text, hid :no-stack-item behind the catch fallback so the penalty + ;; below never fired, and can execute code via #= on the JVM. + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 10000 + (metrics/levenshtein-distance correct-output output))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) + + diff --git a/src/propeller/problems/PSB2/dice_game.cljc b/src/propeller/problems/PSB2/dice_game.cljc new file mode 100644 index 0000000..7f9bfb8 --- /dev/null +++ b/src/propeller/problems/PSB2/dice_game.cljc @@ -0,0 +1,69 @@ +(ns propeller.problems.PSB2.dice-game + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer [get-stack-instructions]] + [propeller.push.state :as state] + [clojure.pprint :as pprint] + [propeller.tools.math :as math])) + +; =========== PROBLEM DESCRIPTION =============================== +; DICE GAME from PSB2 +; Peter has an n sided die and Colin has an m +; sided die. If they both roll their dice at the same time, return +; the probability that Peter rolls strictly higher than Colin. 
+; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; ================================================================== + +(defn map-vals-input + "Returns the :input1 and :input2 values of example map i (specific helper method for dice-game)" + [i] + (vals (select-keys i [:input1 :input2]))) + +(defn map-vals-output + "Returns the :output1 value of example map i (specific helper method for dice-game)" + [i] + (get i :output1)) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :float :boolean :print}) + ;;; input instructions + (list :in1 :in2) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list 0.0 1.0)))) + +(defn error-function + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (map-vals-input i)) data) + correct-outputs (map (fn [i] (map-vals-output i)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 (nth input 0) + :in2 (nth input 1)}) + (:step-limit argmap)) + :float)) + inputs) + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 1000000.0 + (min 1000.0 (math/abs (- correct-output output))))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) diff --git a/src/propeller/problems/PSB2/fuel_cost.cljc b/src/propeller/problems/PSB2/fuel_cost.cljc new file mode 100644 index 0000000..27cd2c7 --- /dev/null +++ b/src/propeller/problems/PSB2/fuel_cost.cljc @@ -0,0 +1,63 @@ +(ns propeller.problems.PSB2.fuel-cost + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer 
[get-stack-instructions]] + [propeller.push.state :as state] + [clojure.pprint :as pprint] + [propeller.tools.math :as math])) + +; =========== PROBLEM DESCRIPTION ========================= +; FUEL COST from PSB2 +; Given a vector of positive integers, divide +; each by 3, round the result down to the nearest integer, and +; subtract 2. Return the sum of all of the new integers in the +; vector +; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; ============================================================ + +; Random integer between -100 and 100 (from smallest) +(defn random-int [] (- (rand-int 201) 100)) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :boolean :vector_integer :print}) + ;;; input instructions + (list :in1) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list random-int 0 1 2 3)))) + +(defn error-function + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (get i :input1)) data) + correct-outputs (map (fn [i] (get i :output1)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :integer)) + inputs) + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 1000000 + (min 1000.0 (math/abs (- correct-output output))))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) + diff --git a/src/propeller/problems/PSB2/middle_character.cljc b/src/propeller/problems/PSB2/middle_character.cljc new file mode 100644 index 0000000..ee8430e --- /dev/null +++ b/src/propeller/problems/PSB2/middle_character.cljc @@ -0,0 +1,67 @@ +(ns 
propeller.problems.PSB2.middle-character + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer [get-stack-instructions]] + [propeller.push.state :as state] + [propeller.tools.math :as math] + [propeller.tools.metrics :as metrics])) + +; =========== PROBLEM DESCRIPTION ============================= +; MIDDLE CHARACTER from PSB2 +; Given a string, return the middle +; character as a string if it is odd length; return the two middle +; characters as a string if it is even length. +; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; =============================================================== + +(defn random-int [] (- (rand-int 201) 100)) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :boolean :char :string :print}) + ;;; input instructions + (list :in1) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list "" 0 1 2 random-int)))) + +(defn error-function + "Scores individual on the subset (:train by default) of (:train-and-test-data argmap): one error per case, + the Levenshtein distance to the expected string, or 10000 when peek-stack yields :no-stack-item." + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (get i :input1)) data) + correct-outputs (map (fn [i] (get i :output1)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :string)) + inputs) 
+ ;; Compare the raw :string outputs. Passing them through read-string kept only the + ;; first form of the text, hid :no-stack-item behind the catch fallback so the penalty + ;; below never fired, and can execute code via #= on the JVM. + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 10000 + (metrics/levenshtein-distance (str correct-output) (str output)))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) + + diff --git a/src/propeller/problems/PSB2/substitution_cipher.cljc b/src/propeller/problems/PSB2/substitution_cipher.cljc new file mode 100644 index 0000000..9c21bfb --- /dev/null +++ b/src/propeller/problems/PSB2/substitution_cipher.cljc @@ -0,0 +1,77 @@ +(ns propeller.problems.PSB2.substitution-cipher + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer [get-stack-instructions]] + [propeller.push.state :as state] + [propeller.tools.math :as math] + [propeller.tools.metrics :as metrics])) + +; =========== PROBLEM DESCRIPTION ========================= +; SUBSTITUTION CIPHER from PSB2 +; This problem gives 3 strings. +; The first two represent a cipher, mapping each character in +; one string to the one at the same index in the other string. +; The program must apply this cipher to the third string and +; return the deciphered message. 
+; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; ============================================================ + +(defn map-vals-input + "Returns all the input values of a map (specific helper method for substitution-cipher)" + [i] + (vals (select-keys i [:input1 :input2 :input3]))) + +(defn map-vals-output + "Returns the :output1 value of a map as a bare value, not wrapped in a seq (specific helper method for substitution-cipher)" + [i] + (get i :output1)) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :boolean :char :string :print}) + ;;; input instructions + (list :in1 :in2 :in3) + ;;; close + (list 'close) + ;;; ERCs (constants) + (list 0 "")))) + +(defn error-function + "Scores individual on the subset (:train by default) of (:train-and-test-data argmap): one error per case, + the Levenshtein distance to the expected string, or 10000 when peek-stack yields :no-stack-item." + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (map-vals-input i)) data) + correct-outputs (map (fn [i] (map-vals-output i)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 (nth input 0) + :in2 (nth input 1) + :in3 (nth input 2)}) + (:step-limit argmap)) + :string)) + inputs) 
+ ;; Compare the raw :string outputs. Passing them through read-string kept only the + ;; first form of the text, hid :no-stack-item behind the catch fallback so the penalty + ;; below never fired, and can execute code via #= on the JVM. + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 10000 + (metrics/levenshtein-distance (str correct-output) (str output)))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) \ No newline at end of file diff --git a/src/propeller/problems/PSB2/twitter.cljc b/src/propeller/problems/PSB2/twitter.cljc new file mode 100644 index 0000000..f0a17f4 --- /dev/null +++ b/src/propeller/problems/PSB2/twitter.cljc @@ -0,0 +1,70 @@ +(ns propeller.problems.PSB2.twitter + (:require [psb2.core :as psb2] + [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.utils :as utils] + [propeller.push.utils.helpers :refer [get-stack-instructions]] + [propeller.push.state :as state] + [propeller.tools.math :as math] + [propeller.tools.metrics :as metrics])) + +; =========== PROBLEM DESCRIPTION ============================= +; TWITTER from PSB2 +; Given a string representing a tweet, validate whether the tweet +; meets Twitter’s original character requirements. If the tweet +; has more than 140 characters, return the string "Too many characters". +; If the tweet is empty, return the string "You didn’t type anything". +; Otherwise, return "Your tweet has X characters", where +; the X is the number of characters in the tweet. 
+; +; Source: https://arxiv.org/pdf/2106.06086.pdf +; =============================================================== + +(defn random-int [] (- (rand-int 201) 100)) + +(def instructions + (utils/not-lazy + (concat + ;;; stack-specific instructions + (get-stack-instructions #{:exec :integer :boolean :char :string :print}) + ;;; input instructions + (list :in1) + ;;; close + (list 'close) + ;;; ERCs (constants; "Your tweet has " is capitalised to match the required output text) + (list 0 140 "Too many characters" "You didn't type anything" "Your tweet has " " characters")))) + +(defn error-function + "Scores individual on the subset (:train by default) of (:train-and-test-data argmap): one error per case, + the Levenshtein distance to the expected string, or 10000 when peek-stack yields :no-stack-item." + ([argmap individual] + (error-function argmap individual :train)) + ([argmap individual subset] + (let [program (genome/plushy->push (:plushy individual) argmap) + data (get (get argmap :train-and-test-data) subset) + inputs (map (fn [i] (get i :input1)) data) + correct-outputs (map (fn [i] (get i :output1)) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :string)) + inputs) 
+ ;; Compare the raw :string outputs. Passing them through read-string kept only the + ;; first form of the text, hid :no-stack-item behind the catch fallback so the penalty + ;; below never fired, and can execute code via #= on the JVM. + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 10000 + (metrics/levenshtein-distance (str correct-output) (str output)))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) + + diff --git a/src/propeller/problems/simple_regression.cljc b/src/propeller/problems/simple_regression.cljc index a8c4993..3f40580 100755 --- a/src/propeller/problems/simple_regression.cljc +++ b/src/propeller/problems/simple_regression.cljc @@ -18,7 +18,7 @@ [x] (+ (* x x x) x 3)) -;; Set of original propel instructions +; Set of original propel instructions (def instructions (list :in1 :integer_add @@ -71,4 +71,4 @@ :behaviors outputs :errors errors :total-error #?(:clj (apply +' errors) - :cljs (apply + errors)))))) + :cljs (apply + errors)))))) \ No newline at end of file diff --git a/src/propeller/push/.DS_Store b/src/propeller/push/.DS_Store index 49c1bfb..ac2df55 100644 Binary files a/src/propeller/push/.DS_Store and b/src/propeller/push/.DS_Store differ diff --git a/src/propeller/push/state.cljc b/src/propeller/push/state.cljc index 6cc754e..b49227e 100755 --- a/src/propeller/push/state.cljc +++ b/src/propeller/push/state.cljc @@ -8,7 +8,7 @@ :float '() :input {} :integer '() - :output '() + :output '("") :string '() :vector_boolean '() :vector_float '() diff --git a/src/propeller/tools/metrics.cljc b/src/propeller/tools/metrics.cljc index 7964add..156265f 100755 --- a/src/propeller/tools/metrics.cljc +++ b/src/propeller/tools/metrics.cljc @@ -23,6 +23,37 @@ [seq1 seq2] (apply + (map #(if (= %1 %2) 0 1) seq1 seq2))) +;; helper method for levenshtein-distance +(defn compute-next-row + "computes the next row using the prev-row current-element and the other seq" + [prev-row current-element other-seq pred] + (reduce + (fn [row [diagonal above other-element]] + (let [update-val (if (pred other-element current-element) 
+ ;; if the elements are deemed equivalent according to the predicate + ;; pred, then no change has taken place to the string, so we are + ;; going to set it the same value as diagonal (which is the previous edit-distance) + diagonal + ;; in the case where the elements are not considered equivalent, then we are going + ;; to figure out if its a substitution (then there is a change of 1 from the previous + ;; edit distance) thus the value is diagonal + 1 or if its a deletion, then the value + ;; is present in the columns, but not in the rows, the edit distance is the edit-distance + ;; of last of row + 1 (since we will be using vectors, peek is more efficient) + ;; or it could be a case of insertion, then the value is above+1, and we chose + ;; the minimum of the three + (inc (min diagonal above (peek row))))] + + (conj row update-val))) + ;; we need to initialize the reduce function with the value of a row, since we are + ;; constructing this row from the previous one, the row is a vector of 1 element which + ;; consists of 1 + the first element in the previous row (edit distance between the prefix so far + ;; and an empty string) + [(inc (first prev-row))] + ;; for the reduction to go over, we need to provide it with three values, the diagonal + ;; which is the same as prev-row because it starts from 0, the above, which is the next element + ;; from the list and finally the element from the other sequence itself. + (map vector prev-row (next prev-row) other-seq))) + (defn levenshtein-distance "Levenshtein Distance - http://en.wikipedia.org/wiki/Levenshtein_distance In Information Theory and Computer Science, the Levenshtein distance is a @@ -31,8 +62,8 @@ little mutability as possible. Still maintains the O(nm) guarantee." [a b & {p :predicate :or {p =}}] (cond - (empty? a) (count b) - (empty? b) (count a) + (empty? (str a)) (count (str b)) ;; sometimes stack pushes numbers, force + (empty? 
(str b)) (count (str a)) ;; a and b to be strings :else (peek (reduce ;; we use a simple reduction to convert the previous row into the @@ -40,11 +71,11 @@ ;; element, the previous-row computed so far, and the predicate ;; to compare for equality (fn [prev-row current-element] - (compute-next-row prev-row current-element b p)) + (compute-next-row prev-row current-element (str b) p)) ;; we need to initialize the prev-row with the edit distance ;; between the various prefixes of b and the empty string - (range (inc (count b))) - a)))) + (range (inc (count (str b)))) + (str a))))) (defn sequence-similarity "Returns a number between 0 and 1, indicating how similar the sequences are