From 847341e4d8dd632b9aef961cfdc170ebd23b8bd8 Mon Sep 17 00:00:00 2001 From: Lee Spector Date: Wed, 17 Jan 2024 15:32:12 -0500 Subject: [PATCH] Add and reorganize regression problem files --- src/propeller/gp.cljc | 2 +- src/propeller/problems/float_regression.cljc | 83 ---------------- .../problems/{ => regression}/UBall5D.cljc | 2 +- .../{ => regression}/integer_regression.cljc | 2 +- .../problems/regression/nguyen_f1.cljc | 94 ++++++++++++++++++ .../problems/regression/nguyen_f5.cljc | 97 +++++++++++++++++++ 6 files changed, 194 insertions(+), 86 deletions(-) delete mode 100644 src/propeller/problems/float_regression.cljc rename src/propeller/problems/{ => regression}/UBall5D.cljc (99%) rename src/propeller/problems/{ => regression}/integer_regression.cljc (98%) create mode 100644 src/propeller/problems/regression/nguyen_f1.cljc create mode 100644 src/propeller/problems/regression/nguyen_f5.cljc diff --git a/src/propeller/gp.cljc b/src/propeller/gp.cljc index 98e6ca7..6d93bd6 100644 --- a/src/propeller/gp.cljc +++ b/src/propeller/gp.cljc @@ -61,7 +61,7 @@ :bmx-complementary? false ; for bmx, whether mates selected using reverse case sequence of first parent :bmx-maximum-distance 1000000 ; for bmx, don't exchange if distance is greater than this :bmx-same-gene-count false ; for bmx, only allow exchanges between individuals with same number of genes - :closes :specified ; :pecified, :balanced, :none + :closes :specified ; :specified, :balanced, :none :custom-report false ; if provided, should be a function that takes an argmap :dont-end false ; if true, keep running until limit regardless of success :downsample? 
true ; wether to use downsampling diff --git a/src/propeller/problems/float_regression.cljc b/src/propeller/problems/float_regression.cljc deleted file mode 100644 index fa896ea..0000000 --- a/src/propeller/problems/float_regression.cljc +++ /dev/null @@ -1,83 +0,0 @@ -(ns propeller.problems.float-regression - (:require [propeller.genome :as genome] - [propeller.push.interpreter :as interpreter] - [propeller.push.state :as state] - [propeller.tools.math :as math] - [propeller.gp :as gp] - #?(:cljs [cljs.reader :refer [read-string]]))) - -(defn- target-function - "Target function: f(x) = (1+ x^3)^3 + 1" - [x] - (inc (* (inc (* x x x)) (inc (* x x x)) (inc (* x x x))))) - -(def train-and-test-data - (let [train-inputs (range -1.5 1.5 0.1) - test-inputs (range -1.75 1.75 0.05)] - {:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs) - :test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)})) - -(def instructions - (list :in1 - :float_add - :float_subtract - :float_mult - :float_quot - :float_eq - :exec_dup - :exec_if - 'close - 0.0 - 1.0)) - -(defn error-function - "Finds the behaviors and errors of an individual. The error is the absolute - deviation between the target output value and the program's selected behavior, - or 1000000 if no behavior is produced. The behavior is here defined as the - final top item on the FLOAT stack." 
- ([argmap data individual] - (let [program (genome/plushy->push (:plushy individual) argmap) - inputs (map (fn [x] (first (:input1 x))) data) - correct-outputs (map (fn [x] (first (:output1 x))) data) - outputs (map (fn [input] - (state/peek-stack - (interpreter/interpret-program - program - (assoc state/empty-state :input {:in1 input}) - (:step-limit argmap)) - :float)) - inputs) - errors (map (fn [correct-output output] - (if (= output :no-stack-item) - 1000000 - (math/abs (- correct-output output)))) - correct-outputs - outputs)] - (assoc individual - :behaviors outputs - :errors errors - :total-error #?(:clj (apply +' errors) - :cljs (apply + errors)))))) - -(defn -main - "Runs the top-level genetic programming function, giving it a map of - arguments with defaults that can be overridden from the command line - or through a passed map." - [& args] - (gp/gp - (merge - {:instructions instructions - :error-function error-function - :training-data (:train train-and-test-data) - :testing-data (:test train-and-test-data) - :max-generations 300 - :population-size 1000 - :max-initial-plushy-size 100 - :step-limit 200 - :parent-selection :epsilon-lexicase - :tournament-size 5 - :umad-rate 0.1 - :solution-error-threshold 0.5 - :variation {:umad 1.0 :crossover 0.0} - :elitism false} - (apply hash-map (map #(if (string? 
%) (read-string %) %) args))))) diff --git a/src/propeller/problems/UBall5D.cljc b/src/propeller/problems/regression/UBall5D.cljc similarity index 99% rename from src/propeller/problems/UBall5D.cljc rename to src/propeller/problems/regression/UBall5D.cljc index 4334f1b..97846eb 100644 --- a/src/propeller/problems/UBall5D.cljc +++ b/src/propeller/problems/regression/UBall5D.cljc @@ -1,4 +1,4 @@ -(ns propeller.problems.UBall5D +(ns propeller.problems.regression.UBall5D (:require [propeller.genome :as genome] [propeller.push.interpreter :as interpreter] [propeller.push.state :as state] diff --git a/src/propeller/problems/integer_regression.cljc b/src/propeller/problems/regression/integer_regression.cljc similarity index 98% rename from src/propeller/problems/integer_regression.cljc rename to src/propeller/problems/regression/integer_regression.cljc index 297198c..a3fbb54 100644 --- a/src/propeller/problems/integer_regression.cljc +++ b/src/propeller/problems/regression/integer_regression.cljc @@ -1,4 +1,4 @@ -(ns propeller.problems.integer-regression +(ns propeller.problems.regression.integer-regression (:require [propeller.genome :as genome] [propeller.push.interpreter :as interpreter] [propeller.push.state :as state] diff --git a/src/propeller/problems/regression/nguyen_f1.cljc b/src/propeller/problems/regression/nguyen_f1.cljc new file mode 100644 index 0000000..06142e8 --- /dev/null +++ b/src/propeller/problems/regression/nguyen_f1.cljc @@ -0,0 +1,94 @@ +;; This file implements a version of the Nguyen-F1 symbolic regression problem, as described in various +;; publications in the genetic programming literature including: +;; +;; Makke, N., Chawla, S. Interpretable scientific discovery with symbolic regression: a review. +;; Artif Intell Rev 57, 2 (2024). 
https://doi.org/10.1007/s10462-023-10622-0 +;; +;; Note however that it may differ in some respects from the problem used elsewhere, for example +;; in the data ranges and genetic programming function sets which are not always fully documented +;; in the literature. For this reason, while this code can be used as an example and for comparing +;; different configurations of the present system, results obtained with this code may not be directly +;; comparable to those published in the literature. + +(ns propeller.problems.regression.nguyen-f1 + (:require [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.push.state :as state] + [propeller.tools.math :as math] + [propeller.gp :as gp] + #?(:cljs [cljs.reader :refer [read-string]]))) + +(defn- target-function + "Nguyen F1 = x^3 + x^2 + x" + [x] + (+ (* x x x) (* x x) x)) + +(def train-and-test-data + (let [train-inputs (range -4.0 4.0 0.1) + test-inputs (range -4.0 4.0 0.05)] + {:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs) + :test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)})) + +(def instructions + (list :in1 + :float_add + :float_subtract + :float_mult + :float_div + :float_sin + :float_cos + :float_tan + 0.0 + 1.0)) + +(defn error-function + "Finds the behaviors and errors of an individual. The error is the absolute + deviation between the target output value and the program's selected behavior, + or 1000000 if no behavior is produced. The behavior is here defined as the + final top item on the FLOAT stack."
+ ([argmap data individual] + (let [program (genome/plushy->push (:plushy individual) argmap) + inputs (map (fn [x] (first (:input1 x))) data) + correct-outputs (map (fn [x] (first (:output1 x))) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :float)) + inputs) + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 1000000 + (math/abs (- correct-output output)))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) + +(defn -main + "Runs the top-level genetic programming function, giving it a map of + arguments with defaults that can be overridden from the command line + or through a passed map." + [& args] + (gp/gp + (merge + {:instructions instructions + :error-function error-function + :training-data (:train train-and-test-data) + :testing-data (:test train-and-test-data) + :downsample? false + :solution-error-threshold 0.1 + :max-generations 300 + :population-size 1000 + :max-initial-plushy-size 50 + :step-limit 100 + :parent-selection :epsilon-lexicase + :umad-rate 0.05 + :variation {:umad 1.0} + :simplification? true} + (apply hash-map (map #(if (string? %) (read-string %) %) args))))) diff --git a/src/propeller/problems/regression/nguyen_f5.cljc b/src/propeller/problems/regression/nguyen_f5.cljc new file mode 100644 index 0000000..6391369 --- /dev/null +++ b/src/propeller/problems/regression/nguyen_f5.cljc @@ -0,0 +1,97 @@ +;; This file implements a version of the Nguyen-F5 symbolic regression problem, as described in various +;; publications in the genetic programming literature including: +;; +;; Makke, N., Chawla, S. Interpretable scientific discovery with symbolic regression: a review. +;; Artif Intell Rev 57, 2 (2024). 
https://doi.org/10.1007/s10462-023-10622-0 +;; +;; Note however that it may differ in some respects from the problem used elsewhere, for example +;; in the data ranges and genetic programming function sets which are not always fully documented +;; in the literature. For this reason, while this code can be used as an example and for comparing +;; different configurations of the present system, results obtained with this code may not be directly +;; comparable to those published in the literature. + + +(ns propeller.problems.regression.nguyen-f5 + (:require [propeller.genome :as genome] + [propeller.push.interpreter :as interpreter] + [propeller.push.state :as state] + [propeller.tools.math :as math] + [propeller.gp :as gp] + #?(:cljs [cljs.reader :refer [read-string]]))) + +(defn- target-function + "Nguyen F5 = sin(x^2)cos(x) - 1" + [x] + (- (* (Math/sin (* x x)) + (Math/cos x)) + 1)) + +(def train-and-test-data + (let [train-inputs (range -4.0 4.0 0.1) + test-inputs (range -4.0 4.0 0.05)] + {:train (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) train-inputs) + :test (map (fn [x] {:input1 (vector x) :output1 (vector (target-function x))}) test-inputs)})) + +(def instructions + (list :in1 + :float_add + :float_subtract + :float_mult + :float_div + :float_sin + :float_cos + :float_tan + 0.0 + 1.0)) + +(defn error-function + "Finds the behaviors and errors of an individual. The error is the absolute + deviation between the target output value and the program's selected behavior, + or 1000000 if no behavior is produced. The behavior is here defined as the + final top item on the FLOAT stack."
+ ([argmap data individual] + (let [program (genome/plushy->push (:plushy individual) argmap) + inputs (map (fn [x] (first (:input1 x))) data) + correct-outputs (map (fn [x] (first (:output1 x))) data) + outputs (map (fn [input] + (state/peek-stack + (interpreter/interpret-program + program + (assoc state/empty-state :input {:in1 input}) + (:step-limit argmap)) + :float)) + inputs) + errors (map (fn [correct-output output] + (if (= output :no-stack-item) + 1000000 + (math/abs (- correct-output output)))) + correct-outputs + outputs)] + (assoc individual + :behaviors outputs + :errors errors + :total-error #?(:clj (apply +' errors) + :cljs (apply + errors)))))) + +(defn -main + "Runs the top-level genetic programming function, giving it a map of + arguments with defaults that can be overridden from the command line + or through a passed map." + [& args] + (gp/gp + (merge + {:instructions instructions + :error-function error-function + :training-data (:train train-and-test-data) + :testing-data (:test train-and-test-data) + :downsample? false + :solution-error-threshold 0.1 + :max-generations 300 + :population-size 1000 + :max-initial-plushy-size 50 + :step-limit 100 + :parent-selection :epsilon-lexicase + :umad-rate 0.05 + :variation {:umad 1.0} + :simplification? true} + (apply hash-map (map #(if (string? %) (read-string %) %) args)))))