propeller/src/propeller/problems/PSB2/middle_character.cljc
2023-03-29 14:10:55 -04:00

87 lines
3.4 KiB
Clojure

(ns propeller.problems.PSB2.middle-character
"MIDDLE CHARACTER from PSB2
Given a string, return the middle
character as a string if it is odd length; return the two middle
characters as a string if it is even length.
Source: https://arxiv.org/pdf/2106.06086.pdf"
{:doc/format :markdown}
(:require [psb2.core :as psb2]
[propeller.genome :as genome]
[propeller.push.interpreter :as interpreter]
[propeller.utils :as utils]
[propeller.push.instructions :refer [get-stack-instructions]]
[propeller.push.state :as state]
[propeller.tools.metrics :as metrics]
[propeller.gp :as gp]
#?(:cljs [cljs.reader :refer [read-string]])))
(def train-and-test-data "Data taken from https://zenodo.org/record/5084812" (psb2/fetch-examples "data" "middle-character" 200 2000))
(defn random-int "Random integer between -100 and 100" [] (- (rand-int 201) 100))
(def instructions
"Stack-specific instructions, input instructions, close, and constants"
(utils/not-lazy
(concat
;;; stack-specific instructions
(get-stack-instructions #{:exec :integer :boolean :char :string :print})
;;; input instructions
(list :in1)
;;; close
(list 'close)
;;; ERCs (constants)
(list "" 0 1 2 random-int))))
(defn error-function
"Finds the behaviors and errors of an individual: Error is 0 if the value and
the program's selected behavior match, or 1 if they differ, or 1000000 if no
behavior is produced. The behavior is here defined as the final top item on
the STRING stack."
[argmap data individual]
(let [program (genome/plushy->push (:plushy individual) argmap)
inputs (map (fn [i] (get i :input1)) data)
correct-outputs (map (fn [i] (get i :output1)) data)
outputs (map (fn [input]
(state/peek-stack
(interpreter/interpret-program
program
(assoc state/empty-state :input {:in1 input})
(:step-limit argmap))
:string))
inputs)
errors (map (fn [correct-output output]
(if (= output :no-stack-item)
10000
(metrics/levenshtein-distance correct-output output)))
correct-outputs
outputs)]
(assoc individual
:behaviors outputs
:errors errors
:total-error #?(:clj (apply +' errors)
:cljs (apply + errors)))))
(defn -main
"Runs the top-level genetic programming function, giving it a map of
arguments with defaults that can be overridden from the command line
or through a passed map."
[& args]
(gp/gp
(merge
{:instructions instructions
:error-function error-function
:training-data (:train train-and-test-data)
:testing-data (:test train-and-test-data)
:max-generations 300
:population-size 1000
:max-initial-plushy-size 250
:step-limit 2000
:parent-selection :lexicase
:tournament-size 5
:umad-rate 0.1
:variation {:umad 1.0 :crossover 0.0}
:elitism false}
(apply hash-map (map #(if (string? %) (read-string %) %) args)))))