diff --git a/README.md b/README.md index 3430bd4..da237b4 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,40 @@ A PushGP implementation in Haskell * [ ] Write tests for every function. * [ ] tests/ are just copied from make-grade, need to write for this project. -I would really like to benchmark the following three versions for speed: -1) My custom data one (this repo) -2) direct independent lists for each stack (Rowan's miniHush) -3) eval string (similar to my custumized version of propel clojure) +## Design considerations +The biggest design constraint is that for the exec stack (but not data stacks) +we have to be able to detect type at runtime. + +A simple way to do this for the exec stack is as a list of a custom data type. +That custom Gene data type must have as many sub-types as there are types + function types. + +If the input stack is singular, then it needs a general Gene data type, +but if there were an input stack per type, they could be specific. + +I would really like to benchmark some of the following versions for speed: + +1) Where some functions can act on all stacks (this repo), +and thus every data stack is a list of a more general Gene type, +elements of which are wrapped in data TypeGene so they can be identified in stack-general functions. +To bind all the stacks for convenience, +we could put each stack list in a tuple, or a custom data type, Data.Map or Data.HashMap. +The exec stack will always need a more general Gene type, +with Gene types wrapping each individual thing, for runtime identification. + +2) Where type-specific functions act on each stack independently, +and thus each data stack can have exclusive specific basic types, +which are not wrapped in data TypeGene, because they do not need to be identified. +To bind all the stacks for convenience, +we could put each stack list in a tuple, or a custom data type, +but not in a Data.Map or Data.HashMap, as those require homogeneous (K, V) pairs. 
+The exec stack will always need a more general Gene type, +with Gene types wrapping each individual thing, for runtime identification. + +3) Alternatively, for the exec stack, we could store strings, +and eval strings (similar to my customized version of propel clojure). +Regular and input stacks can still be either TypeGene or basic types. +This is clearly not ideal. + +4) For the exec stack itself, +Typeable, Data.Generics, GHC.Generics, Data.Dynamic, heterogeneous lists, etc. could also help, +to detect the type of variables at runtime, but I would rather stick to language basics at first. diff --git a/src/Push.hs b/src/Push.hs index 183fec4..32e1db1 100644 --- a/src/Push.hs +++ b/src/Push.hs @@ -1,46 +1,31 @@ module Push where -import Data.List (foldl') - -- import Debug.Trace (trace, traceStack) --- GeneModular or Gene? --- Should we use a StateFunc or *Func for each push type? --- Start with whole StateFunc since it is monolithic (easier to start), --- then generalize and abstract with an apply method that itself takes a simpler function and the state? -{- -data GeneModular - = IntGene Int - | FloatGene Float - | BoolGene Bool - | StringGene String - | IntFunc [([Int] -> [Int] -> [Int])] - | StrFunc [([String] -> [String] -> [String])] - | BoolFunc [([Bool] -> [Bool] -> [Bool])] - | FloatFunc [([Float] -> [Float] -> [Float])] --} - +-- The exec stack must store heterogeneous types, +-- and we must be able to detect that type at runtime. +-- One solution is for the exec stack to be a list of Gene values, i.e. [Gene]. +-- The parameter stack could be singular [Gene] or multiple [atomic] types. 
data Gene = IntGene Int | FloatGene Float | BoolGene Bool | StringGene String - | StateFunc (State -> State -> State) + | StateFunc (State -> State) | Close - | Input Gene -- | Block [Gene] --- If we do plushy, +-- If we do plushy (as opposed to just detecting the Close itself, -- then we may need to make a structually recursive data structure for the "program" data structure -- exampleGenome = [Program] rather than [Gene], or just include the Block above? data State = State { exec :: [Gene], - int :: [Gene], - float :: [Gene], - bool :: [Gene], - string :: [Gene], - input :: [Gene] + int :: [Int], + float :: [Float], + bool :: [Bool], + string :: [String], + parameter :: [Gene] } emptyState :: State @@ -51,38 +36,67 @@ emptyState = float = [], bool = [], string = [], - input = [] + parameter = [] } -stackUpdate :: [Gene] -> State -> State -stackUpdate newstack@(StateFunc _ : _) (State _ i f b s p) = State newstack i f b s p -stackUpdate newstack@(IntGene _ : _) (State e _ f b s p) = State e newstack f b s p -stackUpdate newstack@(FloatGene _ : _) (State e i _ b s p) = State e i newstack b s p -stackUpdate newstack@(BoolGene _ : _) (State e i f _ s p) = State e i f newstack s p -stackUpdate newstack@(StringGene _ : _) (State e i f b _ p) = State e i f b newstack p -stackUpdate newstack@(Input _ : _) (State e i f b s _) = State e i f b s newstack -stackUpdate _ state = state - -unpackIntGene :: Gene -> Int -unpackIntGene (IntGene item) = item - --- Start with monolithic intAdd function: +-- Each core func should be: (State -> State -> State) +-- but each core function can use abstract helper functions. +-- That is more efficient than checking length. +-- Everntually, this can be part of the apply func to state helpers, +-- which should take the number and type of parameter they have. 
intAdd :: State -> State -intAdd state = - let result = sum (map unpackIntGene (take 2 (int state))) - dropped = drop 2 (int state) - in stackUpdate (IntGene result : dropped) state +intAdd (State es [] fs bs ss ps) = State es [] fs bs ss ps +intAdd (State es [i] fs bs ss ps) = State es [i] fs bs ss ps +intAdd (State es (i : is) fs bs ss ps) = State es ((i + head is) : drop 1 is) fs bs ss ps --- Later, generalize a function called applyFuncToState, --- which takes each simpler atomic function, and the state, --- and applies the function to the state, for example: --- intAdd :: (Int, Int) -> Int --- applyFuncState :: AtomicFuncTypes -> State -> State --- this would change Gene to something like GeneModular above. +-- let result = sum (take 2 (int state)) +-- dropped = drop 2 (int state) +-- in updateIntStack (result : dropped) state + +-- For safety, pattern match on [] and i:is or check for <2 long list after take 2? + +-- Optionally, split this off into independent functions +parameterLoad :: State -> State +parameterLoad (State es is fs bs ss []) = State es is fs bs ss [] +parameterLoad (State es is fs bs ss (p : ps)) = case p of + (IntGene val) -> State es (val : is) fs bs ss ps + (FloatGene val) -> State es is (val : fs) bs ss ps + (BoolGene val) -> State es is fs (val : bs) ss ps + (StringGene val) -> State es is fs bs (val : ss) ps -- Wow, a one-liner for interpreting a paretheses-free genome... -- Does not handle any data elements in genome yet, -- but condition could be added to the lambda. -- Need to update this when adding parethetical blocks too. 
-interpretGenome :: State -> [(State -> State)] -> State -interpretGenome state = foldl' (\acc f -> f acc) state +-- interpretFuncOnlyGenome :: State -> [State -> State] -> State +-- interpretFuncOnlyGenome = foldl' (\acc f -> f acc) +-- While this is not usable, it illustrates we want this pattern: +-- foldl (strict, cumulative accumulator), and not this pattern: +-- foldr (greedy/lazy incremental or quit early) + +-- Loads a genome into the exec stack +loadProgarm :: [Gene] -> State -> State +loadProgarm newstack (State _ i f b s p) = State newstack i f b s p + +-- Takes a Push state, and generates the next push state via: +-- If the first item on the EXEC stack is a single instruction +-- then pop it and execute it. +-- Else if the first item on the EXEC stack is a literal +-- then pop it and push it onto the appropriate stack. +-- Else (the first item must be a list) pop it and push all of the +-- items that it contains back onto the EXEC stack individually, +-- in reverse order (so that the item that was first in the list +-- ends up on top). +interpretExec :: State -> State +interpretExec (State [] is fs bs ss ps) = State [] is fs bs ss ps +interpretExec (State (e : es) is fs bs ss ps) = + let poppedState = State es is fs bs ss ps + in case e of + (IntGene val) -> interpretExec (State es (val : is) fs bs ss ps) + (FloatGene val) -> interpretExec (State es is (val : fs) bs ss ps) + (BoolGene val) -> interpretExec (State es is fs (val : bs) ss ps) + (StringGene val) -> interpretExec (State es is fs bs (val : ss) ps) + (StateFunc func) -> interpretExec (func poppedState) + +-- The safety of interpretExec on empty stacks depends on the functions it calls. +-- Need to make interpretExec strict, right? 
diff --git a/src/main.hs b/src/main.hs index 5d2ac76..4ff161f 100644 --- a/src/main.hs +++ b/src/main.hs @@ -3,18 +3,4 @@ import GP import Push main :: IO () -main = do - let exampleGenome = [intAdd, intAdd] - let exampleState = - State - { exec = [IntGene 5, FloatGene 3.4, BoolGene True, StringGene "hi"], - int = [IntGene 1, IntGene 2, IntGene 3], - float = [FloatGene 1.2, FloatGene 1.7], - bool = [BoolGene True, BoolGene False], - string = [StringGene "Hello", StringGene "Push"], - input = [Input $ IntGene 1, Input $ StringGene "Hi", Input $ BoolGene True, Input $ FloatGene 1.3] - } - -- This is an example of applying one function (head exampleGenome produces intAdd) to the exampleState: - assert ([3, 3] == map unpackIntGene (int (head exampleGenome exampleState))) pure () - -- This function applies an entire genome to the starting state, and produces the final state: - assert ([6] == map unpackIntGene (int (interpretGenome exampleState exampleGenome))) pure () +main = do pure ()