From 39f6b9cc5301a04067095d93b58d8a875518221c Mon Sep 17 00:00:00 2001 From: Rowan Torbitzky-Lane Date: Mon, 3 Mar 2025 14:34:30 -0600 Subject: [PATCH] work on variation --- src/HushGP/GP.hs | 134 +++++++++++++++++++----------------- src/HushGP/GP/Individual.hs | 3 + src/HushGP/GP/PushArgs.hs | 12 ++-- src/HushGP/GP/Variation.hs | 64 +++++++++++++++++ src/HushGP/Genome.hs | 9 +-- src/HushGP/Push.hs | 8 ++- src/HushGP/State.hs | 10 +++ src/HushGP/Utility.hs | 17 ++++- 8 files changed, 175 insertions(+), 82 deletions(-) diff --git a/src/HushGP/GP.hs b/src/HushGP/GP.hs index 9c76e69..2845699 100644 --- a/src/HushGP/GP.hs +++ b/src/HushGP/GP.hs @@ -1,17 +1,17 @@ -- | The main file containing information about the GP loop and various population transformation functions. module HushGP.GP where -import System.Random -import System.Random.Shuffle import Control.Monad import Control.Parallel.Strategies import Data.List (sort, uncons) import HushGP.GP.Downsample +import HushGP.GP.Individual import HushGP.GP.PushArgs import HushGP.GP.PushData import HushGP.GP.Variation import HushGP.Genome -import HushGP.GP.Individual +import System.Random +import System.Random.Shuffle -- import Debug.Trace (trace, traceStack) @@ -45,65 +45,69 @@ gpLoop pushArgs@(PushArgs {trainingData = tData}) = do -- holds the evaluation count. The list of Individuals is the population. The last parameter is -- the training data (possibly downsampled). gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> [PushData] -> IO () -gpLoop' pushArgs@(PushArgs {enableDownsampling = enableDS, solutionErrorThreshold = seThresh, downsampleParentsGens = dsParentGens, downsampleParentRate = dsParentRate, trainingData = tData}) - generation evaluations population indexedTrainingData = do - print "Put information about each generation here." - when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation - parentReps <- do - shuffledParents <- shuffle' population (length population) <$> initStdGen - if enableDS && (generation `mod` dsParentGens == 0) - then pure $ take (floor @Float (dsParentRate * (fromIntegral @Int @Float $ length population))) shuffledParents - else pure [] - let nextAction - | ( bestIndPassesDownsample - && ( (case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!") - <= solutionErrorThreshold epsilonPushArgs - ) - ) - || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) = - do - print $ "Successful generation: " <> show generation - print $ "Successful plushy: " <> show (plushy bestInd) - print $ "Successful program: " <> show (plushyToPush $ plushy bestInd) - when (useSimplification epsilonPushArgs) $ - do - let simplifiedPlushy = undefined -- TODO: simplification later - print "Total test error simplified: " <> undefined -- Implement later - print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy - print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy - | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) - || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) = - print $ "Best individual: " <> show (plushy bestInd) - | otherwise = - gpLoop' - pushArgs - (succ generation) - ( evaluations - + (populationSize pushArgs * length (trainingData pushArgs)) - + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0) - + (if bestIndPassesDownsample then length indexedTrainingData - length tData else 0) - ) - ( if elitism pushArgs - then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop) - else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop) - ) - ( if enableDS && ((generation `mod` dsParentGens) == 0) - then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData)) - else indexedTrainingData - ) - nextAction - where - -- \| This will have downsampling functionality added later. - repEvaluatedPop :: [Individual] - repEvaluatedPop = - if enableDS - then evaluatePopulation pushArgs indexedTrainingData population - else [] - evaledPop :: [Individual] - evaledPop = evaluatePopulation pushArgs tData population - bestInd :: Individual - bestInd = case uncons evaledPop of Just (x, _) -> x; _ -> error "Error: Population is empty!" - bestIndPassesDownsample :: Bool - bestIndPassesDownsample = enableDS && (extractTotalFitness bestInd <= seThresh) - epsilonPushArgs :: PushArgs - epsilonPushArgs = pushArgs {epsilons = Nothing} -- TODO: And this +gpLoop' + pushArgs@(PushArgs {enableDownsampling = enableDS, solutionErrorThreshold = seThresh, downsampleParentsGens = dsParentGens, downsampleParentRate = dsParentRate, trainingData = tData}) + generation + evaluations + population + indexedTrainingData = do + print "Put information about each generation here." + when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation + parentReps <- do + shuffledParents <- shuffle' population (length population) <$> initStdGen + if enableDS && (generation `mod` dsParentGens == 0) + then pure $ take (floor @Float (dsParentRate * (fromIntegral @Int @Float $ length population))) shuffledParents + else pure [] + let nextAction + | ( bestIndPassesDownsample + && ( (case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!") + <= solutionErrorThreshold epsilonPushArgs + ) + ) + || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) = + do + print $ "Successful generation: " <> show generation + print $ "Successful plushy: " <> show (plushy bestInd) + print $ "Successful program: " <> show (plushyToPush $ plushy bestInd) + when (useSimplification epsilonPushArgs) $ + do + let simplifiedPlushy = undefined -- TODO: simplification later + print "Total test error simplified: " <> undefined -- Implement later + print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy + print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy + | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) + || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) = + print $ "Best individual: " <> show (plushy bestInd) + | otherwise = + gpLoop' + pushArgs + (succ generation) + ( evaluations + + (populationSize pushArgs * length (trainingData pushArgs)) + + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0) + + (if bestIndPassesDownsample then length indexedTrainingData - length tData else 0) + ) + ( if elitism pushArgs + then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop) + else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop) + ) + ( if enableDS && ((generation `mod` dsParentGens) == 0) + then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData)) + else indexedTrainingData + ) + nextAction + where + -- \| This will have downsampling functionality added later. + repEvaluatedPop :: [Individual] + repEvaluatedPop = + if enableDS + then evaluatePopulation pushArgs indexedTrainingData population + else [] + evaledPop :: [Individual] + evaledPop = evaluatePopulation pushArgs tData population + bestInd :: Individual + bestInd = case uncons evaledPop of Just (x, _) -> x; _ -> error "Error: Population is empty!" + bestIndPassesDownsample :: Bool + bestIndPassesDownsample = enableDS && (extractTotalFitness bestInd <= seThresh) + epsilonPushArgs :: PushArgs + epsilonPushArgs = pushArgs {epsilons = Nothing} -- TODO: And this diff --git a/src/HushGP/GP/Individual.hs b/src/HushGP/GP/Individual.hs index 500399b..8ba2c5e 100644 --- a/src/HushGP/GP/Individual.hs +++ b/src/HushGP/GP/Individual.hs @@ -16,10 +16,13 @@ instance Ord Individual where ind0 <= ind1 = totalFitness ind0 <= totalFitness ind1 -- | Extracts the fitnessCases from an Individual. Errors if the field is empty. +-- Known as :errors in propeller. extractFitnessCases :: Individual -> [Double] extractFitnessCases Individual {fitnessCases = Nothing} = error "Error: fitnessCases is empty!" extractFitnessCases Individual {fitnessCases = Just xs} = xs +-- | Extracts the total fitness from and Individual. Errors if the field is empty. +-- Known as :total-error in propeller. extractTotalFitness :: Individual -> Double extractTotalFitness Individual {totalFitness = Nothing} = error "Error: totalFitness is empty!" extractTotalFitness Individual {totalFitness = Just x} = x diff --git a/src/HushGP/GP/PushArgs.hs b/src/HushGP/GP/PushArgs.hs index d80e4bc..1312207 100644 --- a/src/HushGP/GP/PushArgs.hs +++ b/src/HushGP/GP/PushArgs.hs @@ -9,10 +9,10 @@ import Data.Map qualified as Map -- of the evolutionary run in Hush. data PushArgs = PushArgs { - -- | For alternation, std deviation fo index when alternating. - alignmentDeviation :: Int, - -- | For alternation, probability of switching parents at each location. - alternationRate :: Float, + -- | For alternation, std deviation for index when alternating. + alignmentDeviation :: Double, + -- | For alternation, probability of switching parents at each location. Should be a value in the range [1,100] + alternationRate :: Int, -- | For bmx, rate genes are exchanged. bmxExchangeRate :: Float, -- | For bmx, max length of a gene. @@ -107,8 +107,8 @@ data PushArgs = PushArgs -- their args from. defaultPushArgs :: PushArgs defaultPushArgs = PushArgs { - alignmentDeviation = 2, - alternationRate = 0.1, + alignmentDeviation = 2.0, + alternationRate = 10, bmxExchangeRate = 0.5, bmxGeneLengthLimit = 10, bmxGapChangeProbability = 0.001, diff --git a/src/HushGP/GP/Variation.hs b/src/HushGP/GP/Variation.hs index 08f974b..5fa7f42 100644 --- a/src/HushGP/GP/Variation.hs +++ b/src/HushGP/GP/Variation.hs @@ -1,7 +1,71 @@ module HushGP.GP.Variation where +import Control.Monad +import HushGP.State import HushGP.GP.PushArgs import HushGP.GP.Individual +import HushGP.Utility + +-- |Performs a uniform crossover on two parents and returns the child. +-- Padding is placed to left of the shorter genome. +crossover :: [Gene] -> [Gene] -> IO [Gene] +crossover plushyA plushyB = do + filter (CrossoverPadding /=) <$> zipWithM (\short long -> randOneToOneHundred >>= (\num -> if num < 50 then pure short else pure long)) shorterPadded longer + where + shorter :: [Gene] + shorter = if length plushyA <= length plushyB then plushyA else plushyB + longer :: [Gene] + longer = if length plushyA > length plushyB then plushyA else plushyB + lengthDiff :: Int + lengthDiff = length longer - length shorter + shorterPadded :: [Gene] + shorterPadded = shorter <> replicate lengthDiff CrossoverPadding + +-- |Alternates between placing genes from one parent to the other in a new child based on some random numbers. +alternation :: PushArgs -> [Gene] -> [Gene] -> IO [Gene] +alternation pushArgs plushyA plushyB = do + randUsePlushyA <- randElem [True, False] + alternation' pushArgs 0 randUsePlushyA [] (length plushyA + length plushyB) plushyA plushyB + +-- |This is a chunker. The PushArgs used in the whole evolutionary run. +-- The first Int is used in the gaussian noise calculation and as a stop condition. +-- The Bool is used to determine which plushy is used to copy to the child. +-- The first [Gene] is the child being created recursively. +-- The second int is the iteration budget. Used to stop very long looping. +-- The second [Gene] is the first plushy parent. +-- The third [Gene] is the second plushy parent. +-- This returns the first [Gene] when the loop is complete. +alternation' :: PushArgs -> Int -> Bool -> [Gene] -> Int -> [Gene] -> [Gene] -> IO [Gene] +alternation' pushArgs@PushArgs{alternationRate = altRate, alignmentDeviation = alignDeviation} n usePlushyA !resultPlushy iterationBudget plushyA plushyB = do + randNum <- randOneToOneHundred + let nextAction + | n >= length (if usePlushyA then plushyA else plushyB) || iterationBudget <= 0 = pure resultPlushy + | randNum < altRate = do + gNoiseFactor <- gaussianNoiseFactor + alternation' pushArgs (max 0 (n + round (gNoiseFactor * alignDeviation))) (not usePlushyA) resultPlushy (pred iterationBudget) plushyA plushyB + | otherwise = alternation' pushArgs (succ n) usePlushyA (resultPlushy <> [(if usePlushyA then plushyA else plushyB) !! n]) (pred iterationBudget) plushyA plushyB + nextAction + +-- |Performs a uniform crossover on two parents and returns the child. +-- Padding is placed to left of the shorter genome. +tailAlignedCrossover :: [Gene] -> [Gene] -> IO [Gene] +tailAlignedCrossover plushyA plushyB = do + filter (CrossoverPadding /=) <$> zipWithM (\short long -> randOneToOneHundred >>= (\num -> if num < 50 then pure short else pure long)) shorterPadded longer + where + shorter :: [Gene] + shorter = if length plushyA <= length plushyB then plushyA else plushyB + longer :: [Gene] + longer = if length plushyA > length plushyB then plushyA else plushyB + lengthDiff :: Int + lengthDiff = length longer - length shorter + shorterPadded :: [Gene] + shorterPadded = replicate lengthDiff CrossoverPadding <> shorter + +-- |Takes the PushArgs for the evolutionary run and a singular plushy. +-- Returns the added onto plushy. Returns the the passed plushy with +-- new instructions possibly added before or after each existing instruction. +uniformAddition :: PushArgs -> [Gene] -> [Gene] +uniformAddition pushArgs plushy = undefined newIndividual :: PushArgs -> [Individual] -> Individual newIndividual = error "Implement this later" diff --git a/src/HushGP/Genome.hs b/src/HushGP/Genome.hs index f3fbb47..5ab05c4 100644 --- a/src/HushGP/Genome.hs +++ b/src/HushGP/Genome.hs @@ -3,24 +3,21 @@ module HushGP.Genome where import Data.List import Data.List.Split import Data.Map qualified as Map +import HushGP.GP.Individual import HushGP.GP.PushArgs import HushGP.Instructions.Opens import HushGP.State import HushGP.Utility -import HushGP.GP.Individual - --- import HushGP.Instructions --- import Debug.Trace -- | Makes a random individual based on the variables in a passed PushArgs. makeRandomIndividual :: PushArgs -> IO Individual makeRandomIndividual pushArgs = do randomPlushy <- makeRandomPlushy pushArgs - return Individual {plushy = randomPlushy, totalFitness = Nothing, fitnessCases = Nothing} + return Individual {plushy = randomPlushy, totalFitness = Nothing, fitnessCases = Nothing, selectionCases = Nothing} -- | Makes a random plushy from variables in a passed PushArgs. makeRandomPlushy :: PushArgs -> IO [Gene] -makeRandomPlushy pushArgs = randomInstructions (maxInitialPlushySize pushArgs) (instructionList pushArgs) +makeRandomPlushy PushArgs {maxInitialPlushySize = maxInitPSize, instructionList = iList} = randomInstructions maxInitPSize iList -- | A utility function to generate an amount based on an int rather than -- from an argmap. diff --git a/src/HushGP/Push.hs b/src/HushGP/Push.hs index d2a42a6..71d9011 100644 --- a/src/HushGP/Push.hs +++ b/src/HushGP/Push.hs @@ -72,7 +72,9 @@ interpretExec state@(State {_exec = e : es}) = (GeneVectorBoolERC (val, _)) -> interpretExec (state & exec .~ es & vectorBool .~ val : view vectorBool state) (GeneVectorStringERC (val, _)) -> interpretExec (state & exec .~ es & vectorString .~ val : view vectorString state) (GeneVectorCharERC (val, _)) -> interpretExec (state & exec .~ es & vectorChar .~ val : view vectorChar state) - Close -> undefined -- This should never happen. Will be converted to Blocks in the Plushy -> Exec stack process - (Open _) -> undefined -- This should also never happen. Should be converted in Plushy -> Exec stack process - Skip -> undefined -- This should double also never happen. + Close -> error "Error: Close found in exec stack!" -- This should never happen. Will be converted to Blocks in the Plushy -> Exec stack process + (Open _) -> error "Error: Open found in exec stack!" -- This should also never happen. Should be converted in Plushy -> Exec stack process + Skip -> error "Error: Skip found in exec stack!" -- This should double also never happen. + CrossoverPadding -> error "Error: CrossoverPadding found in exec stack!" + Gap -> error "Error: Gap found in exec stack!" interpretExec state = state diff --git a/src/HushGP/State.hs b/src/HushGP/State.hs index 8dc96d1..55e377e 100644 --- a/src/HushGP/State.hs +++ b/src/HushGP/State.hs @@ -38,6 +38,10 @@ data Gene | GeneVectorBoolERC ([Bool], StdGen) | GeneVectorStringERC ([String], StdGen) | GeneVectorCharERC ([Char], StdGen) + | -- | This is only used in the crossover function in GP/Variation. Should not be in genome besides there. + CrossoverPadding + | -- | This is used in best match crossover (bmx in PushArgs). + Gap instance Eq Gene where GeneInt x == GeneInt y = x == y @@ -76,6 +80,8 @@ instance Eq Gene where GeneVectorBoolERC (x, _) == GeneVectorBool y = x == y GeneVectorStringERC (x, _) == GeneVectorString y = x == y GeneVectorCharERC (x, _) == GeneVectorChar y = x == y + CrossoverPadding == CrossoverPadding = True + Gap == Gap = True _ == _ = False instance Ord Gene where @@ -115,6 +121,8 @@ instance Ord Gene where GeneVectorBoolERC (x, _) <= GeneVectorBool y = x <= y GeneVectorStringERC (x, _) <= GeneVectorString y = x <= y GeneVectorCharERC (x, _) <= GeneVectorChar y = x <= y + CrossoverPadding <= CrossoverPadding = True + Gap <= Gap = True _ <= _ = False instance Show Gene where @@ -144,6 +152,8 @@ instance Show Gene where show (GeneVectorBoolERC x) = "Bool Vec ERC: " <> show x show (GeneVectorStringERC x) = "String Vec ERC: " <> show x show (GeneVectorCharERC x) = "Char Vec ERC: " <> show x + show CrossoverPadding = "Crossover Padding" + show Gap = "Gap" -- | The structure that holds all of the values. data State = State diff --git a/src/HushGP/Utility.hs b/src/HushGP/Utility.hs index 51270f8..880967b 100644 --- a/src/HushGP/Utility.hs +++ b/src/HushGP/Utility.hs @@ -1,9 +1,9 @@ module HushGP.Utility where -import Data.List import Control.Monad -import System.Random +import Data.List import HushGP.State +import System.Random -- | Generates a single random instruction from a list of instructions. randomInstruction :: [Gene] -> IO Gene @@ -29,5 +29,18 @@ mapIndexed' count f (x : xs) = f count x : mapIndexed' (count + 1) f xs randElem :: [a] -> IO a randElem xs = (xs !!) . fst . uniformR (0, length xs - 1) <$> initStdGen +-- | Used in some of the selection operations. Returns an error saying cases is empty. headCases :: [Int] -> Int headCases xs = case uncons xs of Just (y, _) -> y; _ -> error "Error: cases is empty!" + +-- | Almost a constant but has some randomness inside. Double for more decimal precision. +-- Noise of mean of 0 and std dev of 1. This is a neat function to visualize on desmos! +gaussianNoiseFactor :: IO Double +gaussianNoiseFactor = do + randDecimal0 <- fst . uniformR (0.0 :: Double, 1.0 :: Double) <$> initStdGen + randDecimal1 <- fst . uniformR (0.0 :: Double, 1.0 :: Double) <$> initStdGen + pure (sqrt ((-2.0) * log randDecimal0) * cos (2.0 * pi * randDecimal1)) + +-- | A random number between 1 and 100. +randOneToOneHundred :: IO Int +randOneToOneHundred = fst . uniformR (1 :: Int, 100 :: Int) <$> initStdGen