proper indexing

This commit is contained in:
Rowan Torbitzky-Lane 2025-02-25 22:20:52 -06:00
parent 2d9840c51b
commit db497a087c
5 changed files with 47 additions and 19 deletions

View File

@ -9,6 +9,7 @@ import HushGP.Genome
import HushGP.State import HushGP.State
import HushGP.GP.Variation import HushGP.GP.Variation
import HushGP.GP.Downsample import HushGP.GP.Downsample
import HushGP.Utility
-- import Debug.Trace (trace, traceStack) -- import Debug.Trace (trace, traceStack)
@ -21,7 +22,7 @@ generatePopulation pushArgs = do
-- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them. -- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them.
-- TODO: Need to make this runnable in parallel too. -- TODO: Need to make this runnable in parallel too.
evaluatePopulation :: PushArgs -> ([[Gene]], [Gene]) -> [Individual] -> [Individual] evaluatePopulation :: PushArgs -> ([[Gene]], [Gene], [Int]) -> [Individual] -> [Individual]
evaluatePopulation pushArgs passedTrainingData population = sort $ zipWith updateIndividual (map (errorFunction pushArgs pushArgs passedTrainingData . plushy) population) population evaluatePopulation pushArgs passedTrainingData population = sort $ zipWith updateIndividual (map (errorFunction pushArgs pushArgs passedTrainingData . plushy) population) population
-- | A helper function used in evaluatePopulation. Takes a [Double] as the error scores and an individual. -- | A helper function used in evaluatePopulation. Takes a [Double] as the error scores and an individual.
@ -29,19 +30,21 @@ evaluatePopulation pushArgs passedTrainingData population = sort $ zipWith updat
updateIndividual :: [Double] -> Individual -> Individual updateIndividual :: [Double] -> Individual -> Individual
updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCases = Just errors} updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCases = Just errors}
-- | The start of the gp loop. TODO: Make this more accurate later. -- | The start of the gp loop. Generates the population and then calls
-- gpLoop' with modifications to the variables if needed.
gpLoop :: PushArgs -> IO () gpLoop :: PushArgs -> IO ()
gpLoop pushArgs = do gpLoop pushArgs = do
unEvaledPopulation <- generatePopulation pushArgs unEvaledPopulation <- generatePopulation pushArgs
-- let evaledPop = evaluatePopulation pushArgs unEvaledPopulation let indexedTrainingData = makeIndexedTrainingData (trainingData pushArgs)
-- print evaledPop gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
print "placeholder for now" -- print "do this later"
-- | The guts of the GP loop. Where the work gets done after the initialization happens -- | The guts of the GP loop. Where the work gets done after the initialization happens
-- in the main gpLoop function. The first Int holds the generation count. The second Int -- in the main gpLoop function. The first Int holds the generation count. The second Int
-- holds the evaluation count. The list of Individuals is the population. The last parameter is -- holds the evaluation count. The list of Individuals is the population. The last parameter is
-- the training data (possibly downsampled). -- the training data (possibly downsampled).
gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> ([[Gene]], [Gene]) -> IO () gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> ([[Gene]], [Gene], [Int]) -> IO ()
gpLoop' pushArgs generation evaluations population indexedTrainingData = do gpLoop' pushArgs generation evaluations population indexedTrainingData = do
print "Put information about each generation here." print "Put information about each generation here."
when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation
@ -56,17 +59,18 @@ gpLoop' pushArgs generation evaluations population indexedTrainingData = do
print "Total test error simplified: " <> undefined -- Implement later print "Total test error simplified: " <> undefined -- Implement later
print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy
print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy
| (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length (fst indexedTrainingData)))) = | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length (tfst indexedTrainingData)))) =
print "Incomplete Run, saving the best so far." print $ "Best individual: " <> show (plushy bestInd)
| otherwise = gpLoop' pushArgs (succ generation) | otherwise = gpLoop' pushArgs (succ generation)
(evaluations + (populationSize pushArgs * length (fst $ trainingData pushArgs)) + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length (fst indexedTrainingData) - length (fst $ trainingData pushArgs)) else 0) + (if bestIndPassesDownsample then length (fst indexedTrainingData) - length (fst $ trainingData pushArgs) else 0)) (evaluations + (populationSize pushArgs * length (fst $ trainingData pushArgs)) + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length (tfst indexedTrainingData) - length (fst $ trainingData pushArgs)) else 0) + (if bestIndPassesDownsample then length (tfst indexedTrainingData) - length (fst $ trainingData pushArgs) else 0))
(if elitism pushArgs then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop) else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop)) (if elitism pushArgs
(if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0) then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length $ fst indexedTrainingData)) else indexedTrainingData) then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop)
else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop))
(if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0)
then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length $ tfst indexedTrainingData))
else indexedTrainingData)
nextAction nextAction
where where
-- \| This will have downsampling added to it later.
loopTrainData :: ([[Gene]], [Gene])
loopTrainData = indexedTrainingData
-- \| This will have downsampling functionality added later. -- \| This will have downsampling functionality added later.
parentReps :: [Individual] parentReps :: [Individual]
parentReps = [] parentReps = []

View File

@ -3,5 +3,7 @@ module HushGP.GP.Downsample where
import HushGP.State import HushGP.State
import HushGP.Genome import HushGP.Genome
updateCaseDistances :: [Individual] -> ([[Gene]], [Gene]) -> ([[Gene]], [Gene]) -> String -> Double -> ([[Gene]], [Gene]) updateCaseDistances :: [Individual] -> ([[Gene]], [Gene], [Int]) -> ([[Gene]], [Gene], [Int]) -> String -> Double -> ([[Gene]], [Gene], [Int])
updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined
-- assignIndiciesToData ::

View File

@ -49,7 +49,7 @@ data PushArgs = PushArgs
-- Arg 2: ([[Gene]], [Gene]) is the input data. Input is the first index and output is the second index. -- Arg 2: ([[Gene]], [Gene]) is the input data. Input is the first index and output is the second index.
-- Arg 3: [Gene] is the plushy representation of a program. -- Arg 3: [Gene] is the plushy representation of a program.
-- Returns the error list for a given set of inputs of type [Double]. -- Returns the error list for a given set of inputs of type [Double].
errorFunction :: PushArgs -> ([[Gene]], [Gene]) -> [Gene] -> [Double], errorFunction :: PushArgs -> ([[Gene]], [Gene], [Int]) -> [Gene] -> [Double],
-- | Type of informed downsampling. "solved", "elite", "soft". -- | Type of informed downsampling. "solved", "elite", "soft".
informedDownsamplingType :: String, informedDownsamplingType :: String,
-- | List of instructions to use in the evolutionary run. -- | List of instructions to use in the evolutionary run.

View File

@ -57,8 +57,8 @@ loadState plushy vals =
(loadProgram (plushyToPush plushy) emptyState){_input = Map.fromList (zip [0..] vals)} (loadProgram (plushyToPush plushy) emptyState){_input = Map.fromList (zip [0..] vals)}
-- | The error function for a single set of inputs and outputs. -- | The error function for a single set of inputs and outputs.
intErrorFunction :: PushArgs -> ([[Gene]], [Gene]) -> [Gene] -> [Double] intErrorFunction :: PushArgs -> ([[Gene]], [Gene], [Int]) -> [Gene] -> [Double]
intErrorFunction _args (inputData, outputData) plushy = intErrorFunction _args (inputData, outputData, _) plushy =
map abs $ zipWith (-) (map ((fromIntegral @Integer @Double . (errorHead . _int) . interpretExec) . loadState plushy) inputData) (map (fromIntegral @Integer @Double . extractGeneInt) outputData) map abs $ zipWith (-) (map ((fromIntegral @Integer @Double . (errorHead . _int) . interpretExec) . loadState plushy) inputData) (map (fromIntegral @Integer @Double . extractGeneInt) outputData)
intPushArgs :: PushArgs intPushArgs :: PushArgs
@ -76,7 +76,8 @@ intPushArgs = defaultPushArgs
tournamentSize = 5, tournamentSize = 5,
umadRate = 0.1, umadRate = 0.1,
variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)], variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)],
elitism = False elitism = False,
enableDownsampling = False
} }
main :: IO () main :: IO ()

View File

@ -13,3 +13,24 @@ randomInstruction instructions = do
-- | Generates a list of random instructions from a list of instructions passed in. -- | Generates a list of random instructions from a list of instructions passed in.
randomInstructions :: Int -> [Gene] -> IO [Gene] randomInstructions :: Int -> [Gene] -> IO [Gene]
randomInstructions amt instructions = replicateM amt (randomInstruction instructions) randomInstructions amt instructions = replicateM amt (randomInstruction instructions)
-- |Utility function: Used for indexed training data. Takes the first element of triple.
tfst :: (a, b, c) -> a
tfst (x, _, _) = x
-- |Utility function: Used for indexed training data. Takes the second element of triple.
tsnd :: (a, b, c) -> b
tsnd (_, x, _) = x
-- |Utility function: Used for indexed training data. Takes the third element of triple.
-- The third element in the context of indexed training data represents the index assigned.
thrd :: (a, b, c) -> c
thrd (_, _, x) = x
-- |Utility function: Converts a tuple to a triple with a passed value.
tupleToTriple :: (a, b) -> c -> (a, b, c)
tupleToTriple (x, y) z = (x, y, z)
-- |Utility function: Converts the training data passed in to an indexed representation
makeIndexedTrainingData :: ([[Gene]], [Gene]) -> ([[Gene]], [Gene], [Int])
makeIndexedTrainingData (inputs, outputs) = (inputs, outputs, [0..(length inputs)])