many changes

This commit is contained in:
Rowan Torbitzky-Lane 2025-02-26 20:50:27 -06:00
parent 0c0d57dd8d
commit 06b4adb239
7 changed files with 68 additions and 63 deletions

View File

@ -76,7 +76,7 @@ library
-- Other library packages from which modules are imported. -- Other library packages from which modules are imported.
build-depends: build-depends:
base, containers, lens, split, QuickCheck, regex-tdfa, template-haskell, random, parallel base, containers, lens, split, QuickCheck, regex-tdfa, template-haskell, random, parallel, random-shuffle
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src

View File

@ -4,12 +4,11 @@ module HushGP.GP where
import Control.Monad import Control.Monad
import Control.Parallel.Strategies import Control.Parallel.Strategies
import Data.List (sort, uncons) import Data.List (sort, uncons)
import HushGP.GP.PushArgs
import HushGP.Genome
import HushGP.GP.Variation
import HushGP.GP.Downsample import HushGP.GP.Downsample
import HushGP.GP.PushArgs
import HushGP.GP.PushData import HushGP.GP.PushData
import HushGP.Utility import HushGP.GP.Variation
import HushGP.Genome
-- import Debug.Trace (trace, traceStack) -- import Debug.Trace (trace, traceStack)
@ -35,10 +34,8 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase
gpLoop :: PushArgs -> IO () gpLoop :: PushArgs -> IO ()
gpLoop pushArgs = do gpLoop pushArgs = do
unEvaledPopulation <- generatePopulation pushArgs unEvaledPopulation <- generatePopulation pushArgs
let indexedTrainingData = makeIndexedTrainingData (trainingData pushArgs) let indexedTrainingData = assignIndiciesToData (trainingData pushArgs)
gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
-- print "do this later"
-- | The guts of the GP loop. Where the work gets done after the initialization happens -- | The guts of the GP loop. Where the work gets done after the initialization happens
-- in the main gpLoop function. The first Int holds the generation count. The second Int -- in the main gpLoop function. The first Int holds the generation count. The second Int
@ -48,7 +45,10 @@ gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> [PushData] -> IO ()
gpLoop' pushArgs generation evaluations population indexedTrainingData = do gpLoop' pushArgs generation evaluations population indexedTrainingData = do
print "Put information about each generation here." print "Put information about each generation here."
when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation
let nextAction | (bestIndPassesDownsample && ((case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) = let nextAction
| (bestIndPassesDownsample &&
((case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!")
<= solutionErrorThreshold epsilonPushArgs)) || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) =
do do
print $ "Successful generation: " <> show generation print $ "Successful generation: " <> show generation
print $ "Successful plushy: " <> show (plushy bestInd) print $ "Successful plushy: " <> show (plushy bestInd)
@ -59,16 +59,26 @@ gpLoop' pushArgs generation evaluations population indexedTrainingData = do
print "Total test error simplified: " <> undefined -- Implement later print "Total test error simplified: " <> undefined -- Implement later
print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy
print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy
| (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) = | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs))
|| (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) =
print $ "Best individual: " <> show (plushy bestInd) print $ "Best individual: " <> show (plushy bestInd)
| otherwise = gpLoop' pushArgs (succ generation) | otherwise =
(evaluations + (populationSize pushArgs * length (trainingData pushArgs)) + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0) + (if bestIndPassesDownsample then length indexedTrainingData - length (trainingData pushArgs) else 0)) gpLoop'
(if elitism pushArgs pushArgs
(succ generation)
( evaluations
+ (populationSize pushArgs * length (trainingData pushArgs))
+ (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0)
+ (if bestIndPassesDownsample then length indexedTrainingData - length (trainingData pushArgs) else 0)
)
( if elitism pushArgs
then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop) then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop)
else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop)) else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop)
(if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0) )
( if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0)
then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData)) then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData))
else indexedTrainingData) else indexedTrainingData
)
nextAction nextAction
where where
-- \| This will have downsampling functionality added later. -- \| This will have downsampling functionality added later.

View File

@ -1,10 +1,26 @@
module HushGP.GP.Downsample where module HushGP.GP.Downsample where
import HushGP.State import System.Random.Shuffle
import System.Random
import HushGP.Genome import HushGP.Genome
import HushGP.GP.PushData import HushGP.GP.PushData
import HushGP.GP.PushArgs
-- |Sets the index of the passed training data.
assignIndiciesToData :: [PushData] -> [PushData]
assignIndiciesToData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just idx}) oldData [0..]
-- |Initializes cases distances for passed training data.
initializeCaseDistances :: PushArgs -> [PushData]
initializeCaseDistances pushArgs = [ dat{_caseDistances = Just (replicate (length $ trainingData pushArgs) (fromIntegral @Int @Double $ populationSize pushArgs))} | dat <- trainingData pushArgs ]
-- |Updates the cases distances when downsampling
updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData] updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData]
updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined
-- assignIndiciesToData :: -- |Draws a random amount of data points from a passed list of data points.
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData - 1) <$> initStdGen
selectDownsampleMaxmin :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleMaxmin pushArgs@(PushArgs {downsampleRate = dsrate}) pushData = undefined

View File

@ -10,10 +10,6 @@ data PushData = PushData {
_outputData :: Gene, _outputData :: Gene,
_downsampleIndex :: Maybe Int, _downsampleIndex :: Maybe Int,
_caseDistances :: Maybe [Double] _caseDistances :: Maybe [Double]
} } deriving (Show)
-- |Utility function: Sets the index of the passed training data.
makeIndexedTrainingData :: [PushData] -> [PushData]
makeIndexedTrainingData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just idx}) oldData [0..]
$(makeLenses ''PushData) $(makeLenses ''PushData)

View File

@ -26,8 +26,8 @@ targetFunction :: Integer -> Integer
targetFunction x = (x * x * x) + (2 * x) targetFunction x = (x * x * x) + (2 * x)
-- | The training data for the model. -- | The training data for the model.
trainData :: [PushData] intTrainData :: [PushData]
trainData = map (\num -> PushData { intTrainData = map (\num -> PushData {
_inputData = [GeneInt num], _inputData = [GeneInt num],
_outputData = (GeneInt . targetFunction) num, _outputData = (GeneInt . targetFunction) num,
_downsampleIndex = Nothing, _downsampleIndex = Nothing,
@ -35,9 +35,8 @@ trainData = map (\num -> PushData {
[-10..10] [-10..10]
-- | The testing data for the model. -- | The testing data for the model.
testData :: [PushData] intTestData :: [PushData]
-- testData = (chunksOf 1 $ map GeneInt $ [-20..(-11)] <> [11..21], map (GeneInt . targetFunction) ([-20..(-11)] <> [11..21])) intTestData = map (\num -> PushData {
testData = map (\num -> PushData {
_inputData = [GeneInt num], _inputData = [GeneInt num],
_outputData = (GeneInt . targetFunction) num, _outputData = (GeneInt . targetFunction) num,
_downsampleIndex = Nothing, _downsampleIndex = Nothing,
@ -84,8 +83,8 @@ intPushArgs = defaultPushArgs
{ {
instructionList = runInstructions, instructionList = runInstructions,
errorFunction = intErrorFunction, errorFunction = intErrorFunction,
trainingData = trainData, trainingData = intTrainData,
testingData = testData, testingData = intTestData,
maxGenerations = 300, maxGenerations = 300,
populationSize = 1000, populationSize = 1000,
maxInitialPlushySize = 100, maxInitialPlushySize = 100,
@ -95,7 +94,8 @@ intPushArgs = defaultPushArgs
umadRate = 0.1, umadRate = 0.1,
variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)], variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)],
elitism = False, elitism = False,
enableDownsampling = False enableDownsampling = False,
downsampleRate = 0.5
} }
main :: IO () main :: IO ()

View File

@ -23,7 +23,7 @@ extractAllFunctions pattern = do
loc <- location loc <- location
-- file <- runIO $ readFile pattern -- file <- runIO $ readFile pattern
file <- runIO $ readFile $ loc_filename loc file <- runIO $ readFile $ loc_filename loc
return $ nub $ filter (=~ pattern) $ map fst $ concatMap lex $ lines file pure $ nub $ filter (=~ pattern) $ map fst $ concatMap lex $ lines file
-- | Extracts all functions from a Q [String] (to be used with extractAllFunctions) -- | Extracts all functions from a Q [String] (to be used with extractAllFunctions)
-- funcs has a list of all functions from extractAllFunctions -- funcs has a list of all functions from extractAllFunctions
@ -35,4 +35,4 @@ functionExtractor :: String -> Q Exp
functionExtractor pattern = do functionExtractor pattern = do
funcs <- extractAllFunctions pattern funcs <- extractAllFunctions pattern
let makePair n = TupE [Just $ VarE $ mkName n, Just $ LitE $ StringL n] let makePair n = TupE [Just $ VarE $ mkName n, Just $ LitE $ StringL n]
return $ ListE $ map makePair funcs pure $ ListE $ map makePair funcs

View File

@ -8,25 +8,8 @@ import System.Random
randomInstruction :: [Gene] -> IO Gene randomInstruction :: [Gene] -> IO Gene
randomInstruction instructions = do randomInstruction instructions = do
impureGen <- initStdGen impureGen <- initStdGen
return $ instructions !! fst (uniformR (0, length instructions - 1) impureGen) pure $ instructions !! fst (uniformR (0, length instructions - 1) impureGen)
-- | Generates a list of random instructions from a list of instructions passed in. -- | Generates a list of random instructions from a list of instructions passed in.
randomInstructions :: Int -> [Gene] -> IO [Gene] randomInstructions :: Int -> [Gene] -> IO [Gene]
randomInstructions amt instructions = replicateM amt (randomInstruction instructions) randomInstructions amt instructions = replicateM amt (randomInstruction instructions)
-- |Utility function: Used for indexed training data. Takes the first element of triple.
tfst :: (a, b, c) -> a
tfst (x, _, _) = x
-- |Utility function: Used for indexed training data. Takes the second element of triple.
tsnd :: (a, b, c) -> b
tsnd (_, x, _) = x
-- |Utility function: Used for indexed training data. Takes the third element of triple.
-- The third element in the context of indexed training data represents the index assigned.
thrd :: (a, b, c) -> c
thrd (_, _, x) = x
-- |Utility function: Converts a tuple to a triple with a passed value.
tupleToTriple :: (a, b) -> c -> (a, b, c)
tupleToTriple (x, y) z = (x, y, z)