diff --git a/HushGP.cabal b/HushGP.cabal index 9df2425..e6b7486 100644 --- a/HushGP.cabal +++ b/HushGP.cabal @@ -76,7 +76,7 @@ library -- Other library packages from which modules are imported. build-depends: - base, containers, lens, split, QuickCheck, regex-tdfa, template-haskell, random, parallel + base, containers, lens, split, QuickCheck, regex-tdfa, template-haskell, random, parallel, random-shuffle -- Directories containing source files. hs-source-dirs: src diff --git a/src/HushGP/GP.hs b/src/HushGP/GP.hs index 6f3f6bf..3d84f60 100644 --- a/src/HushGP/GP.hs +++ b/src/HushGP/GP.hs @@ -4,12 +4,11 @@ module HushGP.GP where import Control.Monad import Control.Parallel.Strategies import Data.List (sort, uncons) -import HushGP.GP.PushArgs -import HushGP.Genome -import HushGP.GP.Variation import HushGP.GP.Downsample +import HushGP.GP.PushArgs import HushGP.GP.PushData -import HushGP.Utility +import HushGP.GP.Variation +import HushGP.Genome -- import Debug.Trace (trace, traceStack) @@ -35,10 +34,8 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase gpLoop :: PushArgs -> IO () gpLoop pushArgs = do unEvaledPopulation <- generatePopulation pushArgs - let indexedTrainingData = makeIndexedTrainingData (trainingData pushArgs) + let indexedTrainingData = assignIndiciesToData (trainingData pushArgs) gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData - -- print "do this later" - -- | The guts of the GP loop. Where the work gets done after the initialization happens -- in the main gpLoop function. The first Int holds the generation count. The second Int @@ -48,27 +45,40 @@ gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> [PushData] -> IO () gpLoop' pushArgs generation evaluations population indexedTrainingData = do print "Put information about each generation here." when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation - let nextAction | (bestIndPassesDownsample && ((case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) = - do - print $ "Successful generation: " <> show generation - print $ "Successful plushy: " <> show (plushy bestInd) - print $ "Successful program: " <> show (plushyToPush $ plushy bestInd) - when (useSimplification epsilonPushArgs) $ + let nextAction + | (bestIndPassesDownsample && + ((case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!") + <= solutionErrorThreshold epsilonPushArgs)) || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) = do - let simplifiedPlushy = undefined -- TODO: simplification later - print "Total test error simplified: " <> undefined -- Implement later - print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy - print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy - | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) = - print $ "Best individual: " <> show (plushy bestInd) - | otherwise = gpLoop' pushArgs (succ generation) - (evaluations + (populationSize pushArgs * length (trainingData pushArgs)) + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0) + (if bestIndPassesDownsample then length indexedTrainingData - length (trainingData pushArgs) else 0)) - (if elitism pushArgs - then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop) - else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop)) - (if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0) - then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData)) - else indexedTrainingData) + print $ "Successful generation: " <> show generation + print $ "Successful plushy: " <> show (plushy bestInd) + print $ "Successful program: " <> show (plushyToPush $ plushy bestInd) + when (useSimplification epsilonPushArgs) $ + do + let simplifiedPlushy = undefined -- TODO: simplification later + print "Total test error simplified: " <> undefined -- Implement later + print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy + print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy + | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) + || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) = + print $ "Best individual: " <> show (plushy bestInd) + | otherwise = + gpLoop' + pushArgs + (succ generation) + ( evaluations + + (populationSize pushArgs * length (trainingData pushArgs)) + + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0) + + (if bestIndPassesDownsample then length indexedTrainingData - length (trainingData pushArgs) else 0) + ) + ( if elitism pushArgs + then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop) + else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop) + ) + ( if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0) + then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData)) + else indexedTrainingData + ) nextAction where -- \| This will have downsampling functionality added later. diff --git a/src/HushGP/GP/Downsample.hs b/src/HushGP/GP/Downsample.hs index cac606d..42a77ef 100644 --- a/src/HushGP/GP/Downsample.hs +++ b/src/HushGP/GP/Downsample.hs @@ -1,10 +1,26 @@ module HushGP.GP.Downsample where -import HushGP.State +import System.Random.Shuffle +import System.Random import HushGP.Genome import HushGP.GP.PushData +import HushGP.GP.PushArgs +-- |Sets the index of the passed training data. +assignIndiciesToData :: [PushData] -> [PushData] +assignIndiciesToData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just idx}) oldData [0..] + +-- |Initializes cases distances for passed training data. +initializeCaseDistances :: PushArgs -> [PushData] +initializeCaseDistances pushArgs = [ dat{_caseDistances = Just (replicate (length $ trainingData pushArgs) (fromIntegral @Int @Double $ populationSize pushArgs))} | dat <- trainingData pushArgs ] + +-- |Updates the cases distances when downsampling updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData] updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined --- assignIndiciesToData :: +-- |Draws a random amount of data points from a passed list of data points. +selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData] +selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData - 1) <$> initStdGen + +selectDownsampleMaxmin :: PushArgs -> [PushData] -> IO [PushData] +selectDownsampleMaxmin pushArgs@(PushArgs {downsampleRate = dsrate}) pushData = undefined diff --git a/src/HushGP/GP/PushData.hs b/src/HushGP/GP/PushData.hs index 6c7aa22..39385fb 100644 --- a/src/HushGP/GP/PushData.hs +++ b/src/HushGP/GP/PushData.hs @@ -10,10 +10,6 @@ data PushData = PushData { _outputData :: Gene, _downsampleIndex :: Maybe Int, _caseDistances :: Maybe [Double] -} - --- |Utility function: Sets the index of the passed training data. -makeIndexedTrainingData :: [PushData] -> [PushData] -makeIndexedTrainingData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just idx}) oldData [0..] +} deriving (Show) $(makeLenses ''PushData) diff --git a/src/HushGP/Problems/IntegerRegression.hs b/src/HushGP/Problems/IntegerRegression.hs index 520f6fa..430a082 100644 --- a/src/HushGP/Problems/IntegerRegression.hs +++ b/src/HushGP/Problems/IntegerRegression.hs @@ -26,8 +26,8 @@ targetFunction :: Integer -> Integer targetFunction x = (x * x * x) + (2 * x) -- | The training data for the model. -trainData :: [PushData] -trainData = map (\num -> PushData { +intTrainData :: [PushData] +intTrainData = map (\num -> PushData { _inputData = [GeneInt num], _outputData = (GeneInt . targetFunction) num, _downsampleIndex = Nothing, @@ -35,9 +35,8 @@ trainData = map (\num -> PushData { [-10..10] -- | The testing data for the model. -testData :: [PushData] --- testData = (chunksOf 1 $ map GeneInt $ [-20..(-11)] <> [11..21], map (GeneInt . targetFunction) ([-20..(-11)] <> [11..21])) -testData = map (\num -> PushData { +intTestData :: [PushData] +intTestData = map (\num -> PushData { _inputData = [GeneInt num], _outputData = (GeneInt . targetFunction) num, _downsampleIndex = Nothing, @@ -84,8 +83,8 @@ intPushArgs = defaultPushArgs { instructionList = runInstructions, errorFunction = intErrorFunction, - trainingData = trainData, - testingData = testData, + trainingData = intTrainData, + testingData = intTestData, maxGenerations = 300, populationSize = 1000, maxInitialPlushySize = 100, @@ -95,7 +94,8 @@ intPushArgs = defaultPushArgs umadRate = 0.1, variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)], elitism = False, - enableDownsampling = False + enableDownsampling = False, + downsampleRate = 0.5 } main :: IO () diff --git a/src/HushGP/TH.hs b/src/HushGP/TH.hs index a53b10d..eb860f2 100644 --- a/src/HushGP/TH.hs +++ b/src/HushGP/TH.hs @@ -23,7 +23,7 @@ extractAllFunctions pattern = do loc <- location -- file <- runIO $ readFile pattern file <- runIO $ readFile $ loc_filename loc - return $ nub $ filter (=~ pattern) $ map fst $ concatMap lex $ lines file + pure $ nub $ filter (=~ pattern) $ map fst $ concatMap lex $ lines file -- | Extracts all functions from a Q [String] (to be used with extractAllFunctions) -- funcs has a list of all functions from extractAllFunctions @@ -35,4 +35,4 @@ functionExtractor :: String -> Q Exp functionExtractor pattern = do funcs <- extractAllFunctions pattern let makePair n = TupE [Just $ VarE $ mkName n, Just $ LitE $ StringL n] - return $ ListE $ map makePair funcs + pure $ ListE $ map makePair funcs diff --git a/src/HushGP/Utility.hs b/src/HushGP/Utility.hs index b8c3e8c..c99fc22 100644 --- a/src/HushGP/Utility.hs +++ b/src/HushGP/Utility.hs @@ -8,25 +8,8 @@ import System.Random randomInstruction :: [Gene] -> IO Gene randomInstruction instructions = do impureGen <- initStdGen - return $ instructions !! fst (uniformR (0, length instructions - 1) impureGen) + pure $ instructions !! fst (uniformR (0, length instructions - 1) impureGen) -- | Generates a list of random instructions from a list of instructions passed in. randomInstructions :: Int -> [Gene] -> IO [Gene] randomInstructions amt instructions = replicateM amt (randomInstruction instructions) - --- |Utility function: Used for indexed training data. Takes the first element of triple. -tfst :: (a, b, c) -> a -tfst (x, _, _) = x - --- |Utility function: Used for indexed training data. Takes the second element of triple. -tsnd :: (a, b, c) -> b -tsnd (_, x, _) = x - --- |Utility function: Used for indexed training data. Takes the third element of triple. --- The third element in the context of indexed training data represents the index assigned. -thrd :: (a, b, c) -> c -thrd (_, _, x) = x - --- |Utility function: Converts a tuple to a triple with a passed value. -tupleToTriple :: (a, b) -> c -> (a, b, c) -tupleToTriple (x, y) z = (x, y, z)