Compare commits

..

No commits in common. "1c6421f6da164e7aae12510953a7d4d6a6529527" and "0c0d57dd8d6bc2974b42dc824c17b7963d26f102" have entirely different histories.

7 changed files with 63 additions and 70 deletions

View File

@ -76,7 +76,7 @@ library
-- Other library packages from which modules are imported. -- Other library packages from which modules are imported.
build-depends: build-depends:
base, containers, lens, split, QuickCheck, regex-tdfa, template-haskell, random, parallel, random-shuffle base, containers, lens, split, QuickCheck, regex-tdfa, template-haskell, random, parallel
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src

View File

@ -4,11 +4,12 @@ module HushGP.GP where
import Control.Monad import Control.Monad
import Control.Parallel.Strategies import Control.Parallel.Strategies
import Data.List (sort, uncons) import Data.List (sort, uncons)
import HushGP.GP.Downsample
import HushGP.GP.PushArgs import HushGP.GP.PushArgs
import HushGP.GP.PushData
import HushGP.GP.Variation
import HushGP.Genome import HushGP.Genome
import HushGP.GP.Variation
import HushGP.GP.Downsample
import HushGP.GP.PushData
import HushGP.Utility
-- import Debug.Trace (trace, traceStack) -- import Debug.Trace (trace, traceStack)
@ -34,8 +35,10 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase
gpLoop :: PushArgs -> IO () gpLoop :: PushArgs -> IO ()
gpLoop pushArgs = do gpLoop pushArgs = do
unEvaledPopulation <- generatePopulation pushArgs unEvaledPopulation <- generatePopulation pushArgs
let indexedTrainingData = assignIndiciesToData (trainingData pushArgs) let indexedTrainingData = makeIndexedTrainingData (trainingData pushArgs)
gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
-- print "do this later"
-- | The guts of the GP loop. Where the work gets done after the initialization happens -- | The guts of the GP loop. Where the work gets done after the initialization happens
-- in the main gpLoop function. The first Int holds the generation count. The second Int -- in the main gpLoop function. The first Int holds the generation count. The second Int
@ -45,40 +48,27 @@ gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> [PushData] -> IO ()
gpLoop' pushArgs generation evaluations population indexedTrainingData = do gpLoop' pushArgs generation evaluations population indexedTrainingData = do
print "Put information about each generation here." print "Put information about each generation here."
when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation
let nextAction let nextAction | (bestIndPassesDownsample && ((case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) =
| (bestIndPassesDownsample && do
((case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!") print $ "Successful generation: " <> show generation
<= solutionErrorThreshold epsilonPushArgs)) || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) = print $ "Successful plushy: " <> show (plushy bestInd)
print $ "Successful program: " <> show (plushyToPush $ plushy bestInd)
when (useSimplification epsilonPushArgs) $
do do
print $ "Successful generation: " <> show generation let simplifiedPlushy = undefined -- TODO: simplification later
print $ "Successful plushy: " <> show (plushy bestInd) print "Total test error simplified: " <> undefined -- Implement later
print $ "Successful program: " <> show (plushyToPush $ plushy bestInd) print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy
when (useSimplification epsilonPushArgs) $ print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy
do | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) =
let simplifiedPlushy = undefined -- TODO: simplification later print $ "Best individual: " <> show (plushy bestInd)
print "Total test error simplified: " <> undefined -- Implement later | otherwise = gpLoop' pushArgs (succ generation)
print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy (evaluations + (populationSize pushArgs * length (trainingData pushArgs)) + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0) + (if bestIndPassesDownsample then length indexedTrainingData - length (trainingData pushArgs) else 0))
print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy (if elitism pushArgs
| (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop)
|| (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) = else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop))
print $ "Best individual: " <> show (plushy bestInd) (if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0)
| otherwise = then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData))
gpLoop' else indexedTrainingData)
pushArgs
(succ generation)
( evaluations
+ (populationSize pushArgs * length (trainingData pushArgs))
+ (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0)
+ (if bestIndPassesDownsample then length indexedTrainingData - length (trainingData pushArgs) else 0)
)
( if elitism pushArgs
then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop)
else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop)
)
( if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0)
then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData))
else indexedTrainingData
)
nextAction nextAction
where where
-- \| This will have downsampling functionality added later. -- \| This will have downsampling functionality added later.

View File

@ -1,28 +1,10 @@
module HushGP.GP.Downsample where module HushGP.GP.Downsample where
import System.Random.Shuffle import HushGP.State
import System.Random
import HushGP.Genome import HushGP.Genome
import HushGP.GP.PushData import HushGP.GP.PushData
import HushGP.GP.PushArgs
-- |Sets the index of the passed training data.
assignIndiciesToData :: [PushData] -> [PushData]
assignIndiciesToData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just idx}) oldData [0..]
-- |Initializes cases distances for passed training data.
initializeCaseDistances :: PushArgs -> [PushData]
initializeCaseDistances pushArgs = [ dat{_caseDistances = Just (replicate (length $ trainingData pushArgs) (fromIntegral @Int @Double $ populationSize pushArgs))} | dat <- trainingData pushArgs ]
-- |Updates the cases distances when downsampling
updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData] updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData]
updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined
-- |Draws a random amount of data points from a passed list of data points. -- assignIndiciesToData ::
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
selectDownsampleMaxmin :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleMaxmin pushArgs@(PushArgs {downsampleRate = dsRate}) pushData = do
shuffledCases <- shuffle' pushData (length pushData) <$> initStdGen

View File

@ -10,6 +10,10 @@ data PushData = PushData {
_outputData :: Gene, _outputData :: Gene,
_downsampleIndex :: Maybe Int, _downsampleIndex :: Maybe Int,
_caseDistances :: Maybe [Double] _caseDistances :: Maybe [Double]
} deriving (Show) }
-- |Utility function: Sets the index of the passed training data.
makeIndexedTrainingData :: [PushData] -> [PushData]
makeIndexedTrainingData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just idx}) oldData [0..]
$(makeLenses ''PushData) $(makeLenses ''PushData)

View File

@ -26,8 +26,8 @@ targetFunction :: Integer -> Integer
targetFunction x = (x * x * x) + (2 * x) targetFunction x = (x * x * x) + (2 * x)
-- | The training data for the model. -- | The training data for the model.
intTrainData :: [PushData] trainData :: [PushData]
intTrainData = map (\num -> PushData { trainData = map (\num -> PushData {
_inputData = [GeneInt num], _inputData = [GeneInt num],
_outputData = (GeneInt . targetFunction) num, _outputData = (GeneInt . targetFunction) num,
_downsampleIndex = Nothing, _downsampleIndex = Nothing,
@ -35,8 +35,9 @@ intTrainData = map (\num -> PushData {
[-10..10] [-10..10]
-- | The testing data for the model. -- | The testing data for the model.
intTestData :: [PushData] testData :: [PushData]
intTestData = map (\num -> PushData { -- testData = (chunksOf 1 $ map GeneInt $ [-20..(-11)] <> [11..21], map (GeneInt . targetFunction) ([-20..(-11)] <> [11..21]))
testData = map (\num -> PushData {
_inputData = [GeneInt num], _inputData = [GeneInt num],
_outputData = (GeneInt . targetFunction) num, _outputData = (GeneInt . targetFunction) num,
_downsampleIndex = Nothing, _downsampleIndex = Nothing,
@ -83,8 +84,8 @@ intPushArgs = defaultPushArgs
{ {
instructionList = runInstructions, instructionList = runInstructions,
errorFunction = intErrorFunction, errorFunction = intErrorFunction,
trainingData = intTrainData, trainingData = trainData,
testingData = intTestData, testingData = testData,
maxGenerations = 300, maxGenerations = 300,
populationSize = 1000, populationSize = 1000,
maxInitialPlushySize = 100, maxInitialPlushySize = 100,
@ -94,8 +95,7 @@ intPushArgs = defaultPushArgs
umadRate = 0.1, umadRate = 0.1,
variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)], variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)],
elitism = False, elitism = False,
enableDownsampling = False, enableDownsampling = False
downsampleRate = 0.5
} }
main :: IO () main :: IO ()

View File

@ -23,7 +23,7 @@ extractAllFunctions pattern = do
loc <- location loc <- location
-- file <- runIO $ readFile pattern -- file <- runIO $ readFile pattern
file <- runIO $ readFile $ loc_filename loc file <- runIO $ readFile $ loc_filename loc
pure $ nub $ filter (=~ pattern) $ map fst $ concatMap lex $ lines file return $ nub $ filter (=~ pattern) $ map fst $ concatMap lex $ lines file
-- | Extracts all functions from a Q [String] (to be used with extractAllFunctions) -- | Extracts all functions from a Q [String] (to be used with extractAllFunctions)
-- funcs has a list of all functions from extractAllFunctions -- funcs has a list of all functions from extractAllFunctions
@ -35,4 +35,4 @@ functionExtractor :: String -> Q Exp
functionExtractor pattern = do functionExtractor pattern = do
funcs <- extractAllFunctions pattern funcs <- extractAllFunctions pattern
let makePair n = TupE [Just $ VarE $ mkName n, Just $ LitE $ StringL n] let makePair n = TupE [Just $ VarE $ mkName n, Just $ LitE $ StringL n]
pure $ ListE $ map makePair funcs return $ ListE $ map makePair funcs

View File

@ -8,8 +8,25 @@ import System.Random
randomInstruction :: [Gene] -> IO Gene randomInstruction :: [Gene] -> IO Gene
randomInstruction instructions = do randomInstruction instructions = do
impureGen <- initStdGen impureGen <- initStdGen
pure $ instructions !! fst (uniformR (0, length instructions - 1) impureGen) return $ instructions !! fst (uniformR (0, length instructions - 1) impureGen)
-- | Generates a list of random instructions from a list of instructions passed in. -- | Generates a list of random instructions from a list of instructions passed in.
randomInstructions :: Int -> [Gene] -> IO [Gene] randomInstructions :: Int -> [Gene] -> IO [Gene]
randomInstructions amt instructions = replicateM amt (randomInstruction instructions) randomInstructions amt instructions = replicateM amt (randomInstruction instructions)
-- |Utility function: Used for indexed training data. Takes the first element of triple.
tfst :: (a, b, c) -> a
tfst (x, _, _) = x
-- |Utility function: Used for indexed training data. Takes the second element of triple.
tsnd :: (a, b, c) -> b
tsnd (_, x, _) = x
-- |Utility function: Used for indexed training data. Takes the third element of triple.
-- The third element in the context of indexed training data represents the index assigned.
thrd :: (a, b, c) -> c
thrd (_, _, x) = x
-- |Utility function: Converts a tuple to a triple with a passed value.
tupleToTriple :: (a, b) -> c -> (a, b, c)
tupleToTriple (x, y) z = (x, y, z)