Compare commits

..

No commits in common. "1c6421f6da164e7aae12510953a7d4d6a6529527" and "0c0d57dd8d6bc2974b42dc824c17b7963d26f102" have entirely different histories.

7 changed files with 63 additions and 70 deletions

View File

@ -76,7 +76,7 @@ library
-- Other library packages from which modules are imported.
build-depends:
base, containers, lens, split, QuickCheck, regex-tdfa, template-haskell, random, parallel, random-shuffle
base, containers, lens, split, QuickCheck, regex-tdfa, template-haskell, random, parallel
-- Directories containing source files.
hs-source-dirs: src

View File

@ -4,11 +4,12 @@ module HushGP.GP where
import Control.Monad
import Control.Parallel.Strategies
import Data.List (sort, uncons)
import HushGP.GP.Downsample
import HushGP.GP.PushArgs
import HushGP.GP.PushData
import HushGP.GP.Variation
import HushGP.Genome
import HushGP.GP.Variation
import HushGP.GP.Downsample
import HushGP.GP.PushData
import HushGP.Utility
-- import Debug.Trace (trace, traceStack)
@ -34,8 +35,10 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase
gpLoop :: PushArgs -> IO ()
gpLoop pushArgs = do
unEvaledPopulation <- generatePopulation pushArgs
let indexedTrainingData = assignIndiciesToData (trainingData pushArgs)
let indexedTrainingData = makeIndexedTrainingData (trainingData pushArgs)
gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
-- print "do this later"
-- | The guts of the GP loop. Where the work gets done after the initialization happens
-- in the main gpLoop function. The first Int holds the generation count. The second Int
@ -45,10 +48,7 @@ gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> [PushData] -> IO ()
gpLoop' pushArgs generation evaluations population indexedTrainingData = do
print "Put information about each generation here."
when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation
let nextAction
| (bestIndPassesDownsample &&
((case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!")
<= solutionErrorThreshold epsilonPushArgs)) || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) =
let nextAction | (bestIndPassesDownsample && ((case totalFitness (updateIndividual (errorFunction epsilonPushArgs epsilonPushArgs indexedTrainingData (plushy bestInd)) bestInd) of (Just x) -> x; _ -> error "Error: Best downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) || (not (enableDownsampling epsilonPushArgs) && ((case totalFitness bestInd of (Just x) -> x; _ -> error "error: Best non-downsample individual has no fitness!") <= solutionErrorThreshold epsilonPushArgs)) =
do
print $ "Successful generation: " <> show generation
print $ "Successful plushy: " <> show (plushy bestInd)
@ -59,26 +59,16 @@ gpLoop' pushArgs generation evaluations population indexedTrainingData = do
print "Total test error simplified: " <> undefined -- Implement later
print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy
print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy
| (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs))
|| (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) =
| (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length indexedTrainingData))) =
print $ "Best individual: " <> show (plushy bestInd)
| otherwise =
gpLoop'
pushArgs
(succ generation)
( evaluations
+ (populationSize pushArgs * length (trainingData pushArgs))
+ (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0)
+ (if bestIndPassesDownsample then length indexedTrainingData - length (trainingData pushArgs) else 0)
)
| otherwise = gpLoop' pushArgs (succ generation)
(evaluations + (populationSize pushArgs * length (trainingData pushArgs)) + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length indexedTrainingData - length (trainingData pushArgs)) else 0) + (if bestIndPassesDownsample then length indexedTrainingData - length (trainingData pushArgs) else 0))
(if elitism pushArgs
then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop)
else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop)
)
else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs evaledPop))
(if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0)
then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length indexedTrainingData))
else indexedTrainingData
)
else indexedTrainingData)
nextAction
where
-- \| This will have downsampling functionality added later.

View File

@ -1,28 +1,10 @@
module HushGP.GP.Downsample where
import System.Random.Shuffle
import System.Random
import HushGP.State
import HushGP.Genome
import HushGP.GP.PushData
import HushGP.GP.PushArgs
-- |Sets the index of the passed training data.
assignIndiciesToData :: [PushData] -> [PushData]
assignIndiciesToData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just idx}) oldData [0..]
-- |Initializes cases distances for passed training data.
initializeCaseDistances :: PushArgs -> [PushData]
initializeCaseDistances pushArgs = [ dat{_caseDistances = Just (replicate (length $ trainingData pushArgs) (fromIntegral @Int @Double $ populationSize pushArgs))} | dat <- trainingData pushArgs ]
-- |Updates the cases distances when downsampling
updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData]
updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined
-- |Draws a random amount of data points from a passed list of data points.
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
selectDownsampleMaxmin :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleMaxmin pushArgs@(PushArgs {downsampleRate = dsRate}) pushData = do
shuffledCases <- shuffle' pushData (length pushData) <$> initStdGen
-- assignIndiciesToData ::

View File

@ -10,6 +10,10 @@ data PushData = PushData {
_outputData :: Gene,
_downsampleIndex :: Maybe Int,
_caseDistances :: Maybe [Double]
} deriving (Show)
}
-- |Utility function: Sets the index of the passed training data.
makeIndexedTrainingData :: [PushData] -> [PushData]
makeIndexedTrainingData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just idx}) oldData [0..]
$(makeLenses ''PushData)

View File

@ -26,8 +26,8 @@ targetFunction :: Integer -> Integer
targetFunction x = (x * x * x) + (2 * x)
-- | The training data for the model.
intTrainData :: [PushData]
intTrainData = map (\num -> PushData {
trainData :: [PushData]
trainData = map (\num -> PushData {
_inputData = [GeneInt num],
_outputData = (GeneInt . targetFunction) num,
_downsampleIndex = Nothing,
@ -35,8 +35,9 @@ intTrainData = map (\num -> PushData {
[-10..10]
-- | The testing data for the model.
intTestData :: [PushData]
intTestData = map (\num -> PushData {
testData :: [PushData]
-- testData = (chunksOf 1 $ map GeneInt $ [-20..(-11)] <> [11..21], map (GeneInt . targetFunction) ([-20..(-11)] <> [11..21]))
testData = map (\num -> PushData {
_inputData = [GeneInt num],
_outputData = (GeneInt . targetFunction) num,
_downsampleIndex = Nothing,
@ -83,8 +84,8 @@ intPushArgs = defaultPushArgs
{
instructionList = runInstructions,
errorFunction = intErrorFunction,
trainingData = intTrainData,
testingData = intTestData,
trainingData = trainData,
testingData = testData,
maxGenerations = 300,
populationSize = 1000,
maxInitialPlushySize = 100,
@ -94,8 +95,7 @@ intPushArgs = defaultPushArgs
umadRate = 0.1,
variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)],
elitism = False,
enableDownsampling = False,
downsampleRate = 0.5
enableDownsampling = False
}
main :: IO ()

View File

@ -23,7 +23,7 @@ extractAllFunctions pattern = do
loc <- location
-- file <- runIO $ readFile pattern
file <- runIO $ readFile $ loc_filename loc
pure $ nub $ filter (=~ pattern) $ map fst $ concatMap lex $ lines file
return $ nub $ filter (=~ pattern) $ map fst $ concatMap lex $ lines file
-- | Extracts all functions from a Q [String] (to be used with extractAllFunctions)
-- funcs has a list of all functions from extractAllFunctions
@ -35,4 +35,4 @@ functionExtractor :: String -> Q Exp
functionExtractor pattern = do
funcs <- extractAllFunctions pattern
let makePair n = TupE [Just $ VarE $ mkName n, Just $ LitE $ StringL n]
pure $ ListE $ map makePair funcs
return $ ListE $ map makePair funcs

View File

@ -8,8 +8,25 @@ import System.Random
randomInstruction :: [Gene] -> IO Gene
randomInstruction instructions = do
impureGen <- initStdGen
pure $ instructions !! fst (uniformR (0, length instructions - 1) impureGen)
return $ instructions !! fst (uniformR (0, length instructions - 1) impureGen)
-- | Generates a list of random instructions from a list of instructions passed in.
randomInstructions :: Int -> [Gene] -> IO [Gene]
randomInstructions amt instructions = replicateM amt (randomInstruction instructions)
-- |Utility function: Used for indexed training data. Takes the first element of triple.
tfst :: (a, b, c) -> a
tfst (x, _, _) = x
-- |Utility function: Used for indexed training data. Takes the second element of triple.
tsnd :: (a, b, c) -> b
tsnd (_, x, _) = x
-- |Utility function: Used for indexed training data. Takes the third element of triple.
-- The third element in the context of indexed training data represents the index assigned.
thrd :: (a, b, c) -> c
thrd (_, _, x) = x
-- |Utility function: Converts a tuple to a triple with a passed value.
tupleToTriple :: (a, b) -> c -> (a, b, c)
tupleToTriple (x, y) z = (x, y, z)