diff --git a/src/HushGP/GP.hs b/src/HushGP/GP.hs index 3d84f60..38da66e 100644 --- a/src/HushGP/GP.hs +++ b/src/HushGP/GP.hs @@ -15,8 +15,8 @@ import HushGP.Genome -- | Using a PushArgs object, generates a population of the specified size with the -- specified instructions in parallel. generatePopulation :: PushArgs -> IO [Individual] -generatePopulation pushArgs = do - pop <- replicateM (populationSize pushArgs) (makeRandomIndividual pushArgs) +generatePopulation pushArgs@(PushArgs {populationSize = popSize}) = do + pop <- replicateM popSize (makeRandomIndividual pushArgs) return (pop `using` evalList rpar) -- Does this work? Need to test this with the HEC viewing tool. -- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them. @@ -32,9 +32,9 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase -- | The start of the gp loop. Generates the population and then calls -- gpLoop' with modifications to the variables if needed. gpLoop :: PushArgs -> IO () -gpLoop pushArgs = do +gpLoop pushArgs@(PushArgs {trainingData = tData}) = do unEvaledPopulation <- generatePopulation pushArgs - let indexedTrainingData = assignIndiciesToData (trainingData pushArgs) + let indexedTrainingData = assignIndiciesToData tData gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData -- | The guts of the GP loop. Where the work gets done after the initialization happens diff --git a/src/HushGP/GP/Downsample.hs b/src/HushGP/GP/Downsample.hs index b4c1893..65ec7ab 100644 --- a/src/HushGP/GP/Downsample.hs +++ b/src/HushGP/GP/Downsample.hs @@ -2,10 +2,12 @@ module HushGP.GP.Downsample where import System.Random.Shuffle import System.Random +import Data.List import HushGP.Genome import HushGP.GP.PushData import HushGP.GP.PushArgs -import Data.List +import HushGP.Tools.Metrics +import HushGP.Instructions.Utility -- |Sets the index of the passed training data. assignIndiciesToData :: [PushData] -> [PushData] @@ -21,7 +23,7 @@ updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingTy -- |Draws a random amount of data points from a passed list of data points. selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData] -selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen +selectDownsampleRandom (PushArgs {downsampleRate = dsRate}) pushData = take (floor (dsRate * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen -- |Selects a downsample that has it's cases maximally far away by sequentially -- adding cases to the downsample that have their closest case maximally far away. @@ -43,5 +45,40 @@ selectDownsampleMaxmin' :: [PushData] -> [PushData] -> Int -> IO [PushData] selectDownsampleMaxmin' newDownsample casesToPickFrom goalSize | length newDownsample >= goalSize = pure newDownsample | otherwise = do - minCaseDistances - + let newDistances = map extractDistance newDownsample + let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances) + selectedCaseIndex <- argMax minCaseDistances + stdGen <- initStdGen + selectDownsampleMaxmin' + ((casesToPickFrom !! selectedCaseIndex) : newDownsample) + (shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen) + goalSize + +-- |selects a downsample that has it's cases maximally far away by sequentially +-- adding cases to the downsample that have their closest case maximally far away +-- automatically stops when the maximum minimum distance is below delta +selectDownsampleMaxminAdaptive :: PushArgs -> [PushData] -> IO [PushData] +selectDownsampleMaxminAdaptive (PushArgs {caseDelta = cDelta}) pushData = do + shuffledCases <- shuffle' pushData (length pushData) <$> initStdGen + selectDownsampleMaxminAdaptive' + (case uncons shuffledCases of (Just (x, _)) -> [x]; _ -> error "error: shuffledCases empty!") + (drop 1 shuffledCases) + cDelta + +-- |The main loop of selectDownsampleMaxmin. This is where most of calculation happens. +-- When called from selectDownsampleMaxmin: The first [PushData] holds the head of the +-- original pushData wrapped in a list, the second [PushData] holds the rest of the list +-- without the aformentioned head. The Int is the caseDelta derived from the downsample rate +-- and the length of the original [pushData]. +selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Double -> IO [PushData] +selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do + let newDistances = map extractDistance newDownsample + let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances) + selectedCaseIndex <- argMax minCaseDistances + stdGen <- initStdGen + if null casesToPickFrom || (maximum minCaseDistances <= cDelta) + then pure newDownsample + else selectDownsampleMaxminAdaptive' + ((casesToPickFrom !! selectedCaseIndex) : newDownsample) + (shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen) + cDelta diff --git a/src/HushGP/GP/PushArgs.hs b/src/HushGP/GP/PushArgs.hs index 5b4410c..d66d188 100644 --- a/src/HushGP/GP/PushArgs.hs +++ b/src/HushGP/GP/PushArgs.hs @@ -93,7 +93,10 @@ data PushArgs = PushArgs -- Takes a Map of String -> Float where the string is the genetic operator variation :: Map.Map String Float, -- | The epsilons calculated for epsilon lexicase selection. Only used for epsilon lexicase selection. - epsilons :: Maybe [Double] + epsilons :: Maybe [Double], + -- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when + -- the maximum minimum distance is too far away. + caseDelta :: Double } -- | The default values for which all runs of Hush derive @@ -139,5 +142,6 @@ defaultPushArgs = PushArgs { trainingData = error "Must supply the trainingData yourself", umadRate = 0.1, variation = Map.fromList [("umad", 1.0)], - epsilons = Nothing + epsilons = Nothing, + caseDelta = 0.0 } diff --git a/src/HushGP/GP/PushData.hs b/src/HushGP/GP/PushData.hs index 39385fb..38ec51a 100644 --- a/src/HushGP/GP/PushData.hs +++ b/src/HushGP/GP/PushData.hs @@ -12,4 +12,20 @@ data PushData = PushData { _caseDistances :: Maybe [Double] } deriving (Show) +-- |Extracts the case distances from a PushData object. Errors if the +-- _caseDistances list is Nothing. +extractDistance :: PushData -> [Double] +extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are empty!. This should never happen" +extractDistance PushData{_caseDistances = Just xs} = xs + +-- |Extracts the downsample index from a PushData object. Errors if the +-- _downsampleIndex is Nothing. +extractIndex :: PushData -> Int +extractIndex PushData{_downsampleIndex = Nothing} = error "Error: Case distances are empty!. This should never happen" +extractIndex PushData{_downsampleIndex = Just x} = x + +-- |Filters a list by another list of indicies. +filterByIndex :: [a] -> [Int] -> [a] +filterByIndex origList = map (origList !!) + $(makeLenses ''PushData) diff --git a/src/HushGP/Tools/Metrics.hs b/src/HushGP/Tools/Metrics.hs index 015b107..5b46f33 100644 --- a/src/HushGP/Tools/Metrics.hs +++ b/src/HushGP/Tools/Metrics.hs @@ -1,3 +1,20 @@ module HushGP.Tools.Metrics where +import Data.List +import System.Random +import System.Random.Shuffle +-- |Maps minimum over the transposed [[Double]]. +minOfColumns :: [[Double]] -> [Double] +minOfColumns columns = map minimum (transpose columns) + +-- |Returns the index of the maximum value in a list, randomly tiebreaking. +argMax :: Ord a => [a] -> IO Int +argMax xs = argMaxHead . shuffle' (elemIndices (maximum xs) xs) (length xs) <$> initStdGen + +-- |Takes the first element from a list and returns an error as specified. For use with +-- the argMax function. +argMaxHead :: [a] -> a +argMaxHead xs = case uncons xs of + Just (x, _) -> x + _ -> error "Error: Head is empty in argMax!"