downsample maxmins done
This commit is contained in:
parent
4aa8aa9f2a
commit
5f8f0db1c6
@ -15,8 +15,8 @@ import HushGP.Genome
|
|||||||
-- | Using a PushArgs object, generates a population of the specified size with the
|
-- | Using a PushArgs object, generates a population of the specified size with the
|
||||||
-- specified instructions in parallel.
|
-- specified instructions in parallel.
|
||||||
generatePopulation :: PushArgs -> IO [Individual]
|
generatePopulation :: PushArgs -> IO [Individual]
|
||||||
generatePopulation pushArgs = do
|
generatePopulation pushArgs@(PushArgs {populationSize = popSize}) = do
|
||||||
pop <- replicateM (populationSize pushArgs) (makeRandomIndividual pushArgs)
|
pop <- replicateM popSize (makeRandomIndividual pushArgs)
|
||||||
return (pop `using` evalList rpar) -- Does this work? Need to test this with the HEC viewing tool.
|
return (pop `using` evalList rpar) -- Does this work? Need to test this with the HEC viewing tool.
|
||||||
|
|
||||||
-- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them.
|
-- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them.
|
||||||
@ -32,9 +32,9 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase
|
|||||||
-- | The start of the gp loop. Generates the population and then calls
|
-- | The start of the gp loop. Generates the population and then calls
|
||||||
-- gpLoop' with modifications to the variables if needed.
|
-- gpLoop' with modifications to the variables if needed.
|
||||||
gpLoop :: PushArgs -> IO ()
|
gpLoop :: PushArgs -> IO ()
|
||||||
gpLoop pushArgs = do
|
gpLoop pushArgs@(PushArgs {trainingData = tData}) = do
|
||||||
unEvaledPopulation <- generatePopulation pushArgs
|
unEvaledPopulation <- generatePopulation pushArgs
|
||||||
let indexedTrainingData = assignIndiciesToData (trainingData pushArgs)
|
let indexedTrainingData = assignIndiciesToData tData
|
||||||
gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
|
gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
|
||||||
|
|
||||||
-- | The guts of the GP loop. Where the work gets done after the initialization happens
|
-- | The guts of the GP loop. Where the work gets done after the initialization happens
|
||||||
|
@ -2,10 +2,12 @@ module HushGP.GP.Downsample where
|
|||||||
|
|
||||||
import System.Random.Shuffle
|
import System.Random.Shuffle
|
||||||
import System.Random
|
import System.Random
|
||||||
|
import Data.List
|
||||||
import HushGP.Genome
|
import HushGP.Genome
|
||||||
import HushGP.GP.PushData
|
import HushGP.GP.PushData
|
||||||
import HushGP.GP.PushArgs
|
import HushGP.GP.PushArgs
|
||||||
import Data.List
|
import HushGP.Tools.Metrics
|
||||||
|
import HushGP.Instructions.Utility
|
||||||
|
|
||||||
-- |Sets the index of the passed training data.
|
-- |Sets the index of the passed training data.
|
||||||
assignIndiciesToData :: [PushData] -> [PushData]
|
assignIndiciesToData :: [PushData] -> [PushData]
|
||||||
@ -21,7 +23,7 @@ updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingTy
|
|||||||
|
|
||||||
-- |Draws a random amount of data points from a passed list of data points.
|
-- |Draws a random amount of data points from a passed list of data points.
|
||||||
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
|
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
|
||||||
selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
|
selectDownsampleRandom (PushArgs {downsampleRate = dsRate}) pushData = take (floor (dsRate * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
|
||||||
|
|
||||||
-- |Selects a downsample that has it's cases maximally far away by sequentially
|
-- |Selects a downsample that has it's cases maximally far away by sequentially
|
||||||
-- adding cases to the downsample that have their closest case maximally far away.
|
-- adding cases to the downsample that have their closest case maximally far away.
|
||||||
@ -43,5 +45,40 @@ selectDownsampleMaxmin' :: [PushData] -> [PushData] -> Int -> IO [PushData]
|
|||||||
selectDownsampleMaxmin' newDownsample casesToPickFrom goalSize
|
selectDownsampleMaxmin' newDownsample casesToPickFrom goalSize
|
||||||
| length newDownsample >= goalSize = pure newDownsample
|
| length newDownsample >= goalSize = pure newDownsample
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
minCaseDistances
|
let newDistances = map extractDistance newDownsample
|
||||||
|
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
|
||||||
|
selectedCaseIndex <- argMax minCaseDistances
|
||||||
|
stdGen <- initStdGen
|
||||||
|
selectDownsampleMaxmin'
|
||||||
|
((casesToPickFrom !! selectedCaseIndex) : newDownsample)
|
||||||
|
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
|
||||||
|
goalSize
|
||||||
|
|
||||||
|
-- |selects a downsample that has it's cases maximally far away by sequentially
|
||||||
|
-- adding cases to the downsample that have their closest case maximally far away
|
||||||
|
-- automatically stops when the maximum minimum distance is below delta
|
||||||
|
selectDownsampleMaxminAdaptive :: PushArgs -> [PushData] -> IO [PushData]
|
||||||
|
selectDownsampleMaxminAdaptive (PushArgs {caseDelta = cDelta}) pushData = do
|
||||||
|
shuffledCases <- shuffle' pushData (length pushData) <$> initStdGen
|
||||||
|
selectDownsampleMaxminAdaptive'
|
||||||
|
(case uncons shuffledCases of (Just (x, _)) -> [x]; _ -> error "error: shuffledCases empty!")
|
||||||
|
(drop 1 shuffledCases)
|
||||||
|
cDelta
|
||||||
|
|
||||||
|
-- |The main loop of selectDownsampleMaxmin. This is where most of calculation happens.
|
||||||
|
-- When called from selectDownsampleMaxmin: The first [PushData] holds the head of the
|
||||||
|
-- original pushData wrapped in a list, the second [PushData] holds the rest of the list
|
||||||
|
-- without the aformentioned head. The Int is the caseDelta derived from the downsample rate
|
||||||
|
-- and the length of the original [pushData].
|
||||||
|
selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Double -> IO [PushData]
|
||||||
|
selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do
|
||||||
|
let newDistances = map extractDistance newDownsample
|
||||||
|
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
|
||||||
|
selectedCaseIndex <- argMax minCaseDistances
|
||||||
|
stdGen <- initStdGen
|
||||||
|
if null casesToPickFrom || (maximum minCaseDistances <= cDelta)
|
||||||
|
then pure newDownsample
|
||||||
|
else selectDownsampleMaxminAdaptive'
|
||||||
|
((casesToPickFrom !! selectedCaseIndex) : newDownsample)
|
||||||
|
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
|
||||||
|
cDelta
|
||||||
|
@ -93,7 +93,10 @@ data PushArgs = PushArgs
|
|||||||
-- Takes a Map of String -> Float where the string is the genetic operator
|
-- Takes a Map of String -> Float where the string is the genetic operator
|
||||||
variation :: Map.Map String Float,
|
variation :: Map.Map String Float,
|
||||||
-- | The epsilons calculated for epsilon lexicase selection. Only used for epsilon lexicase selection.
|
-- | The epsilons calculated for epsilon lexicase selection. Only used for epsilon lexicase selection.
|
||||||
epsilons :: Maybe [Double]
|
epsilons :: Maybe [Double],
|
||||||
|
-- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when
|
||||||
|
-- the maximum minimum distance is too far away.
|
||||||
|
caseDelta :: Double
|
||||||
}
|
}
|
||||||
|
|
||||||
-- | The default values for which all runs of Hush derive
|
-- | The default values for which all runs of Hush derive
|
||||||
@ -139,5 +142,6 @@ defaultPushArgs = PushArgs {
|
|||||||
trainingData = error "Must supply the trainingData yourself",
|
trainingData = error "Must supply the trainingData yourself",
|
||||||
umadRate = 0.1,
|
umadRate = 0.1,
|
||||||
variation = Map.fromList [("umad", 1.0)],
|
variation = Map.fromList [("umad", 1.0)],
|
||||||
epsilons = Nothing
|
epsilons = Nothing,
|
||||||
|
caseDelta = 0.0
|
||||||
}
|
}
|
||||||
|
@ -12,4 +12,20 @@ data PushData = PushData {
|
|||||||
_caseDistances :: Maybe [Double]
|
_caseDistances :: Maybe [Double]
|
||||||
} deriving (Show)
|
} deriving (Show)
|
||||||
|
|
||||||
|
-- |Extracts the case distances from a PushData object. Errors if the
|
||||||
|
-- _caseDistances list is Nothing.
|
||||||
|
extractDistance :: PushData -> [Double]
|
||||||
|
extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are empty!. This should never happen"
|
||||||
|
extractDistance PushData{_caseDistances = Just xs} = xs
|
||||||
|
|
||||||
|
-- |Extracts the downsample index from a PushData object. Errors if the
|
||||||
|
-- _downsampleIndex is Nothing.
|
||||||
|
extractIndex :: PushData -> Int
|
||||||
|
extractIndex PushData{_downsampleIndex = Nothing} = error "Error: Case distances are empty!. This should never happen"
|
||||||
|
extractIndex PushData{_downsampleIndex = Just x} = x
|
||||||
|
|
||||||
|
-- |Filters a list by another list of indicies.
|
||||||
|
filterByIndex :: [a] -> [Int] -> [a]
|
||||||
|
filterByIndex origList = map (origList !!)
|
||||||
|
|
||||||
$(makeLenses ''PushData)
|
$(makeLenses ''PushData)
|
||||||
|
@ -1,3 +1,20 @@
|
|||||||
module HushGP.Tools.Metrics where
|
module HushGP.Tools.Metrics where
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import System.Random
|
||||||
|
import System.Random.Shuffle
|
||||||
|
|
||||||
|
-- |Maps minimum over the transposed [[Double]].
|
||||||
|
minOfColumns :: [[Double]] -> [Double]
|
||||||
|
minOfColumns columns = map minimum (transpose columns)
|
||||||
|
|
||||||
|
-- |Returns the index of the maximum value in a list, randomly tiebreaking.
|
||||||
|
argMax :: Ord a => [a] -> IO Int
|
||||||
|
argMax xs = argMaxHead . shuffle' (elemIndices (maximum xs) xs) (length xs) <$> initStdGen
|
||||||
|
|
||||||
|
-- |Takes the first element from a list and returns an error as specified. For use with
|
||||||
|
-- the argMax function.
|
||||||
|
argMaxHead :: [a] -> a
|
||||||
|
argMaxHead xs = case uncons xs of
|
||||||
|
Just (x, _) -> x
|
||||||
|
_ -> error "Error: Head is empty in argMax!"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user