downsample maxmins done
This commit is contained in:
parent
4aa8aa9f2a
commit
5f8f0db1c6
@ -15,8 +15,8 @@ import HushGP.Genome
|
||||
-- | Using a PushArgs object, generates a population of the specified size with the
|
||||
-- specified instructions in parallel.
|
||||
generatePopulation :: PushArgs -> IO [Individual]
|
||||
generatePopulation pushArgs = do
|
||||
pop <- replicateM (populationSize pushArgs) (makeRandomIndividual pushArgs)
|
||||
generatePopulation pushArgs@(PushArgs {populationSize = popSize}) = do
|
||||
pop <- replicateM popSize (makeRandomIndividual pushArgs)
|
||||
return (pop `using` evalList rpar) -- Does this work? Need to test this with the HEC viewing tool.
|
||||
|
||||
-- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them.
|
||||
@ -32,9 +32,9 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase
|
||||
-- | The start of the gp loop. Generates the population and then calls
|
||||
-- gpLoop' with modifications to the variables if needed.
|
||||
gpLoop :: PushArgs -> IO ()
|
||||
gpLoop pushArgs = do
|
||||
gpLoop pushArgs@(PushArgs {trainingData = tData}) = do
|
||||
unEvaledPopulation <- generatePopulation pushArgs
|
||||
let indexedTrainingData = assignIndiciesToData (trainingData pushArgs)
|
||||
let indexedTrainingData = assignIndiciesToData tData
|
||||
gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
|
||||
|
||||
-- | The guts of the GP loop. Where the work gets done after the initialization happens
|
||||
|
@ -2,10 +2,12 @@ module HushGP.GP.Downsample where
|
||||
|
||||
import System.Random.Shuffle
|
||||
import System.Random
|
||||
import Data.List
|
||||
import HushGP.Genome
|
||||
import HushGP.GP.PushData
|
||||
import HushGP.GP.PushArgs
|
||||
import Data.List
|
||||
import HushGP.Tools.Metrics
|
||||
import HushGP.Instructions.Utility
|
||||
|
||||
-- |Sets the index of the passed training data.
|
||||
assignIndiciesToData :: [PushData] -> [PushData]
|
||||
@ -21,7 +23,7 @@ updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingTy
|
||||
|
||||
-- |Draws a random amount of data points from a passed list of data points.
|
||||
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
|
||||
selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
|
||||
selectDownsampleRandom (PushArgs {downsampleRate = dsRate}) pushData = take (floor (dsRate * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
|
||||
|
||||
-- |Selects a downsample that has it's cases maximally far away by sequentially
|
||||
-- adding cases to the downsample that have their closest case maximally far away.
|
||||
@ -43,5 +45,40 @@ selectDownsampleMaxmin' :: [PushData] -> [PushData] -> Int -> IO [PushData]
|
||||
selectDownsampleMaxmin' newDownsample casesToPickFrom goalSize
|
||||
| length newDownsample >= goalSize = pure newDownsample
|
||||
| otherwise = do
|
||||
minCaseDistances
|
||||
let newDistances = map extractDistance newDownsample
|
||||
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
|
||||
selectedCaseIndex <- argMax minCaseDistances
|
||||
stdGen <- initStdGen
|
||||
selectDownsampleMaxmin'
|
||||
((casesToPickFrom !! selectedCaseIndex) : newDownsample)
|
||||
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
|
||||
goalSize
|
||||
|
||||
-- |selects a downsample that has it's cases maximally far away by sequentially
|
||||
-- adding cases to the downsample that have their closest case maximally far away
|
||||
-- automatically stops when the maximum minimum distance is below delta
|
||||
selectDownsampleMaxminAdaptive :: PushArgs -> [PushData] -> IO [PushData]
|
||||
selectDownsampleMaxminAdaptive (PushArgs {caseDelta = cDelta}) pushData = do
|
||||
shuffledCases <- shuffle' pushData (length pushData) <$> initStdGen
|
||||
selectDownsampleMaxminAdaptive'
|
||||
(case uncons shuffledCases of (Just (x, _)) -> [x]; _ -> error "error: shuffledCases empty!")
|
||||
(drop 1 shuffledCases)
|
||||
cDelta
|
||||
|
||||
-- |The main loop of selectDownsampleMaxmin. This is where most of calculation happens.
|
||||
-- When called from selectDownsampleMaxmin: The first [PushData] holds the head of the
|
||||
-- original pushData wrapped in a list, the second [PushData] holds the rest of the list
|
||||
-- without the aformentioned head. The Int is the caseDelta derived from the downsample rate
|
||||
-- and the length of the original [pushData].
|
||||
selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Double -> IO [PushData]
|
||||
selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do
|
||||
let newDistances = map extractDistance newDownsample
|
||||
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
|
||||
selectedCaseIndex <- argMax minCaseDistances
|
||||
stdGen <- initStdGen
|
||||
if null casesToPickFrom || (maximum minCaseDistances <= cDelta)
|
||||
then pure newDownsample
|
||||
else selectDownsampleMaxminAdaptive'
|
||||
((casesToPickFrom !! selectedCaseIndex) : newDownsample)
|
||||
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
|
||||
cDelta
|
||||
|
@ -93,7 +93,10 @@ data PushArgs = PushArgs
|
||||
-- Takes a Map of String -> Float where the string is the genetic operator
|
||||
variation :: Map.Map String Float,
|
||||
-- | The epsilons calculated for epsilon lexicase selection. Only used for epsilon lexicase selection.
|
||||
epsilons :: Maybe [Double]
|
||||
epsilons :: Maybe [Double],
|
||||
-- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when
|
||||
-- the maximum minimum distance is too far away.
|
||||
caseDelta :: Double
|
||||
}
|
||||
|
||||
-- | The default values for which all runs of Hush derive
|
||||
@ -139,5 +142,6 @@ defaultPushArgs = PushArgs {
|
||||
trainingData = error "Must supply the trainingData yourself",
|
||||
umadRate = 0.1,
|
||||
variation = Map.fromList [("umad", 1.0)],
|
||||
epsilons = Nothing
|
||||
epsilons = Nothing,
|
||||
caseDelta = 0.0
|
||||
}
|
||||
|
@ -12,4 +12,20 @@ data PushData = PushData {
|
||||
_caseDistances :: Maybe [Double]
|
||||
} deriving (Show)
|
||||
|
||||
-- |Extracts the case distances from a PushData object. Errors if the
|
||||
-- _caseDistances list is Nothing.
|
||||
extractDistance :: PushData -> [Double]
|
||||
extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are empty!. This should never happen"
|
||||
extractDistance PushData{_caseDistances = Just xs} = xs
|
||||
|
||||
-- |Extracts the downsample index from a PushData object. Errors if the
|
||||
-- _downsampleIndex is Nothing.
|
||||
extractIndex :: PushData -> Int
|
||||
extractIndex PushData{_downsampleIndex = Nothing} = error "Error: Case distances are empty!. This should never happen"
|
||||
extractIndex PushData{_downsampleIndex = Just x} = x
|
||||
|
||||
-- |Filters a list by another list of indicies.
|
||||
filterByIndex :: [a] -> [Int] -> [a]
|
||||
filterByIndex origList = map (origList !!)
|
||||
|
||||
$(makeLenses ''PushData)
|
||||
|
@ -1,3 +1,20 @@
|
||||
module HushGP.Tools.Metrics where
|
||||
|
||||
import Data.List
|
||||
import System.Random
|
||||
import System.Random.Shuffle
|
||||
|
||||
-- |Maps minimum over the transposed [[Double]].
|
||||
minOfColumns :: [[Double]] -> [Double]
|
||||
minOfColumns columns = map minimum (transpose columns)
|
||||
|
||||
-- |Returns the index of the maximum value in a list, randomly tiebreaking.
|
||||
argMax :: Ord a => [a] -> IO Int
|
||||
argMax xs = argMaxHead . shuffle' (elemIndices (maximum xs) xs) (length xs) <$> initStdGen
|
||||
|
||||
-- |Takes the first element from a list and returns an error as specified. For use with
|
||||
-- the argMax function.
|
||||
argMaxHead :: [a] -> a
|
||||
argMaxHead xs = case uncons xs of
|
||||
Just (x, _) -> x
|
||||
_ -> error "Error: Head is empty in argMax!"
|
||||
|
Loading…
x
Reference in New Issue
Block a user