downsample maxmins done

This commit is contained in:
Rowan Torbitzky-Lane 2025-02-27 20:55:00 -06:00
parent 4aa8aa9f2a
commit 5f8f0db1c6
5 changed files with 84 additions and 10 deletions

View File

@ -15,8 +15,8 @@ import HushGP.Genome
-- | Using a PushArgs object, generates a population of the specified size with the
-- specified instructions in parallel.
generatePopulation :: PushArgs -> IO [Individual]
generatePopulation pushArgs = do
pop <- replicateM (populationSize pushArgs) (makeRandomIndividual pushArgs)
generatePopulation pushArgs@(PushArgs {populationSize = popSize}) = do
pop <- replicateM popSize (makeRandomIndividual pushArgs)
return (pop `using` evalList rpar) -- Does this work? Need to test this with the HEC viewing tool.
-- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them.
@ -32,9 +32,9 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase
-- | The start of the gp loop. Generates the population and then calls
-- gpLoop' with modifications to the variables if needed.
gpLoop :: PushArgs -> IO ()
gpLoop pushArgs = do
gpLoop pushArgs@(PushArgs {trainingData = tData}) = do
unEvaledPopulation <- generatePopulation pushArgs
let indexedTrainingData = assignIndiciesToData (trainingData pushArgs)
let indexedTrainingData = assignIndiciesToData tData
gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
-- | The guts of the GP loop. Where the work gets done after the initialization happens

View File

@ -2,10 +2,12 @@ module HushGP.GP.Downsample where
import System.Random.Shuffle
import System.Random
import Data.List
import HushGP.Genome
import HushGP.GP.PushData
import HushGP.GP.PushArgs
import Data.List
import HushGP.Tools.Metrics
import HushGP.Instructions.Utility
-- |Sets the index of the passed training data.
assignIndiciesToData :: [PushData] -> [PushData]
@ -21,7 +23,7 @@ updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingTy
-- |Draws a random amount of data points from a passed list of data points.
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
selectDownsampleRandom (PushArgs {downsampleRate = dsRate}) pushData = take (floor (dsRate * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
-- |Selects a downsample that has it's cases maximally far away by sequentially
-- adding cases to the downsample that have their closest case maximally far away.
@ -43,5 +45,40 @@ selectDownsampleMaxmin' :: [PushData] -> [PushData] -> Int -> IO [PushData]
selectDownsampleMaxmin' newDownsample casesToPickFrom goalSize
| length newDownsample >= goalSize = pure newDownsample
| otherwise = do
minCaseDistances
let newDistances = map extractDistance newDownsample
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
selectedCaseIndex <- argMax minCaseDistances
stdGen <- initStdGen
selectDownsampleMaxmin'
((casesToPickFrom !! selectedCaseIndex) : newDownsample)
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
goalSize
-- |selects a downsample that has it's cases maximally far away by sequentially
-- adding cases to the downsample that have their closest case maximally far away
-- automatically stops when the maximum minimum distance is below delta
selectDownsampleMaxminAdaptive :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleMaxminAdaptive (PushArgs {caseDelta = cDelta}) pushData = do
shuffledCases <- shuffle' pushData (length pushData) <$> initStdGen
selectDownsampleMaxminAdaptive'
(case uncons shuffledCases of (Just (x, _)) -> [x]; _ -> error "error: shuffledCases empty!")
(drop 1 shuffledCases)
cDelta
-- |The main loop of selectDownsampleMaxmin. This is where most of calculation happens.
-- When called from selectDownsampleMaxmin: The first [PushData] holds the head of the
-- original pushData wrapped in a list, the second [PushData] holds the rest of the list
-- without the aformentioned head. The Int is the caseDelta derived from the downsample rate
-- and the length of the original [pushData].
selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Double -> IO [PushData]
selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do
let newDistances = map extractDistance newDownsample
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
selectedCaseIndex <- argMax minCaseDistances
stdGen <- initStdGen
if null casesToPickFrom || (maximum minCaseDistances <= cDelta)
then pure newDownsample
else selectDownsampleMaxminAdaptive'
((casesToPickFrom !! selectedCaseIndex) : newDownsample)
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
cDelta

View File

@ -93,7 +93,10 @@ data PushArgs = PushArgs
-- Takes a Map of String -> Float where the string is the genetic operator
variation :: Map.Map String Float,
-- | The epsilons calculated for epsilon lexicase selection. Only used for epsilon lexicase selection.
epsilons :: Maybe [Double]
epsilons :: Maybe [Double],
-- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when
-- the maximum minimum distance is too far away.
caseDelta :: Double
}
-- | The default values for which all runs of Hush derive
@ -139,5 +142,6 @@ defaultPushArgs = PushArgs {
trainingData = error "Must supply the trainingData yourself",
umadRate = 0.1,
variation = Map.fromList [("umad", 1.0)],
epsilons = Nothing
epsilons = Nothing,
caseDelta = 0.0
}

View File

@ -12,4 +12,20 @@ data PushData = PushData {
_caseDistances :: Maybe [Double]
} deriving (Show)
-- |Extracts the case distances from a PushData object. Errors if the
-- _caseDistances list is Nothing.
extractDistance :: PushData -> [Double]
extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are empty!. This should never happen"
extractDistance PushData{_caseDistances = Just xs} = xs
-- |Extracts the downsample index from a PushData object. Errors if the
-- _downsampleIndex is Nothing.
extractIndex :: PushData -> Int
extractIndex PushData{_downsampleIndex = Nothing} = error "Error: Case distances are empty!. This should never happen"
extractIndex PushData{_downsampleIndex = Just x} = x
-- |Filters a list by another list of indicies.
filterByIndex :: [a] -> [Int] -> [a]
filterByIndex origList = map (origList !!)
$(makeLenses ''PushData)

View File

@ -1,3 +1,20 @@
module HushGP.Tools.Metrics where
import Data.List
import System.Random
import System.Random.Shuffle
-- |Maps minimum over the transposed [[Double]].
minOfColumns :: [[Double]] -> [Double]
minOfColumns columns = map minimum (transpose columns)
-- |Returns the index of the maximum value in a list, randomly tiebreaking.
argMax :: Ord a => [a] -> IO Int
argMax xs = argMaxHead . shuffle' (elemIndices (maximum xs) xs) (length xs) <$> initStdGen
-- |Takes the first element from a list and returns an error as specified. For use with
-- the argMax function.
argMaxHead :: [a] -> a
argMaxHead xs = case uncons xs of
Just (x, _) -> x
_ -> error "Error: Head is empty in argMax!"