downsample maxmins done

This commit is contained in:
Rowan Torbitzky-Lane 2025-02-27 20:55:00 -06:00
parent 4aa8aa9f2a
commit 5f8f0db1c6
5 changed files with 84 additions and 10 deletions

View File

@ -15,8 +15,8 @@ import HushGP.Genome
-- | Using a PushArgs object, generates a population of the specified size with the -- | Using a PushArgs object, generates a population of the specified size with the
-- specified instructions in parallel. -- specified instructions in parallel.
generatePopulation :: PushArgs -> IO [Individual] generatePopulation :: PushArgs -> IO [Individual]
generatePopulation pushArgs = do generatePopulation pushArgs@(PushArgs {populationSize = popSize}) = do
pop <- replicateM (populationSize pushArgs) (makeRandomIndividual pushArgs) pop <- replicateM popSize (makeRandomIndividual pushArgs)
return (pop `using` evalList rpar) -- Does this work? Need to test this with the HEC viewing tool. return (pop `using` evalList rpar) -- Does this work? Need to test this with the HEC viewing tool.
-- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them. -- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them.
@ -32,9 +32,9 @@ updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCase
-- | The start of the gp loop. Generates the population and then calls -- | The start of the gp loop. Generates the population and then calls
-- gpLoop' with modifications to the variables if needed. -- gpLoop' with modifications to the variables if needed.
gpLoop :: PushArgs -> IO () gpLoop :: PushArgs -> IO ()
gpLoop pushArgs = do gpLoop pushArgs@(PushArgs {trainingData = tData}) = do
unEvaledPopulation <- generatePopulation pushArgs unEvaledPopulation <- generatePopulation pushArgs
let indexedTrainingData = assignIndiciesToData (trainingData pushArgs) let indexedTrainingData = assignIndiciesToData tData
gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData
-- | The guts of the GP loop. Where the work gets done after the initialization happens -- | The guts of the GP loop. Where the work gets done after the initialization happens

View File

@ -2,10 +2,12 @@ module HushGP.GP.Downsample where
import System.Random.Shuffle import System.Random.Shuffle
import System.Random import System.Random
import Data.List
import HushGP.Genome import HushGP.Genome
import HushGP.GP.PushData import HushGP.GP.PushData
import HushGP.GP.PushArgs import HushGP.GP.PushArgs
import Data.List import HushGP.Tools.Metrics
import HushGP.Instructions.Utility
-- |Sets the index of the passed training data. -- |Sets the index of the passed training data.
assignIndiciesToData :: [PushData] -> [PushData] assignIndiciesToData :: [PushData] -> [PushData]
@ -21,7 +23,7 @@ updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingTy
-- |Draws a random amount of data points from a passed list of data points. -- |Draws a random amount of data points from a passed list of data points.
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData] selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleRandom pushArgs pushData = take (floor (downsampleRate pushArgs * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen selectDownsampleRandom (PushArgs {downsampleRate = dsRate}) pushData = take (floor (dsRate * fromIntegral @Int @Float (length pushData))) . shuffle' pushData (length pushData) <$> initStdGen
-- |Selects a downsample that has it's cases maximally far away by sequentially -- |Selects a downsample that has it's cases maximally far away by sequentially
-- adding cases to the downsample that have their closest case maximally far away. -- adding cases to the downsample that have their closest case maximally far away.
@ -43,5 +45,40 @@ selectDownsampleMaxmin' :: [PushData] -> [PushData] -> Int -> IO [PushData]
selectDownsampleMaxmin' newDownsample casesToPickFrom goalSize selectDownsampleMaxmin' newDownsample casesToPickFrom goalSize
| length newDownsample >= goalSize = pure newDownsample | length newDownsample >= goalSize = pure newDownsample
| otherwise = do | otherwise = do
minCaseDistances let newDistances = map extractDistance newDownsample
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
selectedCaseIndex <- argMax minCaseDistances
stdGen <- initStdGen
selectDownsampleMaxmin'
((casesToPickFrom !! selectedCaseIndex) : newDownsample)
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
goalSize
-- |selects a downsample that has it's cases maximally far away by sequentially
-- adding cases to the downsample that have their closest case maximally far away
-- automatically stops when the maximum minimum distance is below delta
selectDownsampleMaxminAdaptive :: PushArgs -> [PushData] -> IO [PushData]
selectDownsampleMaxminAdaptive (PushArgs {caseDelta = cDelta}) pushData = do
shuffledCases <- shuffle' pushData (length pushData) <$> initStdGen
selectDownsampleMaxminAdaptive'
(case uncons shuffledCases of (Just (x, _)) -> [x]; _ -> error "error: shuffledCases empty!")
(drop 1 shuffledCases)
cDelta
-- |The main loop of selectDownsampleMaxmin. This is where most of calculation happens.
-- When called from selectDownsampleMaxmin: The first [PushData] holds the head of the
-- original pushData wrapped in a list, the second [PushData] holds the rest of the list
-- without the aformentioned head. The Int is the caseDelta derived from the downsample rate
-- and the length of the original [pushData].
selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Double -> IO [PushData]
selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do
let newDistances = map extractDistance newDownsample
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
selectedCaseIndex <- argMax minCaseDistances
stdGen <- initStdGen
if null casesToPickFrom || (maximum minCaseDistances <= cDelta)
then pure newDownsample
else selectDownsampleMaxminAdaptive'
((casesToPickFrom !! selectedCaseIndex) : newDownsample)
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
cDelta

View File

@ -93,7 +93,10 @@ data PushArgs = PushArgs
-- Takes a Map of String -> Float where the string is the genetic operator -- Takes a Map of String -> Float where the string is the genetic operator
variation :: Map.Map String Float, variation :: Map.Map String Float,
-- | The epsilons calculated for epsilon lexicase selection. Only used for epsilon lexicase selection. -- | The epsilons calculated for epsilon lexicase selection. Only used for epsilon lexicase selection.
epsilons :: Maybe [Double] epsilons :: Maybe [Double],
-- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when
-- the maximum minimum distance is too far away.
caseDelta :: Double
} }
-- | The default values for which all runs of Hush derive -- | The default values for which all runs of Hush derive
@ -139,5 +142,6 @@ defaultPushArgs = PushArgs {
trainingData = error "Must supply the trainingData yourself", trainingData = error "Must supply the trainingData yourself",
umadRate = 0.1, umadRate = 0.1,
variation = Map.fromList [("umad", 1.0)], variation = Map.fromList [("umad", 1.0)],
epsilons = Nothing epsilons = Nothing,
caseDelta = 0.0
} }

View File

@ -12,4 +12,20 @@ data PushData = PushData {
_caseDistances :: Maybe [Double] _caseDistances :: Maybe [Double]
} deriving (Show) } deriving (Show)
-- |Extracts the case distances from a PushData object. Errors if the
-- _caseDistances list is Nothing.
extractDistance :: PushData -> [Double]
extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are empty!. This should never happen"
extractDistance PushData{_caseDistances = Just xs} = xs
-- |Extracts the downsample index from a PushData object. Errors if the
-- _downsampleIndex is Nothing.
extractIndex :: PushData -> Int
extractIndex PushData{_downsampleIndex = Nothing} = error "Error: Case distances are empty!. This should never happen"
extractIndex PushData{_downsampleIndex = Just x} = x
-- |Filters a list by another list of indicies.
filterByIndex :: [a] -> [Int] -> [a]
filterByIndex origList = map (origList !!)
$(makeLenses ''PushData) $(makeLenses ''PushData)

View File

@ -1,3 +1,20 @@
module HushGP.Tools.Metrics where module HushGP.Tools.Metrics where
import Data.List
import System.Random
import System.Random.Shuffle
-- |Maps minimum over the transposed [[Double]].
minOfColumns :: [[Double]] -> [Double]
minOfColumns columns = map minimum (transpose columns)
-- |Returns the index of the maximum value in a list, randomly tiebreaking.
argMax :: Ord a => [a] -> IO Int
argMax xs = argMaxHead . shuffle' (elemIndices (maximum xs) xs) (length xs) <$> initStdGen
-- |Takes the first element from a list and returns an error as specified. For use with
-- the argMax function.
argMaxHead :: [a] -> a
argMaxHead xs = case uncons xs of
Just (x, _) -> x
_ -> error "Error: Head is empty in argMax!"