downsample done, need to test it now

This commit is contained in:
Rowan Torbitzky-Lane 2025-03-01 16:20:09 -06:00
parent 92e5443ce5
commit 9c23017976
6 changed files with 57 additions and 20 deletions

View File

@ -17,7 +17,7 @@ assignIndicesToData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just i
-- |Initializes cases distances for passed training data. -- |Initializes cases distances for passed training data.
initializeCaseDistances :: PushArgs -> [PushData] initializeCaseDistances :: PushArgs -> [PushData]
initializeCaseDistances (PushArgs {trainingData = tData, populationSize = popSize}) = [ dat{_caseDistances = Just (replicate (length tData) popSize)} | dat <- tData ] initializeCaseDistances (PushArgs {trainingData = tData, populationSize = popSize}) = [ dat{_caseDistances = Just (replicate (length tData) (fromIntegral @Int @Double popSize))} | dat <- tData ]
-- |Draws a random amount of data points from a passed list of data points. -- |Draws a random amount of data points from a passed list of data points.
selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData] selectDownsampleRandom :: PushArgs -> [PushData] -> IO [PushData]
@ -68,7 +68,7 @@ selectDownsampleMaxminAdaptive (PushArgs {caseDelta = cDelta}) pushData = do
-- original pushData wrapped in a list, the second [PushData] holds the rest of the list -- original pushData wrapped in a list, the second [PushData] holds the rest of the list
-- without the aformentioned head. The Int is the caseDelta derived from the downsample rate -- without the aformentioned head. The Int is the caseDelta derived from the downsample rate
-- and the length of the original [pushData]. -- and the length of the original [pushData].
selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Int -> IO [PushData] selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Double -> IO [PushData]
selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do
let newDistances = map extractDistance newDownsample let newDistances = map extractDistance newDownsample
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances) let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
@ -82,24 +82,24 @@ selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do
cDelta cDelta
-- |Returns the distance between two cases given a list of individual error vectors, and the index these -- |Returns the distance between two cases given a list of individual error vectors, and the index these
-- cases exist in the error vector. Only makes the distinction between zero and nonzero errors" -- cases exist in the error vector. Only makes the distinction between zero and nonzero errors.
getDistanceBetweenCases :: [[Int]] -> Int -> Int -> Int getDistanceBetweenCases :: [[Double]] -> Int -> Int -> Double
getDistanceBetweenCases errorLists caseIndex0 caseIndex1 = getDistanceBetweenCases errorLists caseIndex0 caseIndex1 =
if lhe < caseIndex0 || lhe < caseIndex1 || caseIndex0 < 0 || caseIndex1 < 0 if lhe < caseIndex0 || lhe < caseIndex1 || caseIndex0 < 0 || caseIndex1 < 0
then length errorLists then fromIntegral @Int @Double $ length errorLists
else sum $ zipWith (\e0 e1 -> abs $ abs (signum e0) - abs (signum e1)) errors0 errors1 else sum $ zipWith (\e0 e1 -> abs $ abs (signum e0) - abs (signum e1)) errors0 errors1
where where
lhe :: Int lhe :: Int -- length head errorLists
lhe = length $ case uncons errorLists of Just (x, _) -> x; _ -> error "Error: errorLists is empty!" lhe = length $ case uncons errorLists of Just (x, _) -> x; _ -> error "Error: errorLists is empty!"
errors0 :: [Int] errors0 :: [Double]
errors0 = map (!! caseIndex0) errorLists errors0 = map (\e -> case e !? caseIndex0 of Just x -> x; _ -> 0) errorLists
errors1 :: [Int] errors1 :: [Double]
errors1 = map (!! caseIndex1) errorLists errors1 = map (\e -> case e !? caseIndex1 of Just x -> x; _ -> 0) errorLists
-- |Updates a list with the values from another list based on an index from a third list. -- |Updates a list with the values from another list based on an index from a third list.
-- The first list (bigList) has its indices updated with the values from the second list (smallList) -- The first list (bigList) has its indices updated with the values from the second list (smallList)
-- per index notated in the third [Int] list. -- per index notated in the third [Int] list.
updateAtIndices :: [Int] -> [Int] -> [Int] -> [Int] updateAtIndices :: [a] -> [a] -> [Int] -> [a]
updateAtIndices bigList _ [] = bigList updateAtIndices bigList _ [] = bigList
updateAtIndices bigList smallList indices = updateAtIndices bigList smallList indices =
if length smallList /= length indices || any (\x -> x < 0 || x >= length bigList) indices if length smallList /= length indices || any (\x -> x < 0 || x >= length bigList) indices
@ -118,9 +118,39 @@ updateAtIndices' bigList (sval:svals) (idx:idxs) = updateAtIndices' (replaceAt i
mergePushDataListsAtIndex :: [PushData] -> [PushData] -> [PushData] mergePushDataListsAtIndex :: [PushData] -> [PushData] -> [PushData]
mergePushDataListsAtIndex bigList smallList = map (\x -> let correspondingSmall = find (\y -> extractIndex x == extractIndex y) smallList in fromMaybe x correspondingSmall) bigList mergePushDataListsAtIndex bigList smallList = map (\x -> let correspondingSmall = find (\y -> extractIndex x == extractIndex y) smallList in fromMaybe x correspondingSmall) bigList
-- |Replaces all ints of a list that equal the minimum int in said same list with 0.
replaceMinsWithZero :: (Num a, Ord a) => [a] -> [a]
replaceMinsWithZero xs = map (\x -> if minimum xs == x then 0 else x) xs
-- |Replaces values within a delta of zero with zero, mainly used for regression problems.
replaceCloseZeroWithZero :: (Num a, Ord a) => a -> [a] -> [a]
replaceCloseZeroWithZero delta = map (\x -> if delta >= x then 0 else x)
-- |Converts a set of errors into a list where all the elite errors are replaced with 0s so that we can use
-- it in the selection of downsamples with elite/not-elite selection.
convertToEliteError :: forall a. (Num a, Ord a) => [[a]] -> [[a]]
convertToEliteError = map (replaceMinsWithZero @a) -- crazy lambda reduction. Is it worth it here?
-- |Converts a set of errors into a list where all of the errors are replaced with within a delta.
convertToSoftError :: forall a. (Num a, Ord a) => a -> [[a]] -> [[a]]
convertToSoftError delta = map (delta `replaceCloseZeroWithZero`)
-- |Updates the cases distances when downsampling. -- |Updates the cases distances when downsampling.
updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData] updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData]
updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined updateCaseDistances evaledPop downsampleData trainData informedDownsamplingType solutionThreshold =
mergePushDataListsAtIndex trainData
(mapIndexed (\idx dCase -> dCase{_caseDistances = Just (updateAtIndices (extractDistance dCase)
(map (getDistanceBetweenCases corrErrors idx) [0..(length dsIndicies - 1)]) dsIndicies)}) downsampleData)
where
dsIndicies :: [Int]
dsIndicies = map extractIndex downsampleData
errors :: [[Double]]
errors = map extractFitnessCases evaledPop
corrErrors :: [[Double]]
corrErrors = case informedDownsamplingType of
"elite" -> convertToEliteError errors
"soft" -> convertToSoftError solutionThreshold errors
_ -> errors
-- map (\other -> getDistanceBetweenCases [[0,0],[0,0]] 0 other) [0..(length [3,4] - 1)] -- map (\other -> getDistanceBetweenCases [[0,0],[0,0]] 0 other) [0..(length [3,4] - 1)]

View File

@ -96,7 +96,7 @@ data PushArgs = PushArgs
epsilons :: Maybe [Double], epsilons :: Maybe [Double],
-- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when -- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when
-- the maximum minimum distance is too far away. -- the maximum minimum distance is too far away.
caseDelta :: Int caseDelta :: Double
} }
-- | The default values for which all runs of Hush derive -- | The default values for which all runs of Hush derive

View File

@ -9,12 +9,17 @@ data PushData = PushData {
_inputData :: [Gene], _inputData :: [Gene],
_outputData :: Gene, _outputData :: Gene,
_downsampleIndex :: Maybe Int, _downsampleIndex :: Maybe Int,
_caseDistances :: Maybe [Int] _caseDistances :: Maybe [Double]
} deriving (Show) } deriving (Show)
-- |Extracts any value from a list [PushData] based on their accessor and places the result back
-- into a list.
extractField :: Lens' PushData a -> [PushData] -> [a]
extractField accessor pushData = [ view accessor dataPoint | dataPoint <- pushData ]
-- |Extracts the case distances from a PushData object. Errors if the -- |Extracts the case distances from a PushData object. Errors if the
-- _caseDistances list is Nothing. -- _caseDistances list is Nothing.
extractDistance :: PushData -> [Int] extractDistance :: PushData -> [Double]
extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are Nothing!. They should be assigned first!" extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are Nothing!. They should be assigned first!"
extractDistance PushData{_caseDistances = Just xs} = xs extractDistance PushData{_caseDistances = Just xs} = xs

View File

@ -23,6 +23,11 @@ data Individual = Individual
instance Ord Individual where instance Ord Individual where
ind0 <= ind1 = totalFitness ind0 <= totalFitness ind1 ind0 <= ind1 = totalFitness ind0 <= totalFitness ind1
-- |Extracts the fitnessCases from an Individual. Errors if the field is empty.
extractFitnessCases :: Individual -> [Double]
extractFitnessCases Individual{fitnessCases = Nothing} = error "Error: fitnessCases is empty!"
extractFitnessCases Individual{fitnessCases = Just xs} = xs
-- | Makes a random individual based on the variables in a passed PushArgs. -- | Makes a random individual based on the variables in a passed PushArgs.
makeRandomIndividual :: PushArgs -> IO Individual makeRandomIndividual :: PushArgs -> IO Individual
makeRandomIndividual pushArgs = do makeRandomIndividual pushArgs = do
@ -90,7 +95,7 @@ plushyToPush' openPlushy push
firstPlushy = firstPlushy =
case uncons openPlushy of case uncons openPlushy of
Just (g, _) -> g Just (g, _) -> g
_ -> error "This shouldn't happen" _ -> error "Error: First plushy taken when no plushy available!"
postOpen :: [Gene] postOpen :: [Gene]
postOpen = reverse (takeWhile (not . isOpen) (reverse push)) postOpen = reverse (takeWhile (not . isOpen) (reverse push))
openIndex :: Int openIndex :: Int

View File

@ -82,9 +82,6 @@ loadState :: [Gene] -> [Gene] -> State
loadState plushy vals = loadState plushy vals =
(loadProgram (plushyToPush plushy) emptyState){_input = Map.fromList (zip [0..] vals)} (loadProgram (plushyToPush plushy) emptyState){_input = Map.fromList (zip [0..] vals)}
extractField :: Lens' PushData a -> [PushData] -> [a]
extractField accessor pushData = [ view accessor dataPoint | dataPoint <- pushData ]
-- | The error function for a single set of inputs and outputs. -- | The error function for a single set of inputs and outputs.
intErrorFunction :: PushArgs -> [PushData] -> [Gene] -> [Double] intErrorFunction :: PushArgs -> [PushData] -> [Gene] -> [Double]
intErrorFunction _args pushData plushy = intErrorFunction _args pushData plushy =

View File

@ -5,7 +5,7 @@ import System.Random
import System.Random.Shuffle import System.Random.Shuffle
-- |Maps minimum over the transposed [[Double]]. -- |Maps minimum over the transposed [[Double]].
minOfColumns :: [[Int]] -> [Int] minOfColumns :: (Num a, Ord a) => [[a]] -> [a]
minOfColumns columns = map minimum (transpose columns) minOfColumns columns = map minimum (transpose columns)
-- |Returns the index of the maximum value in a list, randomly tiebreaking. -- |Returns the index of the maximum value in a list, randomly tiebreaking.