progress on getDistancesBetweenCases

This commit is contained in:
Rowan Torbitzky-Lane 2025-02-27 22:37:45 -06:00
parent 5f8f0db1c6
commit 720c8296d2
4 changed files with 19 additions and 7 deletions

View File

@ -15,9 +15,9 @@ assignIndiciesToData oldData = zipWith (\dat idx -> dat{_downsampleIndex = Just
-- |Initializes cases distances for passed training data. -- |Initializes cases distances for passed training data.
initializeCaseDistances :: PushArgs -> [PushData] initializeCaseDistances :: PushArgs -> [PushData]
initializeCaseDistances (PushArgs {trainingData = tData, populationSize = popSize}) = [ dat{_caseDistances = Just (replicate (length tData) (fromIntegral @Int @Double popSize))} | dat <- tData ] initializeCaseDistances (PushArgs {trainingData = tData, populationSize = popSize}) = [ dat{_caseDistances = Just (replicate (length tData) popSize)} | dat <- tData ]
-- |Updates the cases distances when downsampling -- |Updates the cases distances when downsampling.
updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData] updateCaseDistances :: [Individual] -> [PushData] -> [PushData] -> String -> Double -> [PushData]
updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined
@ -70,7 +70,7 @@ selectDownsampleMaxminAdaptive (PushArgs {caseDelta = cDelta}) pushData = do
-- original pushData wrapped in a list, the second [PushData] holds the rest of the list -- original pushData wrapped in a list, the second [PushData] holds the rest of the list
-- without the aforementioned head. The Int is the caseDelta derived from the downsample rate -- without the aforementioned head. The Int is the caseDelta derived from the downsample rate
-- and the length of the original [pushData]. -- and the length of the original [pushData].
selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Double -> IO [PushData] selectDownsampleMaxminAdaptive' :: [PushData] -> [PushData] -> Int -> IO [PushData]
selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do
let newDistances = map extractDistance newDownsample let newDistances = map extractDistance newDownsample
let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances) let minCaseDistances = minOfColumns (map (\distList -> filterByIndex distList (map extractIndex casesToPickFrom)) newDistances)
@ -82,3 +82,15 @@ selectDownsampleMaxminAdaptive' newDownsample casesToPickFrom cDelta = do
((casesToPickFrom !! selectedCaseIndex) : newDownsample) ((casesToPickFrom !! selectedCaseIndex) : newDownsample)
(shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen) (shuffle' (deleteAt selectedCaseIndex casesToPickFrom) (length casesToPickFrom - 1) stdGen)
cDelta cDelta
-- |Returns the distance between two cases given a list of individual error vectors and the
-- indices of the two cases within each error vector. Only the distinction between zero and
-- nonzero errors matters: the distance is the number of error vectors in which exactly one
-- of the two cases has a zero error.
--
-- When either index is out of range for the first error vector (negative, or not smaller
-- than its length), the maximum possible distance @length errorLists@ is returned. An empty
-- @errorLists@ therefore yields 0 instead of crashing.
--
-- NOTE(review): assumes every error vector is at least as long as the first one; a shorter
-- later vector would still make '!!' fail -- confirm against the caller.
getDistanceBetweenCases :: [[Int]] -> Int -> Int -> Int
getDistanceBetweenCases errorLists caseIndex0 caseIndex1
  | outOfRange caseIndex0 || outOfRange caseIndex1 = length errorLists
  | otherwise =
      length
        [ ()
        | errs <- errorLists
        , isZeroAt caseIndex0 errs /= isZeroAt caseIndex1 errs
        ]
  where
    -- Length of the first error vector; 0 when there are no error vectors at all.
    caseCount :: Int
    caseCount = case errorLists of
      [] -> 0
      (firstErrors : _) -> length firstErrors
    -- An index equal to the length is already out of range, hence '>=' rather than '>'.
    outOfRange :: Int -> Bool
    outOfRange idx = idx < 0 || idx >= caseCount
    -- Whether the error recorded at the given case index is exactly zero.
    isZeroAt :: Int -> [Int] -> Bool
    isZeroAt idx errs = errs !! idx == 0

View File

@ -96,7 +96,7 @@ data PushArgs = PushArgs
epsilons :: Maybe [Double], epsilons :: Maybe [Double],
-- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when -- | Used with the CaseMaxminAuto downsampling strategy. Tells downsampling to stop when
-- the maximum minimum distance is too far away. -- the maximum minimum distance is too far away.
caseDelta :: Double caseDelta :: Int
} }
-- | The default values for which all runs of Hush derive -- | The default values for which all runs of Hush derive

View File

@ -9,12 +9,12 @@ data PushData = PushData {
_inputData :: [Gene], _inputData :: [Gene],
_outputData :: Gene, _outputData :: Gene,
_downsampleIndex :: Maybe Int, _downsampleIndex :: Maybe Int,
_caseDistances :: Maybe [Double] _caseDistances :: Maybe [Int]
} deriving (Show) } deriving (Show)
-- |Extracts the case distances from a PushData object. Errors if the -- |Extracts the case distances from a PushData object. Errors if the
-- _caseDistances list is Nothing. -- _caseDistances list is Nothing.
extractDistance :: PushData -> [Double] extractDistance :: PushData -> [Int]
extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are empty!. This should never happen" extractDistance PushData{_caseDistances = Nothing} = error "Error: Case distances are empty!. This should never happen"
extractDistance PushData{_caseDistances = Just xs} = xs extractDistance PushData{_caseDistances = Just xs} = xs

View File

@ -5,7 +5,7 @@ import System.Random
import System.Random.Shuffle import System.Random.Shuffle
-- |Maps minimum over the transposed input lists. -- |Maps minimum over the transposed input lists.
minOfColumns :: [[Double]] -> [Double] minOfColumns :: [[Int]] -> [Int]
minOfColumns columns = map minimum (transpose columns) minOfColumns columns = map minimum (transpose columns)
-- |Returns the index of the maximum value in a list, randomly tiebreaking. -- |Returns the index of the maximum value in a list, randomly tiebreaking.