diff --git a/HushGP.cabal b/HushGP.cabal index bac8d1e..9df2425 100644 --- a/HushGP.cabal +++ b/HushGP.cabal @@ -62,6 +62,8 @@ library , HushGP.PushTests.UtilTests , HushGP.GP , HushGP.GP.PushArgs + , HushGP.GP.Variation + , HushGP.GP.Downsample , HushGP.GP.PushData , HushGP.Problems.IntegerRegression diff --git a/README.md b/README.md index d9e2b3c..44dd86c 100644 --- a/README.md +++ b/README.md @@ -45,3 +45,7 @@ This is clearly not ideal. 4) For the exec stack itself, typeable, data generic, ghc.generic, data.dynamic, heterogeneous lists, etc. could also help, to detect the type of variables at runtime, but I would rather stick to language basics at first. + +## Nix Users + +This took my machine about 2 hours to build the environment after running `nix develop`. diff --git a/TODO.md b/TODO.md index fe3600d..58f001b 100644 --- a/TODO.md +++ b/TODO.md @@ -27,6 +27,7 @@ - I'm only going to implement propeller's :specified version - Is the best according to the papers - [X] Need a NoOp that opens blocks + - [ ] Have a way to balance amount of closes with open blocks - [ ] Need to make genomes serializable (Check pysh json files) - [ ] Add Memory - [ ] Add history stack(s), like a call stack diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..246ac6b --- /dev/null +++ b/flake.lock @@ -0,0 +1,74 @@ +{ + "nodes": { + "flake-parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1738453229, + "narHash": "sha256-7H9XgNiGLKN1G1CgRh0vUL4AheZSYzPm+zmZ7vxbJdo=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "32ea77a06711b758da0ad9bd6a844c5740a87abd", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, + "haskell-flake": { + "locked": { + "lastModified": 1740325125, + "narHash": "sha256-isWfa0ZxT4BLd00uVgBGZ4YHahODtQkc564ooHBSxmU=", + "owner": "srid", + "repo": "haskell-flake", + "rev": "4e9d319e78c00511cfbd48cc5ef4fb9c4df0dd95", + "type": 
"github" + }, + "original": { + "owner": "srid", + "repo": "haskell-flake", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1740396192, + "narHash": "sha256-ATMHHrg3sG1KgpQA5x8I+zcYpp5Sf17FaFj/fN+8OoQ=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "d9b69c3ec2a2e2e971c534065bdd53374bd68b97", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-lib": { + "locked": { + "lastModified": 1738452942, + "narHash": "sha256-vJzFZGaCpnmo7I6i416HaBLpC+hvcURh/BQwROcGIp8=", + "type": "tarball", + "url": "https://github.com/NixOS/nixpkgs/archive/072a6db25e947df2f31aab9eccd0ab75d5b2da11.tar.gz" + }, + "original": { + "type": "tarball", + "url": "https://github.com/NixOS/nixpkgs/archive/072a6db25e947df2f31aab9eccd0ab75d5b2da11.tar.gz" + } + }, + "root": { + "inputs": { + "flake-parts": "flake-parts", + "haskell-flake": "haskell-flake", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..e6bb793 --- /dev/null +++ b/flake.nix @@ -0,0 +1,66 @@ +# https://community.flake.parts/haskell-flake/dependency +{ + inputs = { + nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable"; + flake-parts.url = "github:hercules-ci/flake-parts"; + haskell-flake.url = "github:srid/haskell-flake"; + }; + outputs = inputs@{ self, nixpkgs, flake-parts, ... }: + flake-parts.lib.mkFlake { inherit inputs; } { + systems = nixpkgs.lib.systems.flakeExposed; + imports = [ inputs.haskell-flake.flakeModule ]; + + perSystem = { self', pkgs, lib, ... }: { + + # Typically, you just want a single project named "default". But + # multiple projects are also possible, each using different GHC version. + haskellProjects.default = { + # The base package set representing a specific GHC version. + # By default, this is pkgs.haskellPackages. + # You may also create your own. 
See https://community.flake.parts/haskell-flake/package-set + basePackages = pkgs.haskell.packages.ghc982; + + # Extra package information. See https://community.flake.parts/haskell-flake/dependency + # + # Note that local packages are automatically included in `packages` + # (defined by `defaults.packages` option). + # + projectRoot = builtins.toString (lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.unions [ + ./src + ./HushGP.cabal + ]; + }); + packages = { + # aeson.source = "1.5.0.0"; # Override aeson to a custom version from Hackage + # shower.source = inputs.shower; # Override shower to a custom source path + }; + settings = { + # aeson = { + # check = false; + # }; + # relude = { + # haddock = false; + # broken = false; + # }; + }; + + devShell = { + # Enabled by default + # enable = true; + + # Programs you want to make available in the shell. + # Default programs can be disabled by setting to 'null' + tools = hp: { ormolu = hp.ormolu; threadscope = hp.threadscope; }; + + # Check that haskell-language-server works + # hlsCheck.enable = true; # Requires sandbox to be disabled + }; + }; + + # haskell-flake doesn't set the default package, but you can do it here. + packages.default = self'.packages.HushGP; + }; + }; +} diff --git a/src/HushGP/GP.hs b/src/HushGP/GP.hs index 57215aa..4a5a9ee 100644 --- a/src/HushGP/GP.hs +++ b/src/HushGP/GP.hs @@ -7,6 +7,9 @@ import Data.List (sort, uncons) import HushGP.GP.PushArgs import HushGP.Genome import HushGP.State +import HushGP.GP.Variation +import HushGP.GP.Downsample +import HushGP.Utility -- import Debug.Trace (trace, traceStack) @@ -19,7 +22,7 @@ generatePopulation pushArgs = do -- | Evaluates a population of plushies with the error function passed in via PushArgs and sorts them. -- TODO: Need to make this runnable in parallel too. 
-evaluatePopulation :: PushArgs -> ([[Gene]], [Gene]) -> [Individual] -> [Individual] +evaluatePopulation :: PushArgs -> ([[Gene]], [Gene], [Int]) -> [Individual] -> [Individual] evaluatePopulation pushArgs passedTrainingData population = sort $ zipWith updateIndividual (map (errorFunction pushArgs pushArgs passedTrainingData . plushy) population) population -- | A helper function used in evaluatePopulation. Takes a [Double] as the error scores and an individual. @@ -27,19 +30,21 @@ evaluatePopulation pushArgs passedTrainingData population = sort $ zipWith updat updateIndividual :: [Double] -> Individual -> Individual updateIndividual errors ind = ind {totalFitness = Just (sum errors), fitnessCases = Just errors} --- | The start of the gp loop. TODO: Make this more accurate later. +-- | The start of the gp loop. Generates the population and then calls +-- gpLoop' with modifications to the variables if needed. gpLoop :: PushArgs -> IO () gpLoop pushArgs = do unEvaledPopulation <- generatePopulation pushArgs - -- let evaledPop = evaluatePopulation pushArgs unEvaledPopulation - -- print evaledPop - print "gamer" + let indexedTrainingData = makeIndexedTrainingData (trainingData pushArgs) + gpLoop' pushArgs 0 0 unEvaledPopulation indexedTrainingData + -- print "do this later" + -- | The guts of the GP loop. Where the work gets done after the initialization happens -- in the main gpLoop function. The first Int holds the generation count. The second Int -- holds the evaluation count. The list of Individuals is the population. The last parameter is -- the training data (possibly downsampled). -gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> ([[Gene]], [Gene]) -> IO () +gpLoop' :: PushArgs -> Int -> Int -> [Individual] -> ([[Gene]], [Gene], [Int]) -> IO () gpLoop' pushArgs generation evaluations population indexedTrainingData = do print "Put information about each generation here." 
when bestIndPassesDownsample $ print $ "Semi Success Generation: " <> show generation @@ -54,15 +59,18 @@ gpLoop' pushArgs generation evaluations population indexedTrainingData = do print "Total test error simplified: " <> undefined -- Implement later print $ "Simplified plushy: " <> undefined -- show simplifiedPlushy print $ "Simplified program: " <> undefined -- show plushyToPush simplifiedPlushy - | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length (fst indexedTrainingData)))) = - print "Incomplete Run, saving the best so far." + | (not (enableDownsampling epsilonPushArgs) && (generation >= maxGenerations epsilonPushArgs)) || (enableDownsampling epsilonPushArgs && (evaluations >= (maxGenerations epsilonPushArgs * length population * length (tfst indexedTrainingData)))) = + print $ "Best individual: " <> show (plushy bestInd) | otherwise = gpLoop' pushArgs (succ generation) - (evaluations + (populationSize pushArgs * length (fst $ trainingData pushArgs)) + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length (fst indexedTrainingData) - length (fst $ trainingData pushArgs)) else 0) + (if bestIndPassesDownsample then length (fst indexedTrainingData) - length (fst $ trainingData pushArgs) else 0)) + (evaluations + (populationSize pushArgs * length (fst $ trainingData pushArgs)) + (if generation `mod` downsampleParentsGens pushArgs == 0 then length parentReps * (length (tfst indexedTrainingData) - length (fst $ trainingData pushArgs)) else 0) + (if bestIndPassesDownsample then length (tfst indexedTrainingData) - length (fst $ trainingData pushArgs) else 0)) + (if elitism pushArgs + then bestInd : replicate (populationSize epsilonPushArgs - 1) (newIndividual epsilonPushArgs evaledPop) + else replicate (populationSize epsilonPushArgs) (newIndividual epsilonPushArgs 
evaledPop)) + (if enableDownsampling pushArgs && ((generation `mod` downsampleParentsGens pushArgs) == 0) + then updateCaseDistances repEvaluatedPop indexedTrainingData indexedTrainingData (informedDownsamplingType pushArgs) (solutionErrorThreshold pushArgs / fromIntegral @Int @Double (length $ tfst indexedTrainingData)) + else indexedTrainingData) nextAction where - -- \| This will have downsampling added to it later. - loopTrainData :: ([[Gene]], [Gene]) - loopTrainData = indexedTrainingData -- \| This will have downsampling functionality added later. parentReps :: [Individual] parentReps = [] @@ -77,4 +85,3 @@ gpLoop' pushArgs generation evaluations population indexedTrainingData = do bestIndPassesDownsample = False -- TODO: fix this later epsilonPushArgs :: PushArgs epsilonPushArgs = pushArgs {epsilons = Nothing} -- TODO: And this -gpLoop' _ _ _ _ _ = error "How did this happen?" diff --git a/src/HushGP/GP/Downsample.hs b/src/HushGP/GP/Downsample.hs new file mode 100644 index 0000000..65960ed --- /dev/null +++ b/src/HushGP/GP/Downsample.hs @@ -0,0 +1,9 @@ +module HushGP.GP.Downsample where + +import HushGP.State +import HushGP.Genome + +updateCaseDistances :: [Individual] -> ([[Gene]], [Gene], [Int]) -> ([[Gene]], [Gene], [Int]) -> String -> Double -> ([[Gene]], [Gene], [Int]) +updateCaseDistances evaledPop downsampleData trainingData informedDownsamplingType solutionThreshold = undefined + +-- assignIndiciesToData :: diff --git a/src/HushGP/GP/PushArgs.hs b/src/HushGP/GP/PushArgs.hs index 9f0eddb..d108041 100644 --- a/src/HushGP/GP/PushArgs.hs +++ b/src/HushGP/GP/PushArgs.hs @@ -49,7 +49,7 @@ data PushArgs = PushArgs -- Arg 2: ([[Gene]], [Gene]) is the input data. Input is the first index and output is the second index. -- Arg 3: [Gene] is the plushy representation of a program. -- Returns the error list for a given set of inputs of type [Double]. 
- errorFunction :: PushArgs -> ([[Gene]], [Gene]) -> [Gene] -> [Double], + errorFunction :: PushArgs -> ([[Gene]], [Gene], [Int]) -> [Gene] -> [Double], -- | Type of informed downsampling. "solved", "elite", "soft". informedDownsamplingType :: String, -- | List of instructions to use in the evolutionary run. diff --git a/src/HushGP/GP/Variation.hs b/src/HushGP/GP/Variation.hs new file mode 100644 index 0000000..a3439e9 --- /dev/null +++ b/src/HushGP/GP/Variation.hs @@ -0,0 +1,7 @@ +module HushGP.GP.Variation where + +import HushGP.Genome +import HushGP.GP.PushArgs + +newIndividual :: PushArgs -> [Individual] -> Individual +newIndividual = error "Implement this later" diff --git a/src/HushGP/Instructions/Utility.hs b/src/HushGP/Instructions/Utility.hs index 1ab6f61..6ddd11b 100644 --- a/src/HushGP/Instructions/Utility.hs +++ b/src/HushGP/Instructions/Utility.hs @@ -171,7 +171,13 @@ findContainer _ _ = Block [] -- |Utility Function: A helper function for instructionCodeDiscrepancy. The full description is there. 
countDiscrepancy :: Gene -> Gene -> Integer -countDiscrepancy (Block xs) (Block ys) = sum [if uncurry (==) tup then 0 else 1 | tup <- zip xs ys] + abs (toInteger (length xs) - toInteger (length ys)) +-- countDiscrepancy (Block xs) (Block ys) = sum [if uncurry (==) tup then 0 else 1 | tup <- zip xs ys] + abs (toInteger (length xs) - toInteger (length ys)) +-- countDiscrepancy (Block xs) (Block ys) = sum [if isBlock (fst tup) && isBlock (snd tup) then uncurry countDiscrepancy tup else if uncurry (==) tup then 0 else 1 | tup <- zip xs ys] + abs (toInteger (length xs) - toInteger (length ys)) +countDiscrepancy (Block xs) (Block []) = codeRecursiveSize (Block xs) +countDiscrepancy (Block []) (Block ys) = codeRecursiveSize (Block ys) +countDiscrepancy (Block (x:xs)) (Block (y:ys)) = if x == y then 1 + countDiscrepancy (Block xs) (Block ys) else countDiscrepancy (Block xs) (Block ys) +countDiscrepancy _ (Block ys) = 1 + codeRecursiveSize (Block ys) +countDiscrepancy (Block xs) _ = 1 + codeRecursiveSize (Block xs) countDiscrepancy xgene ygene = if xgene == ygene then 1 else 0 -- |Utility Function: Extracts the first gene from a block. Returns itself if not a block diff --git a/src/HushGP/Problems/IntegerRegression.hs b/src/HushGP/Problems/IntegerRegression.hs index 7abe7c2..2e88c13 100644 --- a/src/HushGP/Problems/IntegerRegression.hs +++ b/src/HushGP/Problems/IntegerRegression.hs @@ -57,8 +57,8 @@ loadState plushy vals = (loadProgram (plushyToPush plushy) emptyState){_input = Map.fromList (zip [0..] vals)} -- | The error function for a single set of inputs and outputs. -intErrorFunction :: PushArgs -> ([[Gene]], [Gene]) -> [Gene] -> [Double] -intErrorFunction _args (inputData, outputData) plushy = +intErrorFunction :: PushArgs -> ([[Gene]], [Gene], [Int]) -> [Gene] -> [Double] +intErrorFunction _args (inputData, outputData, _) plushy = map abs $ zipWith (-) (map ((fromIntegral @Integer @Double . (errorHead . _int) . interpretExec) . 
loadState plushy) inputData) (map (fromIntegral @Integer @Double . extractGeneInt) outputData) intPushArgs :: PushArgs @@ -76,7 +76,8 @@ intPushArgs = defaultPushArgs tournamentSize = 5, umadRate = 0.1, variation = Map.fromList [("umad", 1.0), ("crossover", 0.0)], - elitism = False + elitism = False, + enableDownsampling = False } main :: IO () diff --git a/src/HushGP/Utility.hs b/src/HushGP/Utility.hs index 384710c..3157f45 100644 --- a/src/HushGP/Utility.hs +++ b/src/HushGP/Utility.hs @@ -13,3 +13,24 @@ randomInstruction instructions = do -- | Generates a list of random instructions from a list of instructions passed in. randomInstructions :: Int -> [Gene] -> IO [Gene] randomInstructions amt instructions = replicateM amt (randomInstruction instructions) + +-- |Utility function: Used for indexed training data. Takes the first element of triple. +tfst :: (a, b, c) -> a +tfst (x, _, _) = x + +-- |Utility function: Used for indexed training data. Takes the second element of triple. +tsnd :: (a, b, c) -> b +tsnd (_, x, _) = x + +-- |Utility function: Used for indexed training data. Takes the third element of triple. +-- The third element in the context of indexed training data represents the index assigned. +thrd :: (a, b, c) -> c +thrd (_, _, x) = x + +-- |Utility function: Converts a tuple to a triple with a passed value. +tupleToTriple :: (a, b) -> c -> (a, b, c) +tupleToTriple (x, y) z = (x, y, z) + +-- |Utility function: Converts the training data passed in to an indexed representation +makeIndexedTrainingData :: ([[Gene]], [Gene]) -> ([[Gene]], [Gene], [Int]) +makeIndexedTrainingData (inputs, outputs) = (inputs, outputs, [0..(length inputs - 1)])