diff --git a/src/gp/args.rs b/src/gp/args.rs index 9ad402a..fa936b8 100644 --- a/src/gp/args.rs +++ b/src/gp/args.rs @@ -4,6 +4,7 @@ use polars::prelude::*; use rust_decimal::prelude::*; use std::collections::HashMap; +#[derive(Clone, Copy)] pub enum ClosingType { Specified, Balanced, @@ -11,28 +12,28 @@ pub enum ClosingType { } #[allow(dead_code)] -struct PushArgs { - alignment_deviation: Decimal, // For alternation, std dev of deviation of index when alternating - alternation_rate: usize, // For alternation, prob of switching parents at each location. A number 0-100 - closes: ClosingType, // How push should automatically place Gene::Close into a plushy - dont_end: bool, // If true, keep running until limit regardless of success - // downsample: bool, // Whether or not to downsample. TODO later with all the related args - elitism: bool, // Whether to always add the best individual to next generation - error_function: fn(&PushArgs, DataFrame, Vec) -> Series, // The error function - instructions: Vec, // Instructions to use in a run - max_init_plushy_size: usize, // max initial plushy size - max_generations: usize, // Max amount of generations - parent_selection: usize, // Selection to use, TODO change this later. - pop_size: usize, // Population size - replacement_rate: Decimal, // For uniform replacement, rate items replaced - use_simplification: bool, // Whether to use simplification at end of run - simplification_k: usize, // Max amt of genes to attempt removal during one round of simplification process - simplification_steps: usize, // How many attempts to find simplified genomes - use_single_thread: bool, // if true, only single threaded - step_limit: usize, // Amount of steps a push interpreter can run for - testing_data: DataFrame, // The testing data, must be formatted the same as training data - tournament_size: usize, // Tournament size for tournament selection - training_data: DataFrame, // The training data, must be formatted the same as testing data - umad_rate: Decimal, // addition rate (deletion rate derived) for UMAD - variation: HashMap, // genetic operators and probability for use. should sum to 1, +pub struct PushArgs { + pub alignment_deviation: Decimal, // For alternation, std dev of deviation of index when alternating + pub alternation_rate: usize, // For alternation, prob of switching parents at each location. A number 0-100 + pub closes: ClosingType, // How push should automatically place Gene::Close into a plushy + pub dont_end: bool, // If true, keep running until limit regardless of success + // pub downsample: bool, // Whether or not to downsample. TODO later with all the related args + pub elitism: bool, // Whether to always add the best individual to next generation + pub error_function: fn(&PushArgs, DataFrame, Vec) -> Series, // The error function + pub instructions: Vec, // Instructions to use in a run + pub max_init_plushy_size: usize, // max initial plushy size + pub max_generations: usize, // Max amount of generations + pub parent_selection: usize, // Selection to use, TODO change this later. + pub pop_size: usize, // Population size + pub replacement_rate: f64, // For uniform replacement, rate items replaced + pub use_simplification: bool, // Whether to use simplification at end of run + pub simplification_k: usize, // Max amt of genes to attempt removal during one round of simplification process + pub simplification_steps: usize, // How many attempts to find simplified genomes + pub use_single_thread: bool, // if true, only single threaded + pub step_limit: usize, // Amount of steps a push interpreter can run for + pub testing_data: DataFrame, // The testing data, must be formatted the same as training data + pub tournament_size: usize, // Tournament size for tournament selection + pub training_data: DataFrame, // The training data, must be formatted the same as testing data + pub umad_rate: f64, // addition rate (deletion rate derived) for UMAD + pub variation: HashMap, // genetic operators and probability for use. should sum to 1, } diff --git a/src/gp/individual.rs b/src/gp/individual.rs new file mode 100644 index 0000000..b35fd3a --- /dev/null +++ b/src/gp/individual.rs @@ -0,0 +1,9 @@ +use crate::push::state::Gene; +use rust_decimal::Decimal; + +#[derive(Clone)] +pub struct Individual { + pub plushy: Vec, + pub total_fitness: Option, + pub fitness_cases: Option>, +} diff --git a/src/gp/mod.rs b/src/gp/mod.rs index 54a0b8c..c4ea562 100644 --- a/src/gp/mod.rs +++ b/src/gp/mod.rs @@ -1,5 +1,7 @@ pub mod args; pub mod genome; +pub mod individual; +pub mod selection; pub mod utils; pub mod variation; diff --git a/src/gp/selection.rs b/src/gp/selection.rs new file mode 100644 index 0000000..8871c82 --- /dev/null +++ b/src/gp/selection.rs @@ -0,0 +1,11 @@ +use crate::gp::args::PushArgs; +use crate::gp::individual::Individual; +use rand::Rng; + +pub fn select_parent( + _pop: Vec, + _push_args: &PushArgs, + _rng: &mut impl Rng, +) -> Individual { + todo!() +} diff --git a/src/gp/variation.rs b/src/gp/variation.rs index c36c02b..869114f 100644 --- a/src/gp/variation.rs +++ b/src/gp/variation.rs @@ -1,10 +1,18 @@ +use crate::gp::args::PushArgs; +use crate::gp::individual::Individual; +use crate::gp::selection::select_parent; use crate::gp::utils::gaussian_noise_factor; use crate::push::state::Gene; use rand::Rng; use rust_decimal::Decimal; use rust_decimal::prelude::ToPrimitive; +use std::collections::HashMap; use std::iter::zip; +use super::args::ClosingType; +use super::utils::random_instruction; + +#[derive(Clone)] pub enum Variation { Crossover, Alternation, @@ -12,13 +20,7 @@ pub enum Variation { UniformAddition, UniformReplacement, UniformDeletion, -} - -fn is_crossover_padding(gene: &Gene) -> bool { - match gene { - Gene::CrossoverPadding => true, - _ => false, - } + Reproduction, } fn crossover(plushy0: Vec, plushy1: Vec, mut rng: impl Rng) -> Vec { @@ -49,7 +51,7 @@ fn crossover(plushy0: Vec, plushy1: Vec, mut rng: impl Rng) -> Vec, plushy1: Vec, mut rng: impl new_plushy .into_iter() - .filter(|gene| !is_crossover_padding(gene)) + .filter(|gene| !matches!(gene, Gene::CrossoverPadding)) .collect() } @@ -125,8 +127,156 @@ fn alternation( } } -fn uniform_addition(plushy: Vec, instructions: Vec, umad_rate: Decimal) { +fn uniform_addition( + plushy: Vec, + instructions: Vec, + umad_rate: f64, + closing_type: ClosingType, + mut rng: impl Rng, +) -> Vec { let mut new_plushy: Vec = vec![]; + + for gene in plushy { + if rng.random::() < umad_rate { + let new_instruction = random_instruction(instructions.clone(), closing_type, &mut rng); + + // Randomly decide order (original first or new first) + if rng.random::() { + new_plushy.push(gene); + new_plushy.push(new_instruction); + } else { + new_plushy.push(new_instruction); + new_plushy.push(gene); + } + } else { + new_plushy.push(gene); + } + } + + new_plushy +} + +fn uniform_replacement( + plushy: Vec, + instructions: Vec, + replacement_rate: f64, + closing_type: ClosingType, + rng: &mut impl Rng, +) -> Vec { + plushy + .into_iter() + .map(|gene| { + if rng.random::() < replacement_rate { + // Replace with random instruction + random_instruction(instructions.to_vec(), closing_type, rng) + } else { + // Keep original gene + gene + } + }) + .collect() +} + +fn uniform_deletion(plushy: Vec, umad_rate: f64, rng: &mut impl Rng) -> Vec { + // If umad_rate is zero, return the original vector + if umad_rate == 0.0 { + return plushy; + } + + // Calculate the adjusted deletion rate + let adjusted_rate = 1.0 / (1.0 + (1.0 / umad_rate)); + + // Filter the vector, keeping items that are either Gene::Skip or pass the random test + plushy + .into_iter() + .filter(|_| rng.random::() >= adjusted_rate) + .collect() +} + +/// Selects a variation operator based on the probabilities +fn select_variation_op(variation_ops: &HashMap, r: f64) -> Variation { + let mut accum = 0.0; + + for (op, prob) in variation_ops { + accum += prob; + if accum >= r { + return op.clone(); + } + } + + // Default to reproduction if no match (or probabilities don't sum to 1.0) + Variation::Reproduction +} + +/// Creates a new individual based on an argmap variation +pub fn new_individual(pop: Vec, argmap: &PushArgs, rng: &mut impl Rng) -> Individual { + // Select variation operator based on probabilities + let r = rng.random::(); + let op = select_variation_op(&argmap.variation, r); + + let plushy = match op { + Variation::Crossover => { + let parent1 = select_parent(pop.clone(), argmap, rng); + let parent2 = select_parent(pop, argmap, rng); + crossover(parent1.plushy, parent2.plushy, rng) + } + + Variation::TailAlignedCrossover => { + let parent1 = select_parent(pop.clone(), argmap, rng); + let parent2 = select_parent(pop, argmap, rng); + tail_aligned_crossover(parent1.plushy, parent2.plushy, rng) + } + + Variation::UniformAddition => { + let parent = select_parent(pop, argmap, rng); + uniform_addition( + parent.plushy.clone(), + argmap.instructions.clone(), + argmap.umad_rate, + argmap.closes, + rng, + ) + } + + Variation::UniformReplacement => { + let parent = select_parent(pop, argmap, rng); + uniform_replacement( + parent.plushy.clone(), + argmap.instructions.clone(), + argmap.replacement_rate, + argmap.closes, + rng, + ) + } + + Variation::UniformDeletion => { + let parent = select_parent(pop, argmap, rng); + uniform_deletion(parent.plushy.clone(), argmap.umad_rate, rng) + } + + Variation::Alternation => { + let parent1 = select_parent(pop.clone(), argmap, rng); + let parent2 = select_parent(pop, argmap, rng); + alternation( + parent1.plushy, + parent2.plushy, + argmap.alternation_rate, + argmap.alignment_deviation, + rng, + ) + } + + Variation::Reproduction => { + let parent = select_parent(pop, argmap, rng); + parent.plushy.clone() + } + }; + + Individual { + plushy, + total_fitness: None, + fitness_cases: None, + } } #[cfg(test)] @@ -134,7 +284,9 @@ mod tests { use super::*; use crate::instructions::common::*; use crate::instructions::numeric::*; + use crate::instructions::vector::*; use crate::push::state::Gene; + use crate::push::utils::most_genes; use rand::SeedableRng; use rand::rngs::StdRng; use rust_decimal::dec; @@ -252,4 +404,79 @@ mod tests { res_plushy ); } + + #[test] + fn uniform_addition_test() { + let rng = StdRng::seed_from_u64(42); + let plushy0 = vec![ + Gene::StateFunc(exec_swap), + Gene::StateFunc(float_tan), + Gene::StateFunc(int_pop), + Gene::Close, + ]; + let res_plushy = uniform_addition(plushy0, most_genes(), 0.75, ClosingType::Balanced, rng); + assert_eq!( + vec![ + Gene::StateFunc(exec_swap), + Gene::StateFunc(float_min), + Gene::StateFunc(float_tan), + Gene::Close, + Gene::StateFunc(int_pop), + Gene::StateFunc(int_yank_dup), + Gene::Close, + Gene::StateFunc(float_is_empty), + ], + res_plushy + ); + } + + #[test] + fn uniform_replacement_test() { + let mut rng = StdRng::seed_from_u64(42); + let plushy0 = vec![ + Gene::StateFunc(exec_swap), + Gene::StateFunc(float_tan), + Gene::StateFunc(int_pop), + Gene::Close, + Gene::Close, + Gene::GeneInt(1), + ]; + let res_plushy = + uniform_replacement(plushy0, most_genes(), 0.5, ClosingType::Balanced, &mut rng); + assert_eq!( + vec![ + Gene::StateFunc(exec_swap), + Gene::StateFunc(float_tan), + Gene::StateFunc(int_pop), + Gene::Close, + Gene::StateFunc(vector_float_sort_reverse), + Gene::GeneInt(1), + ], + res_plushy + ); + } + + #[test] + fn uniform_deletion_test() { + let mut rng = StdRng::seed_from_u64(42); + let plushy0 = vec![ + Gene::StateFunc(exec_swap), + Gene::StateFunc(float_tan), + Gene::StateFunc(int_pop), + Gene::Close, + Gene::Close, + Gene::GeneInt(1), + ]; + let res_plushy = uniform_deletion(plushy0, 0.5, &mut rng); + assert_eq!( + vec![ + Gene::StateFunc(exec_swap), + Gene::StateFunc(float_tan), + Gene::StateFunc(int_pop), + Gene::Close, + Gene::GeneInt(1), + ], + res_plushy + ); + } }