From f424a7bd3a93977d4cc14f09a8c29752c08776d6 Mon Sep 17 00:00:00 2001 From: Rowan Torbitzky-Lane Date: Tue, 29 Apr 2025 23:11:51 -0500 Subject: [PATCH] progress on error function --- src/gp/args.rs | 60 ++++++++++++++++++++++++------- src/gp/selection.rs | 5 +++ src/gp/simplification.rs | 21 ++++++++--- src/gp/variation.rs | 41 ++++++++++++++++++---- src/push/interpreter.rs | 4 ++- src/push/state.rs | 2 ++ tests/simplification_test.rs | 68 ++++++++++++++++++++++++++++++++++++ 7 files changed, 177 insertions(+), 24 deletions(-) create mode 100644 tests/simplification_test.rs diff --git a/src/gp/args.rs b/src/gp/args.rs index 479aa1e..10fbdfd 100644 --- a/src/gp/args.rs +++ b/src/gp/args.rs @@ -1,3 +1,4 @@ +use crate::gp::selection::Selection; use crate::gp::variation::Variation; use crate::push::state::Gene; use polars::prelude::*; @@ -19,23 +20,58 @@ pub struct PushArgs { pub dont_end: bool, // If true, keep running until limit regardless of success // pub downsample: bool, // Whether or not to downsample. TODO later with all the related args pub elitism: bool, // Whether to always add the best individual to next generation - pub error_function: fn(&PushArgs, &DataFrame, Vec) -> Vec, // The error function - pub instructions: Vec, // Instructions to use in a run - pub max_init_plushy_size: usize, // max initial plushy size - pub max_generations: usize, // Max amount of generations - pub parent_selection: usize, // Selection to use, TODO change this later. - pub pop_size: usize, // Population size - pub replacement_rate: f64, // For uniform replacement, rate items replaced - pub use_simplification: bool, // Whether to use simplification at end of run + pub error_function: Option) -> Vec>, // The error function + pub instructions: Option>, // Instructions to use in a run + pub max_init_plushy_size: usize, // max initial plushy size + pub max_generations: usize, // Max amount of generations + pub parent_selection: Selection, // Selection to use, TODO change this later. + pub pop_size: usize, // Population size + pub replacement_rate: f64, // For uniform replacement, rate items replaced + pub use_simplification: bool, // Whether to use simplification at end of run pub simplification_k: usize, // Max amt of genes to attempt removal during one round of simplification process pub simplification_steps: usize, // How many attempts to find simplified genomes pub simplification_verbose: bool, // Whether to send extra messages about simplification or not pub solution_error_threshold: Decimal, // Max total error for solutions pub use_single_thread: bool, // if true, only single threaded pub step_limit: usize, // Amount of steps a push interpreter can run for - pub testing_data: DataFrame, // The testing data, must be formatted the same as training data - pub tournament_size: usize, // Tournament size for tournament selection - pub training_data: DataFrame, // The training data, must be formatted the same as testing data - pub umad_rate: f64, // addition rate (deletion rate derived) for UMAD + pub testing_data: Option, // The testing data, must be formatted the same as training data + pub tournament_size: usize, // Tournament size for tournament selection + pub training_data: Option, // The training data, must be formatted the same as testing data + pub umad_rate: f64, // addition rate (deletion rate derived) for UMAD pub variation: HashMap, // genetic operators and probability for use. should sum to 1, } + +impl PushArgs { + /// Holds the default arguments + pub fn new() -> Self { + let mut map = HashMap::new(); + map.insert(Variation::UMAD, 1.0); + + Self { + alignment_deviation: dec!(2.0), + alternation_rate: 10, + closes: ClosingType::Specified, + dont_end: false, + elitism: false, + error_function: None, + instructions: None, + max_init_plushy_size: 100, + max_generations: 1000, + parent_selection: Selection::Lexicase, + pop_size: 1000, + replacement_rate: 0.1, + use_simplification: true, + simplification_k: 4, + simplification_steps: 1000, + simplification_verbose: true, + use_single_thread: false, + solution_error_threshold: dec!(0.0), + step_limit: 1000, + testing_data: None, + tournament_size: 5, + training_data: None, + umad_rate: 0.1, + variation: map, + } + } +} diff --git a/src/gp/selection.rs b/src/gp/selection.rs index 8871c82..56fba2b 100644 --- a/src/gp/selection.rs +++ b/src/gp/selection.rs @@ -2,6 +2,11 @@ use crate::gp::args::PushArgs; use crate::gp::individual::Individual; use rand::Rng; +pub enum Selection { + Lexicase, + Tournament, +} + pub fn select_parent( _pop: Vec, _push_args: &PushArgs, diff --git a/src/gp/simplification.rs b/src/gp/simplification.rs index 5037fc6..55c0fd3 100644 --- a/src/gp/simplification.rs +++ b/src/gp/simplification.rs @@ -53,7 +53,12 @@ where ); } - let initial_errors = error_func(&push_args, &push_args.training_data, plushy.clone()); + let training_data = (&push_args) + .training_data + .clone() + .expect("Must provide training_data"); + + let mut curr_errors = error_func(&push_args, &training_data, plushy.clone()); let mut step = 0; let mut curr_plushy = plushy; @@ -62,15 +67,23 @@ where let random_k = rng.random_range(1..=push_args.simplification_k); let new_plushy = delete_k_random(random_k, &curr_plushy, &mut rng); - let new_plushy_errors = - error_func(&push_args, &push_args.training_data, new_plushy.clone()); + let new_plushy_errors = error_func(&push_args, &training_data, new_plushy.clone()); - if new_plushy_errors.iter().sum::() <= initial_errors.iter().sum() { + if new_plushy_errors.iter().sum::() <= curr_errors.iter().sum() { curr_plushy = new_plushy; + curr_errors = new_plushy_errors; } step += 1; } + if push_args.simplification_verbose { + println!( + "{{ end_plushy_length: {}, k: {} }}", + curr_plushy.len(), + push_args.simplification_k + ); + } + curr_plushy } diff --git a/src/gp/variation.rs b/src/gp/variation.rs index 869114f..d04d7f8 100644 --- a/src/gp/variation.rs +++ b/src/gp/variation.rs @@ -12,7 +12,7 @@ use std::iter::zip; use super::args::ClosingType; use super::utils::random_instruction; -#[derive(Clone)] +#[derive(Clone, PartialEq, Eq, Hash)] pub enum Variation { Crossover, Alternation, @@ -21,6 +21,7 @@ pub enum Variation { UniformReplacement, UniformDeletion, Reproduction, + UMAD, } fn crossover(plushy0: Vec, plushy1: Vec, mut rng: impl Rng) -> Vec { @@ -132,13 +133,13 @@ fn uniform_addition( instructions: Vec, umad_rate: f64, closing_type: ClosingType, - mut rng: impl Rng, + rng: &mut impl Rng, ) -> Vec { let mut new_plushy: Vec = vec![]; for gene in plushy { if rng.random::() < umad_rate { - let new_instruction = random_instruction(instructions.clone(), closing_type, &mut rng); + let new_instruction = random_instruction(instructions.clone(), closing_type, rng); // Randomly decide order (original first or new first) if rng.random::() { @@ -231,7 +232,10 @@ pub fn new_individual(pop: Vec, argmap: &PushArgs, rng: &mut impl Rn let parent = select_parent(pop, argmap, rng); uniform_addition( parent.plushy.clone(), - argmap.instructions.clone(), + argmap + .instructions + .clone() + .expect("Must provide instructions"), argmap.umad_rate, argmap.closes, rng, @@ -242,7 +246,10 @@ pub fn new_individual(pop: Vec, argmap: &PushArgs, rng: &mut impl Rn let parent = select_parent(pop, argmap, rng); uniform_replacement( parent.plushy.clone(), - argmap.instructions.clone(), + argmap + .instructions + .clone() + .expect("Must provide instructions!"), argmap.replacement_rate, argmap.closes, rng, @@ -266,6 +273,25 @@ pub fn new_individual(pop: Vec, argmap: &PushArgs, rng: &mut impl Rn ) } + Variation::UMAD => { + let parent = select_parent(pop, argmap, rng); + let parent_plushy = parent.plushy.clone(); + + // Apply uniform addition followed by uniform deletion + let after_addition = uniform_addition( + parent_plushy, + argmap + .instructions + .clone() + .expect("Must provide instructions"), + argmap.umad_rate, + argmap.closes, + rng, + ); + + uniform_deletion(after_addition, argmap.umad_rate, rng) + } + Variation::Reproduction => { let parent = select_parent(pop, argmap, rng); parent.plushy.clone() @@ -407,14 +433,15 @@ mod tests { #[test] fn uniform_addition_test() { - let rng = StdRng::seed_from_u64(42); + let mut rng = StdRng::seed_from_u64(42); let plushy0 = vec![ Gene::StateFunc(exec_swap), Gene::StateFunc(float_tan), Gene::StateFunc(int_pop), Gene::Close, ]; - let res_plushy = uniform_addition(plushy0, most_genes(), 0.75, ClosingType::Balanced, rng); + let res_plushy = + uniform_addition(plushy0, most_genes(), 0.75, ClosingType::Balanced, &mut rng); assert_eq!( vec![ Gene::StateFunc(exec_swap), diff --git a/src/push/interpreter.rs b/src/push/interpreter.rs index f6530c8..b8b781c 100644 --- a/src/push/interpreter.rs +++ b/src/push/interpreter.rs @@ -19,7 +19,9 @@ pub fn gene_to_stack(state: &mut PushState, gene: Gene) { Gene::Block(x) => state.exec.extend(x.into_iter().rev()), Gene::Close => panic!("Close found in the exec stack, this should not happen!"), Gene::Open(_) => panic!("Open found in the exec stack, this should not happen!"), - Gene::Skip => panic!("Skip found in the exec stack, this should not happen!"), + Gene::Skip => { + state.exec.pop(); // Skip the next item by removing it. + } Gene::CrossoverPadding => { panic!("CrossoverPadding found in the exec stack, this should not happen!") } diff --git a/src/push/state.rs b/src/push/state.rs index 8feba32..6029a16 100644 --- a/src/push/state.rs +++ b/src/push/state.rs @@ -18,6 +18,7 @@ pub struct PushState { pub vector_char: Vec>, pub exec: Vec, pub code: Vec, + pub input: Vec, } pub const EMPTY_STATE: PushState = PushState { @@ -33,6 +34,7 @@ pub const EMPTY_STATE: PushState = PushState { vector_char: vec![], exec: vec![], code: vec![], + input: vec![], }; #[derive(PartialEq, Eq, Debug, Clone, Hash)] diff --git a/tests/simplification_test.rs b/tests/simplification_test.rs new file mode 100644 index 0000000..a11e917 --- /dev/null +++ b/tests/simplification_test.rs @@ -0,0 +1,68 @@ +use polars::prelude::*; +use rush::gp::args::PushArgs; +use rush::gp::simplification::auto_simplify_plushy; +use rush::instructions::numeric::*; +use rush::push::state::Gene; +use rush::push::utils::most_genes; +use rust_decimal::{Decimal, dec}; + +fn test_error_function( + _push_args: &PushArgs, + data: &DataFrame, + _push_program: Vec, +) -> Vec { + let err_vec: Vec = vec![]; + + let y = data.column("y").unwrap(); + let x = data.drop("y").unwrap(); + + // println!("x: {x:#?}"); + // println!("y: {y:#?}"); + + for n in 0..x.height() { + let mut inputs: Vec = Vec::with_capacity(x.width()); + let row = x.get_row(n).unwrap(); + for datum in row.0.iter() { + // println!("{:?}", val); + inputs.push(match datum { + &AnyValue::Int32(val) => Gene::GeneInt(val as i128), + _ => Gene::Close, + }); + } + println!("{:?}", inputs); + inputs.clear(); + } + + vec![dec!(0.0)] +} + +#[test] +fn simplification_function_test() { + let train_df: DataFrame = df!( + "x0" => [1, 2, 3], + "x1" => [7, 8, 9], + "y" => [8, 10, 12], + ) + .unwrap(); + println!("{}", train_df); + // println!("{:#?}", train_df["x0"]); + + // push program declaration + let push_program: Vec = vec![ + Gene::StateFunc(float_tan), + Gene::StateFunc(float_sub), + Gene::StateFunc(int_add), + Gene::StateFunc(float_tan), + Gene::StateFunc(float_sub), + Gene::StateFunc(float_rem), + Gene::StateFunc(float_inc), + ]; + + let mut args = PushArgs::new(); + args.training_data = Some(train_df.clone()); + args.instructions = Some(most_genes()); + args.simplification_steps = 100; + args.error_function = Some(test_error_function); + + test_error_function(&args, &train_df, push_program); +}