diff --git a/Cargo.toml b/Cargo.toml index 9bf2a58..19d9ff8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,4 +7,5 @@ edition = "2024" rand = "0.9.1" paste = "1.0.15" rust_decimal = { version = "1.37", features = ["macros", "maths"] } -rush_macro = { path = "rush_macro" } \ No newline at end of file +rush_macro = { path = "rush_macro" } +polars = { version = "0.46.0", features = ["lazy"]} diff --git a/src/gp/args.rs b/src/gp/args.rs new file mode 100644 index 0000000..72f3897 --- /dev/null +++ b/src/gp/args.rs @@ -0,0 +1,37 @@ +use crate::gp::variation::Variation; +use crate::push::state::Gene; +use polars::prelude::*; +use rust_decimal::prelude::*; +use std::collections::HashMap; + +pub enum ClosingType { + Specified, + Balanced, + None, +} + +struct PushArgs { + alignment_deviation: usize, // For alternation, std dev of deviation of index when alternating + alternation_rate: Decimal, // For alternation, prob of switching parents at each location + closes: ClosingType, // How push should automatically place Gene::Close into a plushy + dont_end: bool, // If true, keep running until limit regardless of success + // downsample: bool, // Whether or not to downsample. TODO later with all the related args + elitism: bool, // Whether to always add the best individual to next generation + error_function: fn(&PushArgs, DataFrame, Vec) -> Series, // The error function + instructions: Vec, // Instructions to use in a run + max_init_plushy_size: usize, // max initial plushy size + max_generations: usize, // Max amount of generations + parent_selection: usize, // Selection to use, TODO change this later. + pop_size: usize, // Population size + replacement_rate: Decimal, // For uniform replacement, rate items replaced + use_simplification: bool, // Whether to use simplification at end of run + simplification_k: usize, // Max amt of genes to attempt removal during one round of simplification process + simplification_steps: usize, // How many attempts to find simplified genomes + use_single_thread: bool, // if true, only single threaded + step_limit: usize, // Amount of steps a push interpreter can run for + testing_data: DataFrame, // The testing data, must be formatted the same as training data + tournament_size: usize, // Tournament size for tournament selection + training_data: DataFrame, // The training data, must be formatted the same as testing data + umad_rate: Decimal, // addition rate (deletion rate derived) for UMAD + variation: HashMap, // genetic operators and probability for use. should sum to 1, +} diff --git a/src/gp/genome.rs b/src/gp/genome.rs index 184a6a0..802d668 100644 --- a/src/gp/genome.rs +++ b/src/gp/genome.rs @@ -103,26 +103,26 @@ pub fn plushy_to_push(genes: Vec) -> Vec { if plushy_buffer.is_empty() && has_openers(&push_buffer) { plushy_buffer.push(Gene::Close); } else if plushy_buffer.is_empty() { - return plushy_buffer; + return push_buffer; } else { - let first_gene = plushy_buffer.pop().unwrap(); + let first_gene = plushy_buffer.remove(0); match &first_gene { Gene::Close => { if has_openers(&push_buffer) { let mut index: Option = None; - let mut opener: Option<&Gene> = None; + let mut opener: Option = None; // not the most optimal iterating through the entire genome. // Will do for now. - for (ndx, el) in push_buffer.iter().enumerate() { + for (ndx, el) in push_buffer.clone().into_iter().enumerate() { if is_opener(&el) { index = Some(ndx); opener = Some(el); } } - let post_open = push_buffer[(index.unwrap() + 1)..].to_vec(); - let mut push_buffer = push_buffer[..index.unwrap()].to_vec(); - push_buffer.extend(post_open); - if get_opener_count(opener.unwrap()) > &1u8 { + let post_open: Vec<_> = push_buffer.drain((index.unwrap() + 1)..).collect(); + push_buffer.pop(); // Pop the close here + push_buffer.push(Gene::Block(post_open)); + if get_opener_count(&opener.clone().unwrap()) > &1u8 { let opener_new = dec_opener(opener.unwrap().clone()); push_buffer.push(opener_new); } @@ -136,7 +136,7 @@ pub fn plushy_to_push(genes: Vec) -> Vec { #[cfg(test)] mod tests { - // use super::*; + use super::*; // use crate::instructions::vector::{string_iterate, vector_float_maximum}; use crate::instructions::common::*; use crate::instructions::numeric::*; @@ -156,13 +156,39 @@ mod tests { #[test] fn plushy_to_push_test() { let plushy = vec![ - Gene::StateFunc(float_flush), - Gene::StateFunc(exec_pop), - Gene::StateFunc(int_add), - Gene::StateFunc(float_rem), + Gene::StateFunc(exec_swap), + Gene::StateFunc(float_tan), + Gene::StateFunc(int_pop), Gene::Close, - Gene::StateFunc(float_sub), + Gene::StateFunc(exec_flush), + Gene::Close, + Gene::StateFunc(boolean_pop), + ]; + let res_push = plushy_to_push(plushy); + assert_eq!( + res_push, + vec![ + StateFunc(exec_swap), + Gene::Block(vec![Gene::StateFunc(float_tan), Gene::StateFunc(int_pop)]), + Gene::Block(vec![Gene::StateFunc(exec_flush)]), + Gene::StateFunc(boolean_pop), + ] + ); + + let plushy = vec![ + Gene::StateFunc(exec_swap), + Gene::StateFunc(float_tan), + Gene::StateFunc(int_pop), Gene::Close, ]; + let res_push = plushy_to_push(plushy); + assert_eq!( + res_push, + vec![ + StateFunc(exec_swap), + Gene::Block(vec![Gene::StateFunc(float_tan), Gene::StateFunc(int_pop)]), + Gene::Block(vec![]), + ] + ) } } diff --git a/src/gp/mod.rs b/src/gp/mod.rs index d2ccc73..2939b9e 100644 --- a/src/gp/mod.rs +++ b/src/gp/mod.rs @@ -1 +1,5 @@ +pub mod args; pub mod genome; +pub mod variation; + +// pub fn gp_loop diff --git a/src/gp/variation.rs b/src/gp/variation.rs new file mode 100644 index 0000000..22a49a7 --- /dev/null +++ b/src/gp/variation.rs @@ -0,0 +1,8 @@ +pub enum Variation { + Crossover, + Alternation, + TailAlignedCrossover, + UniformAddition, + UniformReplacement, + UniformDeletion, +}