argmax NOW tiebreaks randomly

This commit is contained in:
Ryan Boldi 2022-05-05 14:58:15 -04:00
parent 4fffe9e8f3
commit d9171bcb51

View File

@ -1,13 +1,22 @@
(ns propeller.tools.metrics
(:require [propeller.tools.math :as math]))
(defn argmax-last
  "Returns the index of the maximum value in `coll`.
  When several elements share the maximum, the index of the last one is
  returned (this relies on `max-key` returning the last of equal candidates)."
  [coll]
  (->> coll
       ;; pair every element with its position: [index value]
       (map-indexed vector)
       ;; pick the pair whose value (second) is greatest
       (apply max-key second)
       ;; keep only the index
       first))
(defn argmax
  "Returns the index of the maximum value in `coll`, breaking ties by choosing
  uniformly at random among all indices that hold the maximum.
  Returns nil when `coll` is empty."
  [coll]
  (when (seq coll)
    ;; Compute the maximum once up front; evaluating (apply max coll) inside
    ;; the filter predicate would rescan the whole collection per element.
    (let [max-val (apply max coll)]
      (->> coll
           ;; pair every element with its position: [index value]
           (map-indexed vector)
           ;; keep every pair tied for the maximum
           (filter #(= max-val (second %)))
           ;; random tiebreak among the candidates
           rand-nth
           ;; keep only the index
           first))))
(defn mean
"Returns the mean of a collection."
@ -46,12 +55,12 @@
"computes the next row using the prev-row current-element and the other seq"
[prev-row current-element other-seq pred]
(reduce
(fn [row [diagonal above other-element]]
(let [update-val (if (pred other-element current-element)
(fn [row [diagonal above other-element]]
(let [update-val (if (pred other-element current-element)
;; if the elements are deemed equivalent according to the predicate
;; pred, then no change has taken place to the string, so we are
;; going to set it the same value as diagonal (which is the previous edit-distance)
diagonal
diagonal
;; in the case where the elements are not considered equivalent, we are going
;; to figure out whether it's a substitution (then there is a change of 1 from the previous
;; edit distance), so the value is diagonal + 1, or whether it's a deletion, in which case the value
@ -59,18 +68,18 @@
;; of last of row + 1 (since we will be using vectors, peek is more efficient)
;; or it could be a case of insertion, in which case the value is above + 1; we choose
;; the minimum of the three
(inc (min diagonal above (peek row))))]
(inc (min diagonal above (peek row))))]
(conj row update-val)))
(conj row update-val)))
;; we need to initialize the reduce function with the value of a row, since we are
;; constructing this row from the previous one, the row is a vector of 1 element which
;; consists of 1 + the first element in the previous row (edit distance between the prefix so far
;; and an empty string)
[(inc (first prev-row))]
[(inc (first prev-row))]
;; for the reduction to go over, we need to provide it with three values, the diagonal
;; which is the same as prev-row because it starts from 0, the above, which is the next element
;; from the list and finally the element from the other sequence itself.
(map vector prev-row (next prev-row) other-seq)))
(map vector prev-row (next prev-row) other-seq)))
(defn levenshtein-distance
"Levenshtein Distance - http://en.wikipedia.org/wiki/Levenshtein_distance
@ -83,17 +92,17 @@
(empty? (str a)) (count (str b)) ;; sometimes stack pushes numbers, force
(empty? (str b)) (count (str a)) ;; a and b to be strings
:else (peek
(reduce
(reduce
;; we use a simple reduction to convert the previous row into the
;; next-row using the compute-next-row which takes a current
;; element, the previous-row computed so far, and the predicate
;; to compare for equality
(fn [prev-row current-element]
(compute-next-row prev-row current-element (str b) p))
(fn [prev-row current-element]
(compute-next-row prev-row current-element (str b) p))
;; we need to initialize the prev-row with the edit distance
;; between the various prefixes of b and the empty string
(range (inc (count (str b))))
(str a)))))
(range (inc (count (str b))))
(str a)))))
(defn sequence-similarity
"Returns a number between 0 and 1, indicating how similar the sequences are