Skip to content

Commit

Permalink
Implemented the Nth Concat problem
Browse files Browse the repository at this point in the history
  • Loading branch information
thelmuth committed Jan 29, 2021
1 parent 9626a80 commit 3e2a57e
Showing 1 changed file with 229 additions and 0 deletions.
229 changes: 229 additions & 0 deletions src/clojush/problems/software/benchmarks_v2/nth_concat.clj
Original file line number Diff line number Diff line change
@@ -0,0 1,229 @@
;; nth_concat.clj
;; Tom Helmuth, [email protected]
;;
;; Note that this problem is harder for clojush because string_nth finds the
;; integer mod the size of the string to get the index.
;;
;; Problem inspired by Hamilton College CS 101 lab problem

(ns clojush.problems.software.benchmarks-v2.nth-concat
(:use clojush.pushgp.pushgp
[clojush pushstate interpreter random util globals]
clojush.instructions.tag)
(:require [clojure.math.numeric-tower :as nt]))

; Atom generators
(def atom-generators
(make-proportional-atom-generators
(concat
(registered-for-stacks [:vector_string :string :char :integer :boolean :exec])
(list (tag-instruction-erc [:vector_string :string :char :integer :boolean :exec] 1000) ; tags
(tagged-instruction-erc 1000)))
(list 'in1 'in2) ; inputs
(list []
""
0
1
) ; constants
{:proportion-inputs 0.15
:proportion-constants 0.05}))

(def nth-concat-chars (map char (range 32 127)))

(defn nth-concat-random-string
"Creates a string for this problem of length len"
[len]
(apply str (repeatedly len #(lrand-nth nth-concat-chars))))

(defn rand-int-range
"Returns an integer between start (inclusive) and end (exclusive)"
[start end]
( start (lrand-int (- end start))))

(defn nth-concat-input-all-have-n
"Creates a vector of strings of length len and an index n in [0,19].
This version makes sure all strings have length > n, so that all
strings will contribute to answer"
[len]
(let [n (lrand-int 20)]
(vector (vec (repeatedly len #(nth-concat-random-string
(rand-int-range (inc n) 21))))
n)))

(defn nth-concat-input-some-shorter-than-n
"Creates a vector of strings of length len and an index n in [0,20].
Note that if n = 20, it will always be outside of the indices of the strings.
This version allows strings of any length [0,20], so that some will not
have index n in them."
[len]
(let [n (lrand-int 21)]
(vector (vec (repeatedly len #(nth-concat-random-string
(lrand-int 21))))
n)))

;; A list of data domains. Each domain is a vector containing
;; a "set" of inputs and two integers representing how many cases from the set
;; should be used as training and testing cases respectively. Each "set" of
;; inputs is either a list or a function that, when called, will create a
;; random element of the set.
(def data-domains
[[(list [[] 5]
[[] 20]
[["arbitrary string"] 8]
[["01234567890123456789"] 19]
[["a" "b" "c" "d" "e" "f" "g"] 0]
[["12" "34" "56" "78" "90"] 1]
[["12ab" "34cd" "56ef" "78gh" "90ij"] 1]
[["hi" "there" "world" "is" "nice" "!"] 0]
[["POIUYTREWQLKJHGFDSAM"
"zxcvbnmasdfghjklqwer"
"~!@#$%^&*()_ -=[];'."
"0987654321poiuytNBVC"] 19]
[["cat" "SUNNY" "cat" "SUNNY" "cat" "SUNNY" "cat" "SUNNY" "cat" "SUNNY"
"dog" "HAROLD" "dog" "HAROLD" "dog" "HAROLD" "dog" "HAROLD" "dog" "HAROLD"] 2])
10 0] ; inputs where n is within the bounds of every string in the vector
[(list [["friend"] 10]
[["01234567890123456789"] 20]
[["its in" "out"] 3]
[["too short" "this one is longer"] 16]
[["one long string here" "a bit shorter" "also pretty long"] 14]
[["G" "" "O" "" "A" "" "T" ""] 0]
[["hi" "there" "world" "is" "nice" "!"] 1]
[["hi" "there" "world" "is" "nice" "!"] 2]
[["hi" "there" "world" "is" "nice" "!"] 3]
[["hi" "there" "world" "is" "nice" "!"] 4]
[["hi" "there" "world" "is" "nice" "!"] 5]
[["hi" "there" "world" "is" "nice" "!"] 20]
[["" "1" "23" "456" "789q" "werty" "uiop[]" "asdfghj" "kl;'zxcv" "bnm,./<>?"] 0]
[["" "1" "23" "456" "789q" "werty" "uiop[]" "asdfghj" "kl;'zxcv" "bnm,./<>?"] 5]
[["" "1" "23" "456" "789q" "werty" "uiop[]" "asdfghj" "kl;'zxcv" "bnm,./<>?"] 8]
[["" "1" "23" "456" "789q" "werty" "uiop[]" "asdfghj" "kl;'zxcv" "bnm,./<>?"] 15]
[["POIUYTREWQLKJHGFDSAM"
"zxcvbnmasdfghjklqwer"
"~!@#$%^&*()_ -=[];'."
"0987654321poiuytNBVC"] 20]
[["cat" "SUNNY" "cat" "SUNNY" "cat" "SUNNY" "cat" "SUNNY" "cat" "SUNNY"
"dog" "HAROLD" "dog" "HAROLD" "dog" "HAROLD" "dog" "HAROLD" "dog" "HAROLD"] 3]
[(vec (repeat 20 "")) 12]
[["" "456" "" "" "" "321" "" "" "train" "" " " "QUACK" "" "" "" "" "ken" ""] 1]
)
20 0] ; inputs where n is outside the bounds of some strings
[(fn [] (nth-concat-input-all-have-n (rand-int-range 1 21))) 50 1000] ; all indices are in bounds
[(fn [] (nth-concat-input-some-shorter-than-n (rand-int-range 1 21))) 120 1000]] ; some indices may be out of bounds
)

;;Can make test data like this:
(comment (test-and-train-data-from-domains data-domains))

(defn solve-nth-concat
"Solves the problem given the input."
[[vec-of-str n]]
(apply str
(map (fn [string]
(if (< n (count string))
(nth string n)
""))
vec-of-str)))

; Helper function for error function
(defn test-cases
"Takes a sequence of inputs and gives IO test cases of the form
[[input1 input2] output]."
[inputs]
(map (fn [in]
(vector in
(solve-nth-concat in)))
inputs))

(defn make-error-function-from-cases
"Creates and returns the error function based on the train/test cases."
[train-cases test-cases]
(fn the-actual-error-function
([individual]
(the-actual-error-function individual :train))
([individual data-cases] ;; data-cases should be :train or :test
(the-actual-error-function individual data-cases false))
([individual data-cases print-outputs]
(let [behavior (atom '())
errors (doall
(for [[[input1 input2] correct-output] (case data-cases
:train train-cases
:test test-cases
[])]
(let [final-state (run-push (:program individual)
(->> (make-push-state)
(push-item input2 :input)
(push-item input1 :input)))
result (top-item :string final-state)]
(when print-outputs
(println (format "| Correct output: %s\n| Program output: %s\n" (str correct-output) (str result))))
; Record the behavior
(swap! behavior conj result)
; Error is Levenshtein distance
(if (string? result)
(levenshtein-distance correct-output (str result))
1000000) ; penalty for no return value
)))]
(if (= data-cases :train)
(assoc individual :behaviors @behavior :errors errors)
(assoc individual :test-errors errors))))))

(defn get-train-and-test
"Returns the train and test cases."
[data-domains]
(map test-cases
(test-and-train-data-from-domains data-domains)))

; Define train and test cases
(def train-and-test-cases
(get-train-and-test data-domains))

(defn initial-report
[argmap]
(println "Train and test cases:")
(doseq [[i case] (map vector (range) (first train-and-test-cases))]
(println (format "Train Case: = | Input/Output: %s" i (str case))))
(doseq [[i case] (map vector (range) (second train-and-test-cases))]
(println (format "Test Case: = | Input/Output: %s" i (str case))))
(println ";;******************************"))

(defn custom-report
"Custom generational report."
[best population generation error-function report-simplifications]
(let [best-test-errors (:test-errors (error-function best :test))
best-total-test-error (apply ' best-test-errors)]
(println ";;******************************")
(printf ";; -*- Find Pair problem report - generation %s\n" generation) (flush)
(println "Test total error for best:" best-total-test-error)
(println (format "Test mean error for best: %.5f" (double (/ best-total-test-error (count best-test-errors)))))
(when (zero? (:total-error best))
(doseq [[i error] (map vector
(range)
best-test-errors)]
(println (format "Test Case = | Error: %s" i (str error)))))
(println ";;------------------------------")
(println "Outputs of best individual on training cases:")
(error-function best :train true)
(println ";;******************************"))) ;; To do validation, could have this function return an altered best individual
;; with total-error > 0 if it had error of zero on train but not on validation
;; set. Would need a third category of data cases, or a defined split of training cases.


; Define the argmap
(def argmap
{:error-function (make-error-function-from-cases (first train-and-test-cases)
(second train-and-test-cases))
:atom-generators atom-generators
:max-points 2000
:max-genome-size-in-initial-program 250
:evalpush-limit 2000
:population-size 1000
:max-generations 300
:parent-selection :lexicase
:genetic-operator-probabilities {:uniform-addition-and-deletion 1.0}
:uniform-addition-and-deletion-rate 0.09
:problem-specific-report custom-report
:problem-specific-initial-report initial-report
:report-simplifications 0
:final-report-simplifications 5000
:max-error 1000000})

0 comments on commit 3e2a57e

Please sign in to comment.