From 55f75e90024de64cf1c80e7c48cb05efb3ef294f Mon Sep 17 00:00:00 2001 From: Tom Helmuth Date: Sat, 30 Jan 2021 10:11:32 -0500 Subject: [PATCH] Implemented Square Digits problem --- .../software/benchmarks_v2/square_digits.clj | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 src/clojush/problems/software/benchmarks_v2/square_digits.clj diff --git a/src/clojush/problems/software/benchmarks_v2/square_digits.clj b/src/clojush/problems/software/benchmarks_v2/square_digits.clj new file mode 100644 index 00000000..3098ab9b --- /dev/null +++ b/src/clojush/problems/software/benchmarks_v2/square_digits.clj @@ -0,0 +1,168 @@ +;; square_digits.clj +;; +;; Tom Helmuth, thelmuth@hamilton.edu +;; +;; Problem inspired by: https://www.codewars.com/kata/546e2562b03326a88e000020 + +(ns clojush.problems.software.benchmarks-v2.square-digits + (:use clojush.pushgp.pushgp + [clojush pushstate interpreter random util globals] + clojush.instructions.tag) + (:require [clojure.math.numeric-tower :as nt])) + +; Atom generators +(def atom-generators + (make-proportional-atom-generators + (concat + (registered-for-stacks [:string :char :integer :boolean :exec]) + (list (tag-instruction-erc [:string :char :integer :boolean :exec] 1000) ; tags + (tagged-instruction-erc 1000))) + (list 'in1) ; inputs + (list 0 + 1 + 2 + (fn [] (- (lrand-int 201) 100)) ;Integer ERC + "") ; constants + {:proportion-inputs 0.15 + :proportion-constants 0.05})) + +(def hard-coded-inputs + (list 0 1 2 3 4 5 7 9 10 12 16 24 35 46 57 68 79 80 92 98 + 100 185 231 372 408 794 + 321012 + 987654 + 999999 + 1000000)) + +;; A list of data domains. Each domain is a vector containing +;; a "set" of inputs and two integers representing how many cases from the set +;; should be used as training and testing cases respectively. Each "set" of +;; inputs is either a list or a function that, when called, will create a +;; random element of the set. +(def data-domains + [[hard-coded-inputs 30 0] ; fixed integers + [(fn [] + (first (filter #(not (some #{%} hard-coded-inputs)) + (repeatedly #(lrand-int 999999))))) 170 2000] ; random integers, besides those in hard-coded inputs + ]) + +(defn digits + "Gets list of digits of integer" + [x] + (if (zero? x) + '(0) + (loop [x x + result '()] + (if (zero? x) + result + (recur (quot x 10) + (conj result (mod x 10))))))) + +(defn solve-square-digits + "Solves the problem given the input." + [input] + (apply str + (map #(* % %) + (digits input)))) + +; Helper function for error function +(defn test-cases + "Takes a sequence of inputs and gives IO test cases of the form + [[input1 input2] output]." + [inputs] + (map (fn [in] + (vector in + (solve-square-digits in))) + (sort inputs))) + +(defn make-error-function-from-cases + "Creates and returns the error function based on the train/test cases." + [train-cases test-cases] + (fn the-actual-error-function + ([individual] + (the-actual-error-function individual :train)) + ([individual data-cases] ;; data-cases should be :train or :test + (the-actual-error-function individual data-cases false)) + ([individual data-cases print-outputs] + (let [behavior (atom '()) + errors (doall + (for [[input1 correct-output] (case data-cases + :train train-cases + :test test-cases + [])] + (let [final-state (run-push (:program individual) + (->> (make-push-state) + (push-item input1 :input))) + result (top-item :string final-state)] + (when print-outputs + (println (format "Correct output: %s\nProgram output: %s\n\n" correct-output (str result)))) + ; Record the behavior + (swap! behavior conj result) + ; Error is Levenshtein distance + (if (string? result) + (levenshtein-distance correct-output result) + 1000000) ; penalty for no return value + )))] + (if (= data-cases :train) + (assoc individual :behaviors @behavior :errors errors) + (assoc individual :test-errors errors)))))) + +(defn get-train-and-test + "Returns the train and test cases." + [data-domains] + (map test-cases + (test-and-train-data-from-domains data-domains))) + +; Define train and test cases +(def train-and-test-cases + (get-train-and-test data-domains)) + +(defn initial-report + [argmap] + (println "Train and test cases:") + (doseq [[i case] (map vector (range) (first train-and-test-cases))] + (println (format "Train Case: %3d | Input/Output: %s" i (str case)))) + (doseq [[i case] (map vector (range) (second train-and-test-cases))] + (println (format "Test Case: %3d | Input/Output: %s" i (str case)))) + (println ";;******************************")) + +(defn custom-report + "Custom generational report." + [best population generation error-function report-simplifications] + (let [best-test-errors (:test-errors (error-function best :test)) + best-total-test-error (apply +' best-test-errors)] + (println ";;******************************") + (printf ";; -*- Find Pair problem report - generation %s\n" generation) (flush) + (println "Test total error for best:" best-total-test-error) + (println (format "Test mean error for best: %.5f" (double (/ best-total-test-error (count best-test-errors))))) + (when (zero? (:total-error best)) + (doseq [[i error] (map vector + (range) + best-test-errors)] + (println (format "Test Case %3d | Error: %s" i (str error))))) + (println ";;------------------------------") + (println "Outputs of best individual on training cases:") + (error-function best :train true) + (println ";;******************************"))) ;; To do validation, could have this function return an altered best individual + ;; with total-error > 0 if it had error of zero on train but not on validation + ;; set. Would need a third category of data cases, or a defined split of training cases. + + +; Define the argmap +(def argmap + {:error-function (make-error-function-from-cases (first train-and-test-cases) + (second train-and-test-cases)) + :atom-generators atom-generators + :max-points 2000 + :max-genome-size-in-initial-program 250 + :evalpush-limit 2000 + :population-size 1000 + :max-generations 300 + :parent-selection :lexicase + :genetic-operator-probabilities {:uniform-addition-and-deletion 1.0} + :uniform-addition-and-deletion-rate 0.09 + :problem-specific-report custom-report + :problem-specific-initial-report initial-report + :report-simplifications 0 + :final-report-simplifications 5000 + :max-error 1000000})