annotate src/find_words.clj @ 0:307a81e46071 tip

initial committ
author Robert McIntyre <rlm@mit.edu>
date Tue, 18 Oct 2011 01:17:49 -0700
parents
children
rev   line source
rlm@0 1 (ns coderloop.find-words
rlm@0 2 (:use [clojure.contrib def
rlm@0 3 [seq :only [indexed]]
rlm@0 4 [duck-streams :only [read-lines file-str]]]
rlm@0 5 [clojure.java io]
rlm@0 6 [rlm shell-inspect]))
rlm@0 7
rlm@0 8
rlm@0 9
rlm@0 10 "I'm going to solve this using convolution because I like how the kernel
rlm@0 11 looks for this problem. For each word, we build up an asterisk kernel
rlm@0 12 like so:
rlm@0 13
rlm@0 14 {:word \"Robert\"}
rlm@0 15 kernel:
rlm@0 16
rlm@0 17 t t t
rlm@0 18 r r r
rlm@0 19 e e e
rlm@0 20 b b b
rlm@0 21 ooo
rlm@0 22 treboRobert
rlm@0 23 ooo
rlm@0 24 b b b
rlm@0 25 e e e
rlm@0 26 r r r
rlm@0 27 t t t
rlm@0 28
rlm@0 29 isn't it really cute-looking?
rlm@0 30
rlm@0 31 Anyways, I'll drag these asterisks along the word-matrix via convolution
rlm@0 32 and use them to build up a transient structure that contains only the matches.
rlm@0 33
rlm@0 34 Then, I'll use that transitnt structure to remove all matches from the original
rlm@0 35 grid.
rlm@0 36
rlm@0 37 I need stuff to do convolutions, to make kernels given a word, to create the
rlm@0 38 transient structure, and to remove the entries in the transient structure
rlm@0 39 from the original word-grid. I also need a way to represent a word grid."
rlm@0 40
rlm@0 41 (defvar strcat (partial apply str)
rlm@0 42 "flattens a list by glomming everything together in a string.")
rlm@0 43
rlm@0 44 (defn word-grid
rlm@0 45 "create a mapping from elements in R^2 to chars"
rlm@0 46 [words]
rlm@0 47 (reduce
rlm@0 48 merge
rlm@0 49 (map (fn [[idx s]]
rlm@0 50 (zipmap
rlm@0 51 (map #(vector (first %) idx) (indexed s))
rlm@0 52 (seq s)))
rlm@0 53 (indexed words))))
rlm@0 54
rlm@0 55
rlm@0 56 (defn bounding-square
rlm@0 57 "finds the minimal square it takes to bound the grid. works for all
rlm@0 58 geometries."
rlm@0 59 [grid]
rlm@0 60 (let [coords (keys grid)
rlm@0 61 xs (sort (map first coords))
rlm@0 62 ys (sort (map second coords))]
rlm@0 63 [(first xs) (last xs) (first ys) (last ys)]))
rlm@0 64
rlm@0 65 ;;I have no compunctinos with using mutable state in printing
rlm@0 66 (defn print-grid* [grid]
rlm@0 67 (let [[x-min x-max y-min y-max] (bounding-square grid)
rlm@0 68 canvas (atom
rlm@0 69 (vec (for [y (range (inc (- y-max y-min)))]
rlm@0 70 (vec (repeat (inc (- x-max x-min)) ".")))))]
rlm@0 71 (dorun (map (fn [[[x y] c]]
rlm@0 72 (swap! canvas #(assoc-in % [ (- y y-min) (- x x-min)] c))) grid))
rlm@0 73 @canvas))
rlm@0 74
rlm@0 75 (defn print-grid
rlm@0 76 "nice for debugging but irrelevant for the rest of the problem"
rlm@0 77 [grid]
rlm@0 78 (dorun
rlm@0 79 (for [line (print-grid* grid)]
rlm@0 80 (do (dorun (map print line))
rlm@0 81 (println)))))
rlm@0 82
rlm@0 83 (defn asterisk-kernel [word]
rlm@0 84 (let [span (range (inc (count word)))]
rlm@0 85 (vector
rlm@0 86 (zipmap (map #(vector 0 %) span) word)
rlm@0 87 (zipmap (map #(vector 0 (- %)) span) word)
rlm@0 88 (zipmap (map #(vector % %) span) word)
rlm@0 89 (zipmap (map #(vector % (- %)) span) word)
rlm@0 90 (zipmap (map #(vector % 0) span) word)
rlm@0 91 (zipmap (map #(vector (- %) 0) span) word)
rlm@0 92 (zipmap (map #(vector (- %) %) span) word)
rlm@0 93 (zipmap (map #(vector (- %) (- %)) span) word))))
rlm@0 94
rlm@0 95 ;;this is not lazy :(
rlm@0 96 (defn search-grid-at-point [kernel grid [point-x point-y]]
rlm@0 97 (let [shift-kernel
rlm@0 98 (zipmap
rlm@0 99 (map (fn [[x y]]
rlm@0 100 [(+ x point-x)
rlm@0 101 (+ y point-y)])
rlm@0 102 (keys kernel))
rlm@0 103 (vals kernel))]
rlm@0 104 (if (= (select-keys grid (keys shift-kernel))
rlm@0 105 shift-kernel)
rlm@0 106 shift-kernel nil)))
rlm@0 107
rlm@0 108 (defn search-word-kernel
rlm@0 109 "search the grid for a particular kernel and store the reusult in the
rlm@0 110 atom (matches)"
rlm@0 111 [grid kernel]
rlm@0 112 (reduce merge (map (fn [point]
rlm@0 113 (search-grid-at-point kernel grid point))
rlm@0 114 (keys grid))))
rlm@0 115
rlm@0 116 (defn search-word [grid word]
rlm@0 117 (let [kernels (asterisk-kernel word)]
rlm@0 118 (reduce merge (map #(search-word-kernel grid %) kernels))))
rlm@0 119
rlm@0 120 (defn search-words [grid words]
rlm@0 121 (reduce merge (map #(search-word grid %) words)))
rlm@0 122
rlm@0 123 (defn remove-matches [grid matches]
rlm@0 124 (apply (partial dissoc grid) (keys matches)))
rlm@0 125
rlm@0 126 (defn grid->str [grid]
rlm@0 127 (strcat (vals (apply sorted-map
rlm@0 128 (interleave (map (comp vec reverse) (keys grid)) (vals grid))))))
rlm@0 129
rlm@0 130 (defn read-wordsearch [#^java.io.File input]
rlm@0 131 (let [[crossword words] (split-with (comp not (partial = "")) (read-lines input))
rlm@0 132 words (rest words)]
rlm@0 133 [crossword words]))
rlm@0 134
rlm@0 135 (defn process-wordsearch [[crossword words]]
rlm@0 136 (let [grid (word-grid crossword)]
rlm@0 137 (grid->str (remove-matches grid (search-words grid words)))))
rlm@0 138
rlm@0 139 (defn doit [args]
rlm@0 140 (println
rlm@0 141 (process-wordsearch
rlm@0 142 (read-wordsearch
rlm@0 143 (file-str (first args))))))
rlm@0 144
rlm@0 145 ;;********************************************************************************
rlm@0 146 (if (command-line?)
rlm@0 147 (doit *command-line-args*))
rlm@0 148
rlm@0 149
rlm@0 150 (def input (file-str "/home/r/coderloop-test/input.txt"))
rlm@0 151 (def a (file-str "/home/r/coderloop-test/findwords-a.in"))
rlm@0 152 (def d (file-str "/home/r/coderloop-test/findwords-d.in"))
rlm@0 153 (def c (file-str "/home/r/coderloop-test/findwords-c.in"))
rlm@0 154 (def e (file-str "/home/r/coderloop-test/findwords-e.in"))
rlm@0 155
rlm@0 156 (def test-grid
rlm@0 157 ["ELGOOGWWW"
rlm@0 158 "TIRXMAHIR"
rlm@0 159 "BATMANZNC"
rlm@0 160 "CMRVLTOLD"])
rlm@0 161
rlm@0 162 (def test-words
rlm@0 163 ["MAIL"
rlm@0 164 "WIN"
rlm@0 165 "GOOGLE"
rlm@0 166 "TAR"
rlm@0 167 "BATMAN"
rlm@0 168 "CAR"
rlm@0 169 "WWW"
rlm@0 170 "TOLD"
rlm@0 171 "CD"])
rlm@0 172
rlm@0 173
rlm@0 174