changeset 430:5205535237fb

fix skew in self-organizing-touch, work on thesis.
author Robert McIntyre <rlm@mit.edu>
date Sat, 22 Mar 2014 16:10:34 -0400 (2014-03-22)
parents b5d0f0adf19f
children 7410f0d8011c
files org/movement.org org/proprioception.org org/self_organizing_touch.clj org/touch.org org/worm_learn.clj thesis/aux/org/first-chapter.html thesis/aux/org/first-chapter.org thesis/aux/org/roadmap.org thesis/org/first-chapter.html thesis/org/first-chapter.org thesis/org/roadmap.org
diffstat 11 files changed, 961 insertions(+), 958 deletions(-) [+]
line wrap: on
line diff
     1.1 --- a/org/movement.org	Fri Mar 21 20:56:56 2014 -0400
     1.2 +++ b/org/movement.org	Sat Mar 22 16:10:34 2014 -0400
     1.3 @@ -283,7 +283,7 @@
     1.4          muscles (pics "muscles/0")
     1.5          targets (map
     1.6                   #(File. (str base "out/" (format "%07d.png" %)))
     1.7 -                 (range 0 (count main-view)))]
     1.8 +                 (range (count main-view)))]
     1.9      (dorun
    1.10       (pmap
    1.11        (comp
     2.1 --- a/org/proprioception.org	Fri Mar 21 20:56:56 2014 -0400
     2.2 +++ b/org/proprioception.org	Sat Mar 22 16:10:34 2014 -0400
     2.3 @@ -52,7 +52,7 @@
     2.4     system. The three vectors do not have to be normalized or
     2.5     orthogonal."
     2.6    [vec1 vec2 vec3]
     2.7 -  (< 0 (.dot (.cross vec1 vec2) vec3)))
     2.8 +  (pos? (.dot (.cross vec1 vec2) vec3)))
     2.9  
    2.10  (defn absolute-angle
    2.11    "The angle between 'vec1 and 'vec2 around 'axis. In the range 
    2.12 @@ -328,7 +328,7 @@
    2.13          proprioception (pics "proprio/0")
    2.14          targets (map
    2.15                   #(File. (str base "out/" (format "%07d.png" %)))
    2.16 -                 (range 0 (count main-view)))]
    2.17 +                 (range (count main-view)))]
    2.18      (dorun
    2.19       (pmap
    2.20        (comp
    2.21 @@ -385,7 +385,7 @@
    2.22  
    2.23  * Next 
    2.24  
    2.25 -Next time, I'll give the Worm the power to [[./movement.org][move on it's own]].
    2.26 +Next time, I'll give the Worm the power to [[./movement.org][move on its own]].
    2.27  
    2.28  
    2.29  * COMMENT generate source
     3.1 --- a/org/self_organizing_touch.clj	Fri Mar 21 20:56:56 2014 -0400
     3.2 +++ b/org/self_organizing_touch.clj	Sat Mar 22 16:10:34 2014 -0400
     3.3 @@ -62,6 +62,7 @@
     3.4      (merge (worm-world-defaults)
     3.5             {:worm-model single-worm-segment
     3.6              :view single-worm-segment-view
     3.7 +            :experience-watch nil
     3.8              :motor-control
     3.9              (motor-control-program
    3.10               worm-single-segment-muscle-labels
     4.1 --- a/org/touch.org	Fri Mar 21 20:56:56 2014 -0400
     4.2 +++ b/org/touch.org	Sat Mar 22 16:10:34 2014 -0400
     4.3 @@ -78,7 +78,7 @@
     4.4  To simulate touch there are three conceptual steps. For each solid
     4.5  object in the creature, you first have to get UV image and scale
     4.6  parameter which define the position and length of the feelers. Then,
     4.7 -you use the triangles which compose the mesh and the UV data stored in
     4.8 +you use the triangles which comprise the mesh and the UV data stored in
     4.9  the mesh to determine the world-space position and orientation of each
    4.10  feeler. Then once every frame, update these positions and orientations
    4.11  to match the current position and orientation of the object, and use
    4.12 @@ -136,7 +136,7 @@
    4.13  A =Mesh= is composed of =Triangles=, and each =Triangle= has three
    4.14  vertices which have coordinates in world space and UV space.
    4.15   
    4.16 -Here, =triangles= gets all the world-space triangles which compose a
    4.17 +Here, =triangles= gets all the world-space triangles which comprise a
    4.18  mesh, while =pixel-triangles= gets those same triangles expressed in
    4.19  pixel coordinates (which are UV coordinates scaled to fit the height
    4.20  and width of the UV image).
    4.21 @@ -152,7 +152,7 @@
    4.22       (.getTriangle (.getMesh geo) triangle-index scratch) scratch)))
    4.23  
    4.24  (defn triangles
    4.25 -  "Return a sequence of all the Triangles which compose a given
    4.26 +  "Return a sequence of all the Triangles which comprise a given
    4.27     Geometry." 
    4.28    [#^Geometry geo]
    4.29    (map (partial triangle geo) (range (.getTriangleCount (.getMesh geo)))))
    4.30 @@ -240,7 +240,7 @@
    4.31    [#^Triangle t]
    4.32    (let [mat (Matrix4f.)
    4.33          [vert-1 vert-2 vert-3]
    4.34 -        ((comp vec map) #(.get t %) (range 3))
    4.35 +        (mapv #(.get t %) (range 3))
    4.36          unit-normal (do (.calculateNormal t)(.getNormal t))
    4.37          vertices [vert-1 vert-2 vert-3 unit-normal]]
    4.38      (dorun 
     5.1 --- a/org/worm_learn.clj	Fri Mar 21 20:56:56 2014 -0400
     5.2 +++ b/org/worm_learn.clj	Sat Mar 22 16:10:34 2014 -0400
     5.3 @@ -141,9 +141,6 @@
     5.4       (> (Math/sin bend) 0.64))
     5.5     (:proprioception (peek experiences))))
     5.6  
     5.7 -(defn touch-average [[coords touch]]
     5.8 -  (/ (average (map first touch)) (average (map second touch))))
     5.9 -
    5.10  (defn rect-region [[x0 y0] [x1 y1]]
    5.11    (vec
    5.12     (for [x (range x0 (inc x1))
    5.13 @@ -225,15 +222,6 @@
    5.14  
    5.15  (declare phi-space phi-scan)
    5.16  
    5.17 -(defn next-phi-states
    5.18 -  "Given proprioception data, determine the most likely next sensory
    5.19 -   pattern from previous experience."
    5.20 -  [proprio phi-space phi-scan]
    5.21 -  (if-let [results (phi-scan proprio)]
    5.22 -    (mapv phi-space
    5.23 -          (filter (partial > (count phi-space))
    5.24 -                  (map inc results)))))
    5.25 -
    5.26  (defn debug-experience
    5.27    [experiences]
    5.28    (cond
    5.29 @@ -257,14 +245,13 @@
    5.30  
    5.31  (defn worm-world-defaults []
    5.32    (let [direct-control (worm-direct-control worm-muscle-labels 40)]
    5.33 -    {:view worm-side-view
    5.34 -     :motor-control (:motor-control direct-control)
    5.35 -     :keybindings (:keybindings direct-control)
    5.36 -     :record nil
    5.37 -     :experiences (atom [])
    5.38 -     :experience-watch debug-experience
    5.39 -     :worm-model worm-model
    5.40 -     :end-frame nil}))
    5.41 +    (merge direct-control     
    5.42 +           {:view worm-side-view
    5.43 +            :record nil
    5.44 +            :experiences (atom [])
    5.45 +            :experience-watch debug-experience
    5.46 +            :worm-model worm-model
    5.47 +            :end-frame nil})))
    5.48  
    5.49  (defn dir! [file]
    5.50    (if-not (.exists file)
    5.51 @@ -300,7 +287,7 @@
    5.52           (position-camera world view)
    5.53           (.setTimer world timer)
    5.54           (display-dilated-time world timer)
    5.55 -         (if record
    5.56 +         (when record
    5.57             (Capture/captureVideo
    5.58              world
    5.59              (dir! (File. record "main-view"))))
    5.60 @@ -321,13 +308,13 @@
    5.61               (experience-watch @experiences))
    5.62             (muscle-display
    5.63              muscle-data
    5.64 -            (if record (dir! (File. record "muscle"))))
    5.65 +            (when record (dir! (File. record "muscle"))))
    5.66             (prop-display
    5.67              proprioception-data
    5.68 -            (if record (dir! (File. record "proprio"))))
    5.69 +            (when record (dir! (File. record "proprio"))))
    5.70             (touch-display 
    5.71              touch-data
    5.72 -            (if record (dir! (File. record "touch")))))))))
    5.73 +            (when record (dir! (File. record "touch")))))))))
    5.74  
    5.75  
    5.76  
    5.77 @@ -406,22 +393,37 @@
    5.78    (def phi-scan (gen-phi-scan phi-space))
    5.79    )
    5.80  
    5.81 -
    5.82 -
    5.83 +;; (defn infer-nils-dyl [s]
    5.84 +;;   (loop [closed ()
    5.85 +;;          open s
    5.86 +;;          anchor 0]
    5.87 +;;     (if-not (empty? open)
    5.88 +;;       (recur (conj closed
    5.89 +;;                    (or (peek open)
    5.90 +;;                        anchor))
    5.91 +;;              (pop open)
    5.92 +;;              (or (peek open) anchor))
    5.93 +;;        closed)))
    5.94 +      
    5.95 +;; (defn infer-nils [s]
    5.96 +;;   (for [i (range (count s))]
    5.97 +;;     (or (get s i)
    5.98 +;;         (some (comp not nil?) (vector:last-n (- (count s) i)))
    5.99 +;;         0)))
   5.100  
   5.101  
   5.102  (defn infer-nils
   5.103    "Replace nils with the next available non-nil element in the
   5.104     sequence, or barring that, 0."
   5.105    [s]
   5.106 -  (loop [i (dec (count s)) v (transient s)]
   5.107 -    (if (= i 0) (persistent! v)
   5.108 -        (let [cur (v i)]
   5.109 -          (if cur
   5.110 -            (if (get v (dec i) 0)
   5.111 -              (recur (dec i) v)
   5.112 -              (recur (dec i) (assoc! v (dec i) cur)))
   5.113 -            (recur i (assoc! v i 0)))))))
   5.114 +  (loop [i (dec (count s))
   5.115 +         v (transient s)]
   5.116 +    (if (zero? i) (persistent! v)
   5.117 +        (if-let [cur (v i)]
   5.118 +          (if (get v (dec i) 0)
   5.119 +            (recur (dec i) v)
   5.120 +            (recur (dec i) (assoc! v (dec i) cur)))
   5.121 +          (recur i (assoc! v i 0))))))
   5.122  
   5.123  ;; tests
   5.124  
     6.1 --- a/thesis/aux/org/first-chapter.html	Fri Mar 21 20:56:56 2014 -0400
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,455 +0,0 @@
     6.4 -<?xml version="1.0" encoding="utf-8"?>
     6.5 -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
     6.6 -               "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
     6.7 -<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
     6.8 -<head>
     6.9 -<title><code>CORTEX</code></title>
    6.10 -<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
    6.11 -<meta name="title" content="<code>CORTEX</code>"/>
    6.12 -<meta name="generator" content="Org-mode"/>
    6.13 -<meta name="generated" content="2013-11-07 04:21:29 EST"/>
    6.14 -<meta name="author" content="Robert McIntyre"/>
    6.15 -<meta name="description" content="Using embodied AI to facilitate Artificial Imagination."/>
    6.16 -<meta name="keywords" content="AI, clojure, embodiment"/>
    6.17 -<style type="text/css">
    6.18 - <!--/*--><![CDATA[/*><!--*/
    6.19 -  html { font-family: Times, serif; font-size: 12pt; }
    6.20 -  .title  { text-align: center; }
    6.21 -  .todo   { color: red; }
    6.22 -  .done   { color: green; }
    6.23 -  .tag    { background-color: #add8e6; font-weight:normal }
    6.24 -  .target { }
    6.25 -  .timestamp { color: #bebebe; }
    6.26 -  .timestamp-kwd { color: #5f9ea0; }
    6.27 -  .right  {margin-left:auto; margin-right:0px;  text-align:right;}
    6.28 -  .left   {margin-left:0px;  margin-right:auto; text-align:left;}
    6.29 -  .center {margin-left:auto; margin-right:auto; text-align:center;}
    6.30 -  p.verse { margin-left: 3% }
    6.31 -  pre {
    6.32 -	border: 1pt solid #AEBDCC;
    6.33 -	background-color: #F3F5F7;
    6.34 -	padding: 5pt;
    6.35 -	font-family: courier, monospace;
    6.36 -        font-size: 90%;
    6.37 -        overflow:auto;
    6.38 -  }
    6.39 -  table { border-collapse: collapse; }
    6.40 -  td, th { vertical-align: top;  }
    6.41 -  th.right  { text-align:center;  }
    6.42 -  th.left   { text-align:center;   }
    6.43 -  th.center { text-align:center; }
    6.44 -  td.right  { text-align:right;  }
    6.45 -  td.left   { text-align:left;   }
    6.46 -  td.center { text-align:center; }
    6.47 -  dt { font-weight: bold; }
    6.48 -  div.figure { padding: 0.5em; }
    6.49 -  div.figure p { text-align: center; }
    6.50 -  div.inlinetask {
    6.51 -    padding:10px;
    6.52 -    border:2px solid gray;
    6.53 -    margin:10px;
    6.54 -    background: #ffffcc;
    6.55 -  }
    6.56 -  textarea { overflow-x: auto; }
    6.57 -  .linenr { font-size:smaller }
    6.58 -  .code-highlighted {background-color:#ffff00;}
    6.59 -  .org-info-js_info-navigation { border-style:none; }
    6.60 -  #org-info-js_console-label { font-size:10px; font-weight:bold;
    6.61 -                               white-space:nowrap; }
    6.62 -  .org-info-js_search-highlight {background-color:#ffff00; color:#000000;
    6.63 -                                 font-weight:bold; }
    6.64 -  /*]]>*/-->
    6.65 -</style>
    6.66 -<script type="text/javascript">var _gaq = _gaq || [];_gaq.push(['_setAccount', 'UA-31261312-1']);_gaq.push(['_trackPageview']);(function() {var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);})();</script><link rel="stylesheet" type="text/css" href="../../aurellem/css/argentum.css" />
    6.67 -<script type="text/javascript">
    6.68 -<!--/*--><![CDATA[/*><!--*/
    6.69 - function CodeHighlightOn(elem, id)
    6.70 - {
    6.71 -   var target = document.getElementById(id);
    6.72 -   if(null != target) {
    6.73 -     elem.cacheClassElem = elem.className;
    6.74 -     elem.cacheClassTarget = target.className;
    6.75 -     target.className = "code-highlighted";
    6.76 -     elem.className   = "code-highlighted";
    6.77 -   }
    6.78 - }
    6.79 - function CodeHighlightOff(elem, id)
    6.80 - {
    6.81 -   var target = document.getElementById(id);
    6.82 -   if(elem.cacheClassElem)
    6.83 -     elem.className = elem.cacheClassElem;
    6.84 -   if(elem.cacheClassTarget)
    6.85 -     target.className = elem.cacheClassTarget;
    6.86 - }
    6.87 -/*]]>*///-->
    6.88 -</script>
    6.89 -
    6.90 -</head>
    6.91 -<body>
    6.92 -
    6.93 -
    6.94 -<div id="content">
    6.95 -<h1 class="title"><code>CORTEX</code></h1>
    6.96 -
    6.97 -
    6.98 -<div class="header">
    6.99 -  <div class="float-right">	
   6.100 -    <!-- 
   6.101 -    <form>
   6.102 -      <input type="text"/><input type="submit" value="search the blog &raquo;"/> 
   6.103 -    </form>
   6.104 -    -->
   6.105 -  </div>
   6.106 -
   6.107 -  <h1>aurellem <em>&#x2609;</em></h1>
   6.108 -  <ul class="nav">
   6.109 -    <li><a href="/">read the blog &raquo;</a></li>
   6.110 -    <!-- li><a href="#">learn about us &raquo;</a></li-->
   6.111 -  </ul>
   6.112 -</div>
   6.113 -
   6.114 -<div class="author">Written by <author>Robert McIntyre</author></div>
   6.115 -
   6.116 -
   6.117 -
   6.118 -
   6.119 -
   6.120 -
   6.121 -
   6.122 -<div id="outline-container-1" class="outline-2">
   6.123 -<h2 id="sec-1">Artificial Imagination</h2>
   6.124 -<div class="outline-text-2" id="text-1">
   6.125 -
   6.126 -
   6.127 -<p>
   6.128 -  Imagine watching a video of someone skateboarding. When you watch
   6.129 -  the video, you can imagine yourself skateboarding, and your
   6.130 -  knowledge of the human body and its dynamics guides your
   6.131 -  interpretation of the scene. For example, even if the skateboarder
   6.132 -  is partially occluded, you can infer the positions of his arms and
   6.133 -  body from your own knowledge of how your body would be positioned if
   6.134 -  you were skateboarding. If the skateboarder suffers an accident, you
   6.135 -  wince in sympathy, imagining the pain your own body would experience
   6.136 -  if it were in the same situation. This empathy with other people
   6.137 -  guides our understanding of whatever they are doing because it is a
   6.138 -  powerful constraint on what is probable and possible. In order to
   6.139 -  make use of this powerful empathy constraint, I need a system that
   6.140 -  can generate and make sense of sensory data from the many different
   6.141 -  senses that humans possess. The two key properties of such a system
   6.142 -  are <i>embodiment</i> and <i>imagination</i>.
   6.143 -</p>
   6.144 -
   6.145 -</div>
   6.146 -
   6.147 -<div id="outline-container-1-1" class="outline-3">
   6.148 -<h3 id="sec-1-1">What is imagination?</h3>
   6.149 -<div class="outline-text-3" id="text-1-1">
   6.150 -
   6.151 -
   6.152 -<p>
   6.153 -   One kind of imagination is <i>sympathetic</i> imagination: you imagine
   6.154 -   yourself in the position of something/someone you are
   6.155 -   observing. This type of imagination comes into play when you follow
   6.156 -   along visually when watching someone perform actions, or when you
   6.157 -   sympathetically grimace when someone hurts themselves. This type of
   6.158 -   imagination uses the constraints you have learned about your own
   6.159 -   body to highly constrain the possibilities in whatever you are
   6.160 -   seeing. It uses all your senses, including your senses of touch,
   6.161 -   proprioception, etc. Humans are flexible when it comes to "putting
   6.162 -   themselves in another's shoes," and can sympathetically understand
   6.163 -   not only other humans, but entities ranging from animals to cartoon
   6.164 -   characters to <a href="http://www.youtube.com/watch?v=0jz4HcwTQmU">single dots</a> on a screen!
   6.165 -</p>
   6.166 -<p>
   6.167 -   Another kind of imagination is <i>predictive</i> imagination: you
   6.168 -   construct scenes in your mind that are not entirely related to
   6.169 -   whatever you are observing, but instead are predictions of the
   6.170 -   future or simply flights of fancy. You use this type of imagination
   6.171 -   to plan out multi-step actions, or play out dangerous situations in
   6.172 -   your mind so as to avoid messing them up in reality.
   6.173 -</p>
   6.174 -<p>
   6.175 -   Of course, sympathetic and predictive imagination blend into each
   6.176 -   other and are not completely separate concepts. One dimension along
   6.177 -   which you can distinguish types of imagination is dependence on raw
   6.178 -   sense data. Sympathetic imagination is highly constrained by your
   6.179 -   senses, while predictive imagination can be more or less dependent
   6.180 -   on your senses depending on how far ahead you imagine. Daydreaming
   6.181 -   is an extreme form of predictive imagination that wanders through
   6.182 -   different possibilities without concern for whether they are
   6.183 -   related to whatever is happening in reality.
   6.184 -</p>
   6.185 -<p>
   6.186 -   For this thesis, I will mostly focus on sympathetic imagination and
   6.187 -   the constraint it provides for understanding sensory data.
   6.188 -</p>
   6.189 -</div>
   6.190 -
   6.191 -</div>
   6.192 -
   6.193 -<div id="outline-container-1-2" class="outline-3">
   6.194 -<h3 id="sec-1-2">What problems can imagination solve?</h3>
   6.195 -<div class="outline-text-3" id="text-1-2">
   6.196 -
   6.197 -
   6.198 -<p>
   6.199 -   Consider a video of a cat drinking some water.
   6.200 -</p>
   6.201 -
   6.202 -<div class="figure">
   6.203 -<p><img src="../images/cat-drinking.jpg"  alt="../images/cat-drinking.jpg" /></p>
   6.204 -<p>A cat drinking some water. Identifying this action is beyond the state of the art for computers.</p>
   6.205 -</div>
   6.206 -
   6.207 -<p>
   6.208 -   It is currently impossible for any computer program to reliably
   6.209 -   label such a video as "drinking". I think humans are able to label
   6.210 -   such video as "drinking" because they imagine <i>themselves</i> as the
   6.211 -   cat, and imagine putting their face up against a stream of water
   6.212 -   and sticking out their tongue. In that imagined world, they can
   6.213 -   feel the cool water hitting their tongue, and feel the water
   6.214 -   entering their body, and are able to recognize that <i>feeling</i> as
   6.215 -   drinking. So, the label of the action is not really in the pixels
   6.216 -   of the image, but is found clearly in a simulation inspired by
   6.217 -   those pixels. An imaginative system, having been trained on
   6.218 -   drinking and non-drinking examples and learning that the most
   6.219 -   important component of drinking is the feeling of water sliding
   6.220 -   down one's throat, would analyze a video of a cat drinking in the
   6.221 -   following manner:
   6.222 -</p>
   6.223 -<ul>
   6.224 -<li>Create a physical model of the video by putting a "fuzzy" model
   6.225 -     of its own body in place of the cat. Also, create a simulation of
   6.226 -     the stream of water.
   6.227 -
   6.228 -</li>
   6.229 -<li>Play out this simulated scene and generate imagined sensory
   6.230 -     experience. This will include relevant muscle contractions, a
   6.231 -     close up view of the stream from the cat's perspective, and most
   6.232 -     importantly, the imagined feeling of water entering the mouth.
   6.233 -
   6.234 -</li>
   6.235 -<li>The action is now easily identified as drinking by the sense of
   6.236 -     taste alone. The other senses (such as the tongue moving in and
   6.237 -     out) help to give plausibility to the simulated action. Note that
   6.238 -     the sense of vision, while critical in creating the simulation,
   6.239 -     is not critical for identifying the action from the simulation.
   6.240 -</li>
   6.241 -</ul>
   6.242 -
   6.243 -
   6.244 -<p>
   6.245 -   More generally, I expect imaginative systems to be particularly
   6.246 -   good at identifying embodied actions in videos.
   6.247 -</p>
   6.248 -</div>
   6.249 -</div>
   6.250 -
   6.251 -</div>
   6.252 -
   6.253 -<div id="outline-container-2" class="outline-2">
   6.254 -<h2 id="sec-2">Cortex</h2>
   6.255 -<div class="outline-text-2" id="text-2">
   6.256 -
   6.257 -
   6.258 -<p>
   6.259 -  The previous example involves liquids, the sense of taste, and
   6.260 -  imagining oneself as a cat. For this thesis I constrain myself to
   6.261 -  simpler, more easily digitizable senses and situations.
   6.262 -</p>
   6.263 -<p>
   6.264 -  My system, <code>Cortex</code> performs imagination in two different simplified
   6.265 -  worlds: <i>worm world</i> and <i>stick figure world</i>. In each of these
   6.266 -  worlds, entities capable of imagination recognize actions by
   6.267 -  simulating the experience from their own perspective, and then
   6.268 -  recognizing the action from a database of examples.
   6.269 -</p>
   6.270 -<p>
   6.271 -  In order to serve as a framework for experiments in imagination,
   6.272 -  <code>Cortex</code> requires simulated bodies, worlds, and senses like vision,
   6.273 -  hearing, touch, proprioception, etc.
   6.274 -</p>
   6.275 -
   6.276 -</div>
   6.277 -
   6.278 -<div id="outline-container-2-1" class="outline-3">
   6.279 -<h3 id="sec-2-1">A Video Game Engine takes care of some of the groundwork</h3>
   6.280 -<div class="outline-text-3" id="text-2-1">
   6.281 -
   6.282 -
   6.283 -<p>
   6.284 -   When it comes to simulation environments, the engines used to
   6.285 -   create the worlds in video games offer top-notch physics and
   6.286 -   graphics support. These engines also have limited support for
   6.287 -   creating cameras and rendering 3D sound, which can be repurposed
   6.288 -   for vision and hearing respectively. Physics collision detection
   6.289 -   can be expanded to create a sense of touch.
   6.290 -</p>
   6.291 -<p>   
   6.292 -   jMonkeyEngine3 is one such engine for creating video games in
   6.293 -   Java. It uses OpenGL to render to the screen and uses screengraphs
   6.294 -   to avoid drawing things that do not appear on the screen. It has an
   6.295 -   active community and several games in the pipeline. The engine was
   6.296 -   not built to serve any particular game but is instead meant to be
   6.297 -   used for any 3D game. I chose jMonkeyEngine3 because it had the
   6.298 -   most features out of all the open projects I looked at, and because
   6.299 -   I could then write my code in Clojure, an implementation of LISP
   6.300 -   that runs on the JVM.
   6.301 -</p>
   6.302 -</div>
   6.303 -
   6.304 -</div>
   6.305 -
   6.306 -<div id="outline-container-2-2" class="outline-3">
   6.307 -<h3 id="sec-2-2"><code>CORTEX</code> Extends jMonkeyEngine3 to implement rich senses</h3>
   6.308 -<div class="outline-text-3" id="text-2-2">
   6.309 -
   6.310 -
   6.311 -<p>
   6.312 -   Using the game-making primitives provided by jMonkeyEngine3, I have
   6.313 -   constructed every major human sense except for smell and
   6.314 -   taste. <code>Cortex</code> also provides an interface for creating creatures
   6.315 -   in Blender, a 3D modeling environment, and then "rigging" the
   6.316 -   creatures with senses using 3D annotations in Blender. A creature
   6.317 -   can have any number of senses, and there can be any number of
   6.318 -   creatures in a simulation.
   6.319 -</p>
   6.320 -<p>   
   6.321 -   The senses available in <code>Cortex</code> are:
   6.322 -</p>
   6.323 -<ul>
   6.324 -<li><a href="../../cortex/html/vision.html">Vision</a>
   6.325 -</li>
   6.326 -<li><a href="../../cortex/html/hearing.html">Hearing</a>
   6.327 -</li>
   6.328 -<li><a href="../../cortex/html/touch.html">Touch</a>
   6.329 -</li>
   6.330 -<li><a href="../../cortex/html/proprioception.html">Proprioception</a>
   6.331 -</li>
   6.332 -<li><a href="../../cortex/html/movement.html">Muscle Tension</a>
   6.333 -</li>
   6.334 -</ul>
   6.335 -
   6.336 -
   6.337 -</div>
   6.338 -</div>
   6.339 -
   6.340 -</div>
   6.341 -
   6.342 -<div id="outline-container-3" class="outline-2">
   6.343 -<h2 id="sec-3">A roadmap for <code>Cortex</code> experiments</h2>
   6.344 -<div class="outline-text-2" id="text-3">
   6.345 -
   6.346 -
   6.347 -
   6.348 -</div>
   6.349 -
   6.350 -<div id="outline-container-3-1" class="outline-3">
   6.351 -<h3 id="sec-3-1">Worm World</h3>
   6.352 -<div class="outline-text-3" id="text-3-1">
   6.353 -
   6.354 -
   6.355 -<p>
   6.356 -   Worms in <code>Cortex</code> are segmented creatures which vary in length and
   6.357 -   number of segments, and have the senses of vision, proprioception,
   6.358 -   touch, and muscle tension.
   6.359 -</p>
   6.360 -
   6.361 -<div class="figure">
   6.362 -<p><img src="../images/finger-UV.png" width=755 alt="../images/finger-UV.png" /></p>
   6.363 -<p>This is the tactile-sensor-profile for the upper segment of a worm. It defines regions of high touch sensitivity (where there are many white pixels) and regions of low sensitivity (where white pixels are sparse).</p>
   6.364 -</div>
   6.365 -
   6.366 -
   6.367 -
   6.368 -
   6.369 -<div class="figure">
   6.370 -  <center>
   6.371 -    <video controls="controls" width="550">
   6.372 -      <source src="../video/worm-touch.ogg" type="video/ogg"
   6.373 -              preload="none" />
   6.374 -    </video>
   6.375 -    <br> <a href="http://youtu.be/RHx2wqzNVcU"> YouTube </a>
   6.376 -  </center>
   6.377 -  <p>The worm responds to touch.</p>
   6.378 -</div>
   6.379 -
   6.380 -<div class="figure">
   6.381 -  <center>
   6.382 -    <video controls="controls" width="550">
   6.383 -      <source src="../video/test-proprioception.ogg" type="video/ogg"
   6.384 -              preload="none" />
   6.385 -    </video>
   6.386 -    <br> <a href="http://youtu.be/JjdDmyM8b0w"> YouTube </a>
   6.387 -  </center>
   6.388 -  <p>Proprioception in a worm. The proprioceptive readout is
   6.389 -    in the upper left corner of the screen.</p>
   6.390 -</div>
   6.391 -
   6.392 -<p>
   6.393 -   A worm is trained in various actions such as sinusoidal movement,
   6.394 -   curling, flailing, and spinning by directly playing motor
   6.395 -   contractions while the worm "feels" the experience. These actions
   6.396 -   are recorded not only as vectors of muscle tension, touch, and
   6.397 -   proprioceptive data, but also in higher level forms such as
   6.398 -   frequencies of the various contractions and a symbolic name for the
   6.399 -   action.
   6.400 -</p>
   6.401 -<p>
   6.402 -   Then, the worm watches a video of another worm performing one of
   6.403 -   the actions, and must judge which action was performed. Normally
   6.404 -   this would be an extremely difficult problem, but the worm is able
   6.405 -   to greatly diminish the search space through sympathetic
   6.406 -   imagination. First, it creates an imagined copy of its body which
   6.407 -   it observes from a third person point of view. Then for each frame
   6.408 -   of the video, it maneuvers its simulated body to be in registration
   6.409 -   with the worm depicted in the video. The physical constraints
   6.410 -   imposed by the physics simulation greatly decrease the number of
   6.411 -   poses that have to be tried, making the search feasible. As the
   6.412 -   imaginary worm moves, it generates imaginary muscle tension and
   6.413 -   proprioceptive sensations. The worm determines the action not by
   6.414 -   vision, but by matching the imagined proprioceptive data with
   6.415 -   previous examples.
   6.416 -</p>
   6.417 -<p>
   6.418 -   By using non-visual sensory data such as touch, the worms can also
   6.419 -   answer body related questions such as "did your head touch your
   6.420 -   tail?" and "did worm A touch worm B?"
   6.421 -</p>
   6.422 -<p>
   6.423 -   The proprioceptive information used for action identification is
   6.424 -   body-centric, so only the registration step is dependent on point
   6.425 -   of view, not the identification step. Registration is not specific
   6.426 -   to any particular action. Thus, action identification can be
   6.427 -   divided into a point-of-view dependent generic registration step,
   6.428 -   and an action-specific step that is body-centered and invariant to
   6.429 -   point of view.
   6.430 -</p>
   6.431 -</div>
   6.432 -
   6.433 -</div>
   6.434 -
   6.435 -<div id="outline-container-3-2" class="outline-3">
   6.436 -<h3 id="sec-3-2">Stick Figure World</h3>
   6.437 -<div class="outline-text-3" id="text-3-2">
   6.438 -
   6.439 -
   6.440 -<p>
   6.441 -   This environment is similar to Worm World, except the creatures are
   6.442 -   more complicated and the actions and questions more varied. It is
   6.443 -   an experiment to see how far imagination can go in interpreting
   6.444 -   actions.  
   6.445 -</p></div>
   6.446 -</div>
   6.447 -</div>
   6.448 -</div>
   6.449 -
   6.450 -<div id="postamble">
   6.451 -<p class="date">Date: 2013-11-07 04:21:29 EST</p>
   6.452 -<p class="author">Author: Robert McIntyre</p>
   6.453 -<p class="creator">Org version 7.7 with Emacs version 24</p>
   6.454 -<a href="http://validator.w3.org/check?uri=referer">Validate XHTML 1.0</a>
   6.455 -
   6.456 -</div>
   6.457 -</body>
   6.458 -</html>
     7.1 --- a/thesis/aux/org/first-chapter.org	Fri Mar 21 20:56:56 2014 -0400
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,241 +0,0 @@
     7.4 -#+title: =CORTEX=
     7.5 -#+author: Robert McIntyre
     7.6 -#+email: rlm@mit.edu
     7.7 -#+description: Using embodied AI to facilitate Artificial Imagination.
     7.8 -#+keywords: AI, clojure, embodiment
     7.9 -#+SETUPFILE: ../../aurellem/org/setup.org
    7.10 -#+INCLUDE: ../../aurellem/org/level-0.org
    7.11 -#+babel: :mkdirp yes :noweb yes :exports both
    7.12 -#+OPTIONS: toc:nil, num:nil
    7.13 -
    7.14 -* Artificial Imagination
    7.15 -  Imagine watching a video of someone skateboarding. When you watch
    7.16 -  the video, you can imagine yourself skateboarding, and your
    7.17 -  knowledge of the human body and its dynamics guides your
    7.18 -  interpretation of the scene. For example, even if the skateboarder
    7.19 -  is partially occluded, you can infer the positions of his arms and
    7.20 -  body from your own knowledge of how your body would be positioned if
    7.21 -  you were skateboarding. If the skateboarder suffers an accident, you
    7.22 -  wince in sympathy, imagining the pain your own body would experience
    7.23 -  if it were in the same situation. This empathy with other people
    7.24 -  guides our understanding of whatever they are doing because it is a
    7.25 -  powerful constraint on what is probable and possible. In order to
    7.26 -  make use of this powerful empathy constraint, I need a system that
    7.27 -  can generate and make sense of sensory data from the many different
    7.28 -  senses that humans possess. The two key properties of such a system
    7.29 -  are /embodiment/ and /imagination/.
    7.30 -
    7.31 -** What is imagination?
    7.32 -
    7.33 -   One kind of imagination is /sympathetic/ imagination: you imagine
    7.34 -   yourself in the position of something/someone you are
    7.35 -   observing. This type of imagination comes into play when you follow
    7.36 -   along visually when watching someone perform actions, or when you
    7.37 -   sympathetically grimace when someone hurts themselves. This type of
    7.38 -   imagination uses the constraints you have learned about your own
    7.39 -   body to highly constrain the possibilities in whatever you are
    7.40 -   seeing. It uses all your senses, including your senses of touch,
    7.41 -   proprioception, etc. Humans are flexible when it comes to "putting
    7.42 -   themselves in another's shoes," and can sympathetically understand
    7.43 -   not only other humans, but entities ranging from animals to cartoon
    7.44 -   characters to [[http://www.youtube.com/watch?v=0jz4HcwTQmU][single dots]] on a screen!
    7.45 -
    7.46 -# and can infer intention from the actions of not only other humans,
    7.47 -# but also animals, cartoon characters, and even abstract moving dots
    7.48 -# on a screen!
    7.49 -
    7.50 -   Another kind of imagination is /predictive/ imagination: you
    7.51 -   construct scenes in your mind that are not entirely related to
    7.52 -   whatever you are observing, but instead are predictions of the
    7.53 -   future or simply flights of fancy. You use this type of imagination
    7.54 -   to plan out multi-step actions, or play out dangerous situations in
    7.55 -   your mind so as to avoid messing them up in reality.
    7.56 -
    7.57 -   Of course, sympathetic and predictive imagination blend into each
    7.58 -   other and are not completely separate concepts. One dimension along
    7.59 -   which you can distinguish types of imagination is dependence on raw
    7.60 -   sense data. Sympathetic imagination is highly constrained by your
    7.61 -   senses, while predictive imagination can be more or less dependent
    7.62 -   on your senses depending on how far ahead you imagine. Daydreaming
    7.63 -   is an extreme form of predictive imagination that wanders through
    7.64 -   different possibilities without concern for whether they are
    7.65 -   related to whatever is happening in reality.
    7.66 -
    7.67 -   For this thesis, I will mostly focus on sympathetic imagination and
    7.68 -   the constraint it provides for understanding sensory data.
    7.69 -   
    7.70 -** What problems can imagination solve?
    7.71 -
    7.72 -   Consider a video of a cat drinking some water.
    7.73 -
    7.74 -   #+caption: A cat drinking some water. Identifying this action is beyond the state of the art for computers.
    7.75 -   #+ATTR_LaTeX: width=5cm
    7.76 -   [[../images/cat-drinking.jpg]]
    7.77 -
    7.78 -   It is currently impossible for any computer program to reliably
    7.79 -   label such a video as "drinking". I think humans are able to label
    7.80 -   such video as "drinking" because they imagine /themselves/ as the
    7.81 -   cat, and imagine putting their face up against a stream of water
    7.82 -   and sticking out their tongue. In that imagined world, they can
    7.83 -   feel the cool water hitting their tongue, and feel the water
    7.84 -   entering their body, and are able to recognize that /feeling/ as
    7.85 -   drinking. So, the label of the action is not really in the pixels
    7.86 -   of the image, but is found clearly in a simulation inspired by
    7.87 -   those pixels. An imaginative system, having been trained on
    7.88 -   drinking and non-drinking examples and learning that the most
    7.89 -   important component of drinking is the feeling of water sliding
    7.90 -   down one's throat, would analyze a video of a cat drinking in the
    7.91 -   following manner:
    7.92 -   
    7.93 -   - Create a physical model of the video by putting a "fuzzy" model
    7.94 -     of its own body in place of the cat. Also, create a simulation of
    7.95 -     the stream of water.
    7.96 -
    7.97 -   - Play out this simulated scene and generate imagined sensory
    7.98 -     experience. This will include relevant muscle contractions, a
    7.99 -     close up view of the stream from the cat's perspective, and most
   7.100 -     importantly, the imagined feeling of water entering the mouth.
   7.101 -
   7.102 -   - The action is now easily identified as drinking by the sense of
   7.103 -     taste alone. The other senses (such as the tongue moving in and
   7.104 -     out) help to give plausibility to the simulated action. Note that
   7.105 -     the sense of vision, while critical in creating the simulation,
   7.106 -     is not critical for identifying the action from the simulation.
   7.107 -
   7.108 -   More generally, I expect imaginative systems to be particularly
   7.109 -   good at identifying embodied actions in videos.
   7.110 -
   7.111 -* Cortex
   7.112 -
   7.113 -  The previous example involves liquids, the sense of taste, and
   7.114 -  imagining oneself as a cat. For this thesis I constrain myself to
   7.115 -  simpler, more easily digitizable senses and situations.
   7.116 -
   7.117 -  My system, =CORTEX=, performs imagination in two different simplified
   7.118 -  worlds: /worm world/ and /stick-figure world/. In each of these
   7.119 -  worlds, entities capable of imagination recognize actions by
   7.120 -  simulating the experience from their own perspective, and then
   7.121 -  recognizing the action from a database of examples.
   7.122 -
   7.123 -  In order to serve as a framework for experiments in imagination,
   7.124 -  =CORTEX= requires simulated bodies, worlds, and senses like vision,
   7.125 -  hearing, touch, proprioception, etc.
   7.126 -
   7.127 -** A Video Game Engine takes care of some of the groundwork
   7.128 -
   7.129 -   When it comes to simulation environments, the engines used to
   7.130 -   create the worlds in video games offer top-notch physics and
   7.131 -   graphics support. These engines also have limited support for
   7.132 -   creating cameras and rendering 3D sound, which can be repurposed
   7.133 -   for vision and hearing respectively. Physics collision detection
   7.134 -   can be expanded to create a sense of touch.
   7.135 -   
   7.136 -   jMonkeyEngine3 is one such engine for creating video games in
   7.137 -   Java. It uses OpenGL to render to the screen and uses scene graphs
   7.138 -   to avoid drawing things that do not appear on the screen. It has an
   7.139 -   active community and several games in the pipeline. The engine was
   7.140 -   not built to serve any particular game but is instead meant to be
   7.141 -   used for any 3D game. I chose jMonkeyEngine3 because it had the
   7.142 -   most features out of all the open projects I looked at, and because
   7.143 -   I could then write my code in Clojure, an implementation of LISP
   7.144 -   that runs on the JVM.
   7.145 -
   7.146 -** =CORTEX= Extends jMonkeyEngine3 to implement rich senses
   7.147 -
   7.148 -   Using the game-making primitives provided by jMonkeyEngine3, I have
   7.149 -   constructed every major human sense except for smell and
   7.150 -   taste. =CORTEX= also provides an interface for creating creatures
   7.151 -   in Blender, a 3D modeling environment, and then "rigging" the
   7.152 -   creatures with senses using 3D annotations in Blender. A creature
   7.153 -   can have any number of senses, and there can be any number of
   7.154 -   creatures in a simulation.
   7.155 -   
   7.156 -   The senses available in =CORTEX= are:
   7.157 -
   7.158 -   - [[../../cortex/html/vision.html][Vision]]
   7.159 -   - [[../../cortex/html/hearing.html][Hearing]]
   7.160 -   - [[../../cortex/html/touch.html][Touch]]
   7.161 -   - [[../../cortex/html/proprioception.html][Proprioception]]
   7.162 -   - [[../../cortex/html/movement.html][Muscle Tension]]
   7.163 -
   7.164 -* A roadmap for =CORTEX= experiments
   7.165 -
   7.166 -** Worm World
   7.167 -
   7.168 -   Worms in =CORTEX= are segmented creatures which vary in length and
   7.169 -   number of segments, and have the senses of vision, proprioception,
   7.170 -   touch, and muscle tension.
   7.171 -
   7.172 -#+attr_html: width=755
   7.173 -#+caption: This is the tactile-sensor-profile for the upper segment of a worm. It defines regions of high touch sensitivity (where there are many white pixels) and regions of low sensitivity (where white pixels are sparse).
   7.174 -[[../images/finger-UV.png]]
   7.175 -
   7.176 -
   7.177 -#+begin_html
   7.178 -<div class="figure">
   7.179 -  <center>
   7.180 -    <video controls="controls" width="550">
   7.181 -      <source src="../video/worm-touch.ogg" type="video/ogg"
   7.182 -	      preload="none" />
   7.183 -    </video>
   7.184 -    <br> <a href="http://youtu.be/RHx2wqzNVcU"> YouTube </a>
   7.185 -  </center>
   7.186 -  <p>The worm responds to touch.</p>
   7.187 -</div>
   7.188 -#+end_html
   7.189 -
   7.190 -#+begin_html
   7.191 -<div class="figure">
   7.192 -  <center>
   7.193 -    <video controls="controls" width="550">
   7.194 -      <source src="../video/test-proprioception.ogg" type="video/ogg"
   7.195 -	      preload="none" />
   7.196 -    </video>
   7.197 -    <br> <a href="http://youtu.be/JjdDmyM8b0w"> YouTube </a>
   7.198 -  </center>
   7.199 -  <p>Proprioception in a worm. The proprioceptive readout is
   7.200 -    in the upper left corner of the screen.</p>
   7.201 -</div>
   7.202 -#+end_html
   7.203 -
   7.204 -   A worm is trained in various actions such as sinusoidal movement,
   7.205 -   curling, flailing, and spinning by directly playing motor
   7.206 -   contractions while the worm "feels" the experience. These actions
   7.207 -   are recorded not only as vectors of muscle tension, touch, and
   7.208 -   proprioceptive data, but also in higher level forms such as
   7.209 -   frequencies of the various contractions and a symbolic name for the
   7.210 -   action.
   7.211 -
   7.212 -   Then, the worm watches a video of another worm performing one of
   7.213 -   the actions, and must judge which action was performed. Normally
   7.214 -   this would be an extremely difficult problem, but the worm is able
   7.215 -   to greatly diminish the search space through sympathetic
   7.216 -   imagination. First, it creates an imagined copy of its body which
   7.217 -   it observes from a third person point of view. Then for each frame
   7.218 -   of the video, it maneuvers its simulated body to be in registration
   7.219 -   with the worm depicted in the video. The physical constraints
   7.220 -   imposed by the physics simulation greatly decrease the number of
   7.221 -   poses that have to be tried, making the search feasible. As the
   7.222 -   imaginary worm moves, it generates imaginary muscle tension and
   7.223 -   proprioceptive sensations. The worm determines the action not by
   7.224 -   vision, but by matching the imagined proprioceptive data with
   7.225 -   previous examples.
   7.226 -
   7.227 -   By using non-visual sensory data such as touch, the worms can also
   7.228 -   answer body related questions such as "did your head touch your
   7.229 -   tail?" and "did worm A touch worm B?"
   7.230 -
   7.231 -   The proprioceptive information used for action identification is
   7.232 -   body-centric, so only the registration step is dependent on point
   7.233 -   of view, not the identification step. Registration is not specific
   7.234 -   to any particular action. Thus, action identification can be
   7.235 -   divided into a point-of-view dependent generic registration step,
   7.236 -   and an action-specific step that is body-centered and invariant to
   7.237 -   point of view.
   7.238 -
   7.239 -** Stick Figure World
   7.240 -
   7.241 -   This environment is similar to Worm World, except the creatures are
   7.242 -   more complicated and the actions and questions more varied. It is
   7.243 -   an experiment to see how far imagination can go in interpreting
   7.244 -   actions.  
     8.1 --- a/thesis/aux/org/roadmap.org	Fri Mar 21 20:56:56 2014 -0400
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,220 +0,0 @@
     8.4 -In order for this to be a reasonable thesis that I can be proud of,
     8.5 -what are the /minimum/ number of things I need to get done?
     8.6 -
     8.7 -
     8.8 -* worm OR hand registration
     8.9 -  - training from a few examples (2 to start out)
    8.10 -  - aligning the body with the scene
    8.11 -  - generating sensory data
    8.12 -  - matching previous labeled examples using dot-products or some
    8.13 -    other basic thing
    8.14 -  - showing that it works with different views
    8.15 -
    8.16 -* first draft
    8.17 -  - draft of thesis without bibliography or formatting
    8.18 -  - should have basic experiment and have full description of
    8.19 -    framework with code
    8.20 -  - review with Winston
    8.21 -  
    8.22 -* final draft
    8.23 -  - implement stretch goals from Winston if possible
    8.24 -  - complete final formatting and submit
    8.25 -
    8.26 -* CORTEX
    8.27 -  DEADLINE: <2014-05-09 Fri>
    8.28 -  SHIT THAT'S IN 67 DAYS!!!
    8.29 -
    8.30 -** program simple feature matching code for the worm's segments
    8.31 -
    8.32 -Subgoals:
    8.33 -*** DONE Get cortex working again, run tests, no jmonkeyengine updates
    8.34 -    CLOSED: [2014-03-03 Mon 22:07] SCHEDULED: <2014-03-03 Mon>
    8.35 -*** DONE get blender working again
    8.36 -    CLOSED: [2014-03-03 Mon 22:43] SCHEDULED: <2014-03-03 Mon>
    8.37 -*** DONE make sparse touch worm segment in blender
    8.38 -    CLOSED: [2014-03-03 Mon 23:16] SCHEDULED: <2014-03-03 Mon>
    8.39 -    CLOCK: [2014-03-03 Mon 22:44]--[2014-03-03 Mon 23:16] =>  0:32
    8.40 -*** DONE make multi-segment touch worm with touch sensors and display
    8.41 -    CLOSED: [2014-03-03 Mon 23:54] SCHEDULED: <2014-03-03 Mon>
    8.42 -
    8.43 -*** DONE Make a worm wiggle and curl
    8.44 -    CLOSED: [2014-03-04 Tue 23:03] SCHEDULED: <2014-03-04 Tue>
    8.45 -
    8.46 -
    8.47 -** First draft
    8.48 -
    8.49 -Subgoals:
    8.50 -*** Writeup new worm experiments.
    8.51 -*** Triage implementation code and get it into chapter form.
    8.52 -
    8.53 -
    8.54 -
    8.55 - 
    8.56 -
    8.57 -** for today
    8.58 -
    8.59 -- guided worm :: control the worm with the keyboard. Useful for
    8.60 -                 testing the body-centered recog scripts, and for
    8.61 -                 preparing a cool demo video.
    8.62 -
    8.63 -- body-centered recognition :: detect actions using hard coded
    8.64 -     body-centered scripts. 
    8.65 -
    8.66 -- cool demo video of the worm being moved and recognizing things ::
    8.67 -     will be a neat part of the thesis.
    8.68 -
    8.69 -- thesis export :: refactoring and organization of code so that it
    8.70 -                   spits out a thesis in addition to the web page.
    8.71 -
    8.72 -- video alignment :: analyze the frames of a video in order to align
    8.73 -     the worm. Requires body-centered recognition. Can "cheat".
    8.74 -
    8.75 -- smoother actions :: use debugging controls to directly influence the
    8.76 -     demo actions, and to generate recognition procedures.
    8.77 -
    8.78 -- degenerate video demonstration :: show the system recognizing a
    8.79 -     curled worm from dead on. Crowning achievement of thesis.
    8.80 -
    8.81 -** Ordered from easiest to hardest
    8.82 -
    8.83 -Just report the positions of everything. I don't think that this
    8.84 -necessarily shows anything useful.
    8.85 -
    8.86 -Worm-segment vision -- you initialize a view of the worm, but instead
    8.87 -of pixels you use labels via ray tracing. Has the advantage of still
    8.88 -allowing for visual occlusion, but reliably identifies the objects,
    8.89 -even without rainbow coloring. You can code this as an image. 
    8.90 -
    8.91 -Same as above, except just with worm/non-worm labels.
    8.92 -
    8.93 -Color code each worm segment and then recognize them using blob
    8.94 -detectors. Then you solve for the perspective and the action
    8.95 -simultaneously.
    8.96 -
    8.97 -The entire worm can be colored the same, high contrast color against a
    8.98 -nearly black background.
    8.99 -
   8.100 -"Rooted" vision. You give the exact coordinates of ONE piece of the
   8.101 -worm, but the algorithm figures out the rest.
   8.102 -
   8.103 -More rooted vision -- start off the entire worm with one position.
   8.104 -
   8.105 -The right way to do alignment is to use motion over multiple frames to
   8.106 -snap individual pieces of the model into place sharing and
   8.107 -propagating the individual alignments over the whole model. We also
   8.108 -want to limit the alignment search to just those actions we are
   8.109 -prepared to identify. This might mean that I need some small "micro
   8.110 -actions" such as the individual movements of the worm pieces.
   8.111 -
   8.112 -Get just the centers of each segment projected onto the imaging
   8.113 -plane. (best so far).
   8.114 -
   8.115 -
   8.116 -Repertoire of actions  +  video frames -->
   8.117 -   directed multi-frame-search alg
   8.118 -
   8.119 -
   8.120 -
   8.121 -
   8.122 -
   8.123 -
   8.124 -!! Could also have a bounding box around the worm provided by
   8.125 -filtering the worm/non-worm render, and use bbbgs. As a bonus, I get
   8.126 -to include bbbgs in my thesis! Could finally do that recursive thing
   8.127 -where I make bounding boxes be those things that give results that
   8.128 -give good bounding boxes. If I did this I could use a disruptive
   8.129 -pattern on the worm.
   8.130 -
   8.131 -Re imagining using default textures is very simple for this system,
   8.132 -but hard for others.
   8.133 -
   8.134 -
   8.135 -Want to demonstrate, at minimum, alignment of some model of the worm
   8.136 -to the video, and a lookup of the action by simulated perception.
   8.137 -
   8.138 -note: the purple/white points is a very beautiful texture, because
   8.139 -when it moves slightly, the white dots look like they're
   8.140 -twinkling. Would look even better if it was a darker purple. Also
   8.141 -would look better more spread out.
   8.142 -
   8.143 -
   8.144 -embed assumption of one frame of view, search by moving around in
   8.145 -simulated world.
   8.146 -
   8.147 -Allowed to limit search by setting limits to a hemisphere around the
   8.148 -imagined worm! This limits scale also.
   8.149 -
   8.150 -
   8.151 -
   8.152 -
   8.153 -
   8.154 -!! Limited search with worm/non-worm rendering. 
   8.155 -How much inverse kinematics do we have to do?
   8.156 -What about cached (allowed state-space) paths, derived from labeled
   8.157 -training. You have to lead from one to another.
   8.158 -
   8.159 -What about initial state? Could start the input videos at a specific
   8.160 -state, then just match that explicitly.
   8.161 -
   8.162 -!! The training doesn't have to be labeled -- you can just move around
   8.163 -for a while!!
   8.164 -
   8.165 -!! Limited search with motion based alignment.
   8.166 -
   8.167 -
   8.168 -
   8.169 -
   8.170 -"play arounds" can establish a chain of linked sensoriums. Future
   8.171 -matches must fall into one of the already experienced things, and once
   8.172 -they do, it greatly limits the things that are possible in the future.
   8.173 -
   8.174 -
   8.175 -frame differences help to detect muscle exertion.
   8.176 -
   8.177 -Can try to match on a few "representative" frames. Can also just have
   8.178 -a few "bodies" in various states which we try to match.
   8.179 -
   8.180 -
   8.181 -
   8.182 -Paths through state-space have the exact same signature as
   8.183 -simulation. BUT, these can be searched in parallel and don't interfere
   8.184 -with each other.
   8.185 -
   8.186 -
   8.187 -
   8.188 -
   8.189 -** Final stretch up to First Draft
   8.190 -
   8.191 -*** DONE complete debug control of worm
   8.192 -    CLOSED: [2014-03-17 Mon 17:29] SCHEDULED: <2014-03-17 Mon>
   8.193 -    CLOCK: [2014-03-17 Mon 14:01]--[2014-03-17 Mon 17:29] =>  3:28
   8.194 -*** DONE add phi-space output to debug control
   8.195 -    CLOSED: [2014-03-17 Mon 17:42] SCHEDULED: <2014-03-17 Mon>
   8.196 -    CLOCK: [2014-03-17 Mon 17:31]--[2014-03-17 Mon 17:42] =>  0:11
   8.197 -
   8.198 -*** DONE complete automatic touch partitioning
   8.199 -    CLOSED: [2014-03-18 Tue 21:43] SCHEDULED: <2014-03-18 Tue>
   8.200 -*** DONE complete cyclic predicate
   8.201 -    CLOSED: [2014-03-19 Wed 16:34] SCHEDULED: <2014-03-18 Tue>
   8.202 -    CLOCK: [2014-03-19 Wed 13:16]--[2014-03-19 Wed 16:34] =>  3:18
   8.203 -*** DONE complete three phi-stream action predicates; test them with debug control
   8.204 -    CLOSED: [2014-03-19 Wed 16:35] SCHEDULED: <2014-03-17 Mon>
   8.205 -    CLOCK: [2014-03-18 Tue 18:36]--[2014-03-18 Tue 21:43] =>  3:07
   8.206 -    CLOCK: [2014-03-18 Tue 18:34]--[2014-03-18 Tue 18:36] =>  0:02
   8.207 -    CLOCK: [2014-03-17 Mon 19:19]--[2014-03-17 Mon 21:19] =>  2:00
   8.208 -*** DONE build an automatic "do all the things" sequence.
   8.209 -    CLOSED: [2014-03-19 Wed 16:55] SCHEDULED: <2014-03-19 Wed>
   8.210 -    CLOCK: [2014-03-19 Wed 16:53]--[2014-03-19 Wed 16:55] =>  0:02
   8.211 -*** DONE implement proprioception based movement lookup in phi-space
   8.212 -    CLOSED: [2014-03-19 Wed 22:04] SCHEDULED: <2014-03-19 Wed>
   8.213 -    CLOCK: [2014-03-19 Wed 19:32]--[2014-03-19 Wed 22:04] =>  2:32
   8.214 -*** DONE make proprioception reference phi-space indexes
   8.215 -    CLOSED: [2014-03-19 Wed 22:47] SCHEDULED: <2014-03-19 Wed>
   8.216 -    CLOCK: [2014-03-19 Wed 22:07]
   8.217 -
   8.218 -
   8.219 -*** DONE create test videos, also record positions of worm segments
   8.220 -    CLOSED: [2014-03-20 Thu 22:02] SCHEDULED: <2014-03-19 Wed>
   8.221 -
   8.222 -*** TODO Collect intro, worm-learn and cortex creation into draft thesis. 
   8.223 -    
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/thesis/org/first-chapter.html	Sat Mar 22 16:10:34 2014 -0400
     9.3 @@ -0,0 +1,455 @@
     9.4 +<?xml version="1.0" encoding="utf-8"?>
     9.5 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
     9.6 +               "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
     9.7 +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
     9.8 +<head>
     9.9 +<title><code>CORTEX</code></title>
    9.10 +<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
    9.11 +<meta name="title" content="<code>CORTEX</code>"/>
    9.12 +<meta name="generator" content="Org-mode"/>
    9.13 +<meta name="generated" content="2013-11-07 04:21:29 EST"/>
    9.14 +<meta name="author" content="Robert McIntyre"/>
    9.15 +<meta name="description" content="Using embodied AI to facilitate Artificial Imagination."/>
    9.16 +<meta name="keywords" content="AI, clojure, embodiment"/>
    9.17 +<style type="text/css">
    9.18 + <!--/*--><![CDATA[/*><!--*/
    9.19 +  html { font-family: Times, serif; font-size: 12pt; }
    9.20 +  .title  { text-align: center; }
    9.21 +  .todo   { color: red; }
    9.22 +  .done   { color: green; }
    9.23 +  .tag    { background-color: #add8e6; font-weight:normal }
    9.24 +  .target { }
    9.25 +  .timestamp { color: #bebebe; }
    9.26 +  .timestamp-kwd { color: #5f9ea0; }
    9.27 +  .right  {margin-left:auto; margin-right:0px;  text-align:right;}
    9.28 +  .left   {margin-left:0px;  margin-right:auto; text-align:left;}
    9.29 +  .center {margin-left:auto; margin-right:auto; text-align:center;}
    9.30 +  p.verse { margin-left: 3% }
    9.31 +  pre {
    9.32 +	border: 1pt solid #AEBDCC;
    9.33 +	background-color: #F3F5F7;
    9.34 +	padding: 5pt;
    9.35 +	font-family: courier, monospace;
    9.36 +        font-size: 90%;
    9.37 +        overflow:auto;
    9.38 +  }
    9.39 +  table { border-collapse: collapse; }
    9.40 +  td, th { vertical-align: top;  }
    9.41 +  th.right  { text-align:center;  }
    9.42 +  th.left   { text-align:center;   }
    9.43 +  th.center { text-align:center; }
    9.44 +  td.right  { text-align:right;  }
    9.45 +  td.left   { text-align:left;   }
    9.46 +  td.center { text-align:center; }
    9.47 +  dt { font-weight: bold; }
    9.48 +  div.figure { padding: 0.5em; }
    9.49 +  div.figure p { text-align: center; }
    9.50 +  div.inlinetask {
    9.51 +    padding:10px;
    9.52 +    border:2px solid gray;
    9.53 +    margin:10px;
    9.54 +    background: #ffffcc;
    9.55 +  }
    9.56 +  textarea { overflow-x: auto; }
    9.57 +  .linenr { font-size:smaller }
    9.58 +  .code-highlighted {background-color:#ffff00;}
    9.59 +  .org-info-js_info-navigation { border-style:none; }
    9.60 +  #org-info-js_console-label { font-size:10px; font-weight:bold;
    9.61 +                               white-space:nowrap; }
    9.62 +  .org-info-js_search-highlight {background-color:#ffff00; color:#000000;
    9.63 +                                 font-weight:bold; }
    9.64 +  /*]]>*/-->
    9.65 +</style>
    9.66 +<script type="text/javascript">var _gaq = _gaq || [];_gaq.push(['_setAccount', 'UA-31261312-1']);_gaq.push(['_trackPageview']);(function() {var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);})();</script><link rel="stylesheet" type="text/css" href="../../aurellem/css/argentum.css" />
    9.67 +<script type="text/javascript">
    9.68 +<!--/*--><![CDATA[/*><!--*/
    9.69 + function CodeHighlightOn(elem, id)
    9.70 + {
    9.71 +   var target = document.getElementById(id);
    9.72 +   if(null != target) {
    9.73 +     elem.cacheClassElem = elem.className;
    9.74 +     elem.cacheClassTarget = target.className;
    9.75 +     target.className = "code-highlighted";
    9.76 +     elem.className   = "code-highlighted";
    9.77 +   }
    9.78 + }
    9.79 + function CodeHighlightOff(elem, id)
    9.80 + {
    9.81 +   var target = document.getElementById(id);
    9.82 +   if(elem.cacheClassElem)
    9.83 +     elem.className = elem.cacheClassElem;
    9.84 +   if(elem.cacheClassTarget)
    9.85 +     target.className = elem.cacheClassTarget;
    9.86 + }
    9.87 +/*]]>*///-->
    9.88 +</script>
    9.89 +
    9.90 +</head>
    9.91 +<body>
    9.92 +
    9.93 +
    9.94 +<div id="content">
    9.95 +<h1 class="title"><code>CORTEX</code></h1>
    9.96 +
    9.97 +
    9.98 +<div class="header">
    9.99 +  <div class="float-right">	
   9.100 +    <!-- 
   9.101 +    <form>
   9.102 +      <input type="text"/><input type="submit" value="search the blog &raquo;"/> 
   9.103 +    </form>
   9.104 +    -->
   9.105 +  </div>
   9.106 +
   9.107 +  <h1>aurellem <em>&#x2609;</em></h1>
   9.108 +  <ul class="nav">
   9.109 +    <li><a href="/">read the blog &raquo;</a></li>
   9.110 +    <!-- li><a href="#">learn about us &raquo;</a></li-->
   9.111 +  </ul>
   9.112 +</div>
   9.113 +
   9.114 +<div class="author">Written by <author>Robert McIntyre</author></div>
   9.115 +
   9.116 +
   9.117 +
   9.118 +
   9.119 +
   9.120 +
   9.121 +
   9.122 +<div id="outline-container-1" class="outline-2">
   9.123 +<h2 id="sec-1">Artificial Imagination</h2>
   9.124 +<div class="outline-text-2" id="text-1">
   9.125 +
   9.126 +
   9.127 +<p>
   9.128 +  Imagine watching a video of someone skateboarding. When you watch
   9.129 +  the video, you can imagine yourself skateboarding, and your
   9.130 +  knowledge of the human body and its dynamics guides your
   9.131 +  interpretation of the scene. For example, even if the skateboarder
   9.132 +  is partially occluded, you can infer the positions of his arms and
   9.133 +  body from your own knowledge of how your body would be positioned if
   9.134 +  you were skateboarding. If the skateboarder suffers an accident, you
   9.135 +  wince in sympathy, imagining the pain your own body would experience
   9.136 +  if it were in the same situation. This empathy with other people
   9.137 +  guides our understanding of whatever they are doing because it is a
   9.138 +  powerful constraint on what is probable and possible. In order to
   9.139 +  make use of this powerful empathy constraint, I need a system that
   9.140 +  can generate and make sense of sensory data from the many different
   9.141 +  senses that humans possess. The two key properties of such a system
   9.142 +  are <i>embodiment</i> and <i>imagination</i>.
   9.143 +</p>
   9.144 +
   9.145 +</div>
   9.146 +
   9.147 +<div id="outline-container-1-1" class="outline-3">
   9.148 +<h3 id="sec-1-1">What is imagination?</h3>
   9.149 +<div class="outline-text-3" id="text-1-1">
   9.150 +
   9.151 +
   9.152 +<p>
   9.153 +   One kind of imagination is <i>sympathetic</i> imagination: you imagine
   9.154 +   yourself in the position of something/someone you are
   9.155 +   observing. This type of imagination comes into play when you follow
   9.156 +   along visually when watching someone perform actions, or when you
   9.157 +   sympathetically grimace when someone hurts themselves. This type of
   9.158 +   imagination uses the constraints you have learned about your own
   9.159 +   body to highly constrain the possibilities in whatever you are
   9.160 +   seeing. It uses all your senses, including your senses of touch,
   9.161 +   proprioception, etc. Humans are flexible when it comes to "putting
   9.162 +   themselves in another's shoes," and can sympathetically understand
   9.163 +   not only other humans, but entities ranging from animals to cartoon
   9.164 +   characters to <a href="http://www.youtube.com/watch?v=0jz4HcwTQmU">single dots</a> on a screen!
   9.165 +</p>
   9.166 +<p>
   9.167 +   Another kind of imagination is <i>predictive</i> imagination: you
   9.168 +   construct scenes in your mind that are not entirely related to
   9.169 +   whatever you are observing, but instead are predictions of the
   9.170 +   future or simply flights of fancy. You use this type of imagination
   9.171 +   to plan out multi-step actions, or play out dangerous situations in
   9.172 +   your mind so as to avoid messing them up in reality.
   9.173 +</p>
   9.174 +<p>
   9.175 +   Of course, sympathetic and predictive imagination blend into each
   9.176 +   other and are not completely separate concepts. One dimension along
   9.177 +   which you can distinguish types of imagination is dependence on raw
   9.178 +   sense data. Sympathetic imagination is highly constrained by your
   9.179 +   senses, while predictive imagination can be more or less dependent
   9.180 +   on your senses depending on how far ahead you imagine. Daydreaming
   9.181 +   is an extreme form of predictive imagination that wanders through
   9.182 +   different possibilities without concern for whether they are
   9.183 +   related to whatever is happening in reality.
   9.184 +</p>
   9.185 +<p>
   9.186 +   For this thesis, I will mostly focus on sympathetic imagination and
   9.187 +   the constraint it provides for understanding sensory data.
   9.188 +</p>
   9.189 +</div>
   9.190 +
   9.191 +</div>
   9.192 +
   9.193 +<div id="outline-container-1-2" class="outline-3">
   9.194 +<h3 id="sec-1-2">What problems can imagination solve?</h3>
   9.195 +<div class="outline-text-3" id="text-1-2">
   9.196 +
   9.197 +
   9.198 +<p>
   9.199 +   Consider a video of a cat drinking some water.
   9.200 +</p>
   9.201 +
   9.202 +<div class="figure">
   9.203 +<p><img src="../images/cat-drinking.jpg"  alt="../images/cat-drinking.jpg" /></p>
   9.204 +<p>A cat drinking some water. Identifying this action is beyond the state of the art for computers.</p>
   9.205 +</div>
   9.206 +
   9.207 +<p>
   9.208 +   It is currently impossible for any computer program to reliably
   9.209 +   label such a video as "drinking". I think humans are able to label
   9.210 +   such video as "drinking" because they imagine <i>themselves</i> as the
   9.211 +   cat, and imagine putting their face up against a stream of water
   9.212 +   and sticking out their tongue. In that imagined world, they can
   9.213 +   feel the cool water hitting their tongue, and feel the water
   9.214 +   entering their body, and are able to recognize that <i>feeling</i> as
   9.215 +   drinking. So, the label of the action is not really in the pixels
   9.216 +   of the image, but is found clearly in a simulation inspired by
   9.217 +   those pixels. An imaginative system, having been trained on
   9.218 +   drinking and non-drinking examples and learning that the most
   9.219 +   important component of drinking is the feeling of water sliding
   9.220 +   down one's throat, would analyze a video of a cat drinking in the
   9.221 +   following manner:
   9.222 +</p>
   9.223 +<ul>
   9.224 +<li>Create a physical model of the video by putting a "fuzzy" model
   9.225 +     of its own body in place of the cat. Also, create a simulation of
   9.226 +     the stream of water.
   9.227 +
   9.228 +</li>
   9.229 +<li>Play out this simulated scene and generate imagined sensory
   9.230 +     experience. This will include relevant muscle contractions, a
   9.231 +     close up view of the stream from the cat's perspective, and most
   9.232 +     importantly, the imagined feeling of water entering the mouth.
   9.233 +
   9.234 +</li>
   9.235 +<li>The action is now easily identified as drinking by the sense of
   9.236 +     taste alone. The other senses (such as the tongue moving in and
   9.237 +     out) help to give plausibility to the simulated action. Note that
   9.238 +     the sense of vision, while critical in creating the simulation,
   9.239 +     is not critical for identifying the action from the simulation.
   9.240 +</li>
   9.241 +</ul>
   9.242 +
   9.243 +
   9.244 +<p>
   9.245 +   More generally, I expect imaginative systems to be particularly
   9.246 +   good at identifying embodied actions in videos.
   9.247 +</p>
   9.248 +</div>
   9.249 +</div>
   9.250 +
   9.251 +</div>
   9.252 +
   9.253 +<div id="outline-container-2" class="outline-2">
   9.254 +<h2 id="sec-2">Cortex</h2>
   9.255 +<div class="outline-text-2" id="text-2">
   9.256 +
   9.257 +
   9.258 +<p>
   9.259 +  The previous example involves liquids, the sense of taste, and
   9.260 +  imagining oneself as a cat. For this thesis I constrain myself to
   9.261 +  simpler, more easily digitizable senses and situations.
   9.262 +</p>
   9.263 +<p>
   9.264 +  My system, <code>Cortex</code>, performs imagination in two different simplified
   9.265 +  worlds: <i>worm world</i> and <i>stick figure world</i>. In each of these
   9.266 +  worlds, entities capable of imagination recognize actions by
   9.267 +  simulating the experience from their own perspective, and then
   9.268 +  recognizing the action from a database of examples.
   9.269 +</p>
   9.270 +<p>
   9.271 +  In order to serve as a framework for experiments in imagination,
   9.272 +  <code>Cortex</code> requires simulated bodies, worlds, and senses like vision,
   9.273 +  hearing, touch, proprioception, etc.
   9.274 +</p>
   9.275 +
   9.276 +</div>
   9.277 +
   9.278 +<div id="outline-container-2-1" class="outline-3">
   9.279 +<h3 id="sec-2-1">A Video Game Engine takes care of some of the groundwork</h3>
   9.280 +<div class="outline-text-3" id="text-2-1">
   9.281 +
   9.282 +
   9.283 +<p>
   9.284 +   When it comes to simulation environments, the engines used to
   9.285 +   create the worlds in video games offer top-notch physics and
   9.286 +   graphics support. These engines also have limited support for
   9.287 +   creating cameras and rendering 3D sound, which can be repurposed
   9.288 +   for vision and hearing respectively. Physics collision detection
   9.289 +   can be expanded to create a sense of touch.
   9.290 +</p>
   9.291 +<p>   
   9.292 +   jMonkeyEngine3 is one such engine for creating video games in
   9.293 +   Java. It uses OpenGL to render to the screen and uses scenegraphs
   9.294 +   to avoid drawing things that do not appear on the screen. It has an
   9.295 +   active community and several games in the pipeline. The engine was
   9.296 +   not built to serve any particular game but is instead meant to be
   9.297 +   used for any 3D game. I chose jMonkeyEngine3 because it had the
   9.298 +   most features out of all the open projects I looked at, and because
   9.299 +   I could then write my code in Clojure, an implementation of LISP
   9.300 +   that runs on the JVM.
   9.301 +</p>
   9.302 +</div>
   9.303 +
   9.304 +</div>
   9.305 +
   9.306 +<div id="outline-container-2-2" class="outline-3">
   9.307 +<h3 id="sec-2-2"><code>CORTEX</code> Extends jMonkeyEngine3 to implement rich senses</h3>
   9.308 +<div class="outline-text-3" id="text-2-2">
   9.309 +
   9.310 +
   9.311 +<p>
   9.312 +   Using the game-making primitives provided by jMonkeyEngine3, I have
   9.313 +   constructed every major human sense except for smell and
   9.314 +   taste. <code>Cortex</code> also provides an interface for creating creatures
   9.315 +   in Blender, a 3D modeling environment, and then "rigging" the
   9.316 +   creatures with senses using 3D annotations in Blender. A creature
   9.317 +   can have any number of senses, and there can be any number of
   9.318 +   creatures in a simulation.
   9.319 +</p>
   9.320 +<p>   
   9.321 +   The senses available in <code>Cortex</code> are:
   9.322 +</p>
   9.323 +<ul>
   9.324 +<li><a href="../../cortex/html/vision.html">Vision</a>
   9.325 +</li>
   9.326 +<li><a href="../../cortex/html/hearing.html">Hearing</a>
   9.327 +</li>
   9.328 +<li><a href="../../cortex/html/touch.html">Touch</a>
   9.329 +</li>
   9.330 +<li><a href="../../cortex/html/proprioception.html">Proprioception</a>
   9.331 +</li>
   9.332 +<li><a href="../../cortex/html/movement.html">Muscle Tension</a>
   9.333 +</li>
   9.334 +</ul>
   9.335 +
   9.336 +
   9.337 +</div>
   9.338 +</div>
   9.339 +
   9.340 +</div>
   9.341 +
   9.342 +<div id="outline-container-3" class="outline-2">
   9.343 +<h2 id="sec-3">A roadmap for <code>Cortex</code> experiments</h2>
   9.344 +<div class="outline-text-2" id="text-3">
   9.345 +
   9.346 +
   9.347 +
   9.348 +</div>
   9.349 +
   9.350 +<div id="outline-container-3-1" class="outline-3">
   9.351 +<h3 id="sec-3-1">Worm World</h3>
   9.352 +<div class="outline-text-3" id="text-3-1">
   9.353 +
   9.354 +
   9.355 +<p>
   9.356 +   Worms in <code>Cortex</code> are segmented creatures which vary in length and
   9.357 +   number of segments, and have the senses of vision, proprioception,
   9.358 +   touch, and muscle tension.
   9.359 +</p>
   9.360 +
   9.361 +<div class="figure">
   9.362 +<p><img src="../images/finger-UV.png" width=755 alt="../images/finger-UV.png" /></p>
   9.363 +<p>This is the tactile-sensor-profile for the upper segment of a worm. It defines regions of high touch sensitivity (where there are many white pixels) and regions of low sensitivity (where white pixels are sparse).</p>
   9.364 +</div>
   9.365 +
   9.366 +
   9.367 +
   9.368 +
   9.369 +<div class="figure">
   9.370 +  <center>
   9.371 +    <video controls="controls" width="550">
   9.372 +      <source src="../video/worm-touch.ogg" type="video/ogg"
   9.373 +              preload="none" />
   9.374 +    </video>
   9.375 +    <br> <a href="http://youtu.be/RHx2wqzNVcU"> YouTube </a>
   9.376 +  </center>
   9.377 +  <p>The worm responds to touch.</p>
   9.378 +</div>
   9.379 +
   9.380 +<div class="figure">
   9.381 +  <center>
   9.382 +    <video controls="controls" width="550">
   9.383 +      <source src="../video/test-proprioception.ogg" type="video/ogg"
   9.384 +              preload="none" />
   9.385 +    </video>
   9.386 +    <br> <a href="http://youtu.be/JjdDmyM8b0w"> YouTube </a>
   9.387 +  </center>
   9.388 +  <p>Proprioception in a worm. The proprioceptive readout is
   9.389 +    in the upper left corner of the screen.</p>
   9.390 +</div>
   9.391 +
   9.392 +<p>
   9.393 +   A worm is trained in various actions such as sinusoidal movement,
   9.394 +   curling, flailing, and spinning by directly playing motor
   9.395 +   contractions while the worm "feels" the experience. These actions
   9.396 +   are recorded not only as vectors of muscle tension, touch, and
   9.397 +   proprioceptive data, but also in higher-level forms such as
   9.398 +   frequencies of the various contractions and a symbolic name for the
   9.399 +   action.
   9.400 +</p>
   9.401 +<p>
   9.402 +   Then, the worm watches a video of another worm performing one of
   9.403 +   the actions, and must judge which action was performed. Normally
   9.404 +   this would be an extremely difficult problem, but the worm is able
   9.405 +   to greatly diminish the search space through sympathetic
   9.406 +   imagination. First, it creates an imagined copy of its body which
   9.407 +   it observes from a third person point of view. Then for each frame
   9.408 +   of the video, it maneuvers its simulated body to be in registration
   9.409 +   with the worm depicted in the video. The physical constraints
   9.410 +   imposed by the physics simulation greatly decrease the number of
   9.411 +   poses that have to be tried, making the search feasible. As the
   9.412 +   imaginary worm moves, it generates imaginary muscle tension and
   9.413 +   proprioceptive sensations. The worm determines the action not by
   9.414 +   vision, but by matching the imagined proprioceptive data with
   9.415 +   previous examples.
   9.416 +</p>
   9.417 +<p>
   9.418 +   By using non-visual sensory data such as touch, the worms can also
   9.419 +   answer body related questions such as "did your head touch your
   9.420 +   tail?" and "did worm A touch worm B?"
   9.421 +</p>
   9.422 +<p>
   9.423 +   The proprioceptive information used for action identification is
   9.424 +   body-centric, so only the registration step is dependent on point
   9.425 +   of view, not the identification step. Registration is not specific
   9.426 +   to any particular action. Thus, action identification can be
   9.427 +   divided into a point-of-view dependent generic registration step,
   9.428 +   and an action-specific step that is body-centered and invariant to
   9.429 +   point of view.
   9.430 +</p>
   9.431 +</div>
   9.432 +
   9.433 +</div>
   9.434 +
   9.435 +<div id="outline-container-3-2" class="outline-3">
   9.436 +<h3 id="sec-3-2">Stick Figure World</h3>
   9.437 +<div class="outline-text-3" id="text-3-2">
   9.438 +
   9.439 +
   9.440 +<p>
   9.441 +   This environment is similar to Worm World, except the creatures are
   9.442 +   more complicated and the actions and questions more varied. It is
   9.443 +   an experiment to see how far imagination can go in interpreting
   9.444 +   actions.  
   9.445 +</p></div>
   9.446 +</div>
   9.447 +</div>
   9.448 +</div>
   9.449 +
   9.450 +<div id="postamble">
   9.451 +<p class="date">Date: 2013-11-07 04:21:29 EST</p>
   9.452 +<p class="author">Author: Robert McIntyre</p>
   9.453 +<p class="creator">Org version 7.7 with Emacs version 24</p>
   9.454 +<a href="http://validator.w3.org/check?uri=referer">Validate XHTML 1.0</a>
   9.455 +
   9.456 +</div>
   9.457 +</body>
   9.458 +</html>
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/thesis/org/first-chapter.org	Sat Mar 22 16:10:34 2014 -0400
    10.3 @@ -0,0 +1,241 @@
    10.4 +#+title: =CORTEX=
    10.5 +#+author: Robert McIntyre
    10.6 +#+email: rlm@mit.edu
    10.7 +#+description: Using embodied AI to facilitate Artificial Imagination.
    10.8 +#+keywords: AI, clojure, embodiment
    10.9 +#+SETUPFILE: ../../aurellem/org/setup.org
   10.10 +#+INCLUDE: ../../aurellem/org/level-0.org
   10.11 +#+babel: :mkdirp yes :noweb yes :exports both
   10.12 +#+OPTIONS: toc:nil, num:nil
   10.13 +
   10.14 +* Artificial Imagination
   10.15 +  Imagine watching a video of someone skateboarding. When you watch
   10.16 +  the video, you can imagine yourself skateboarding, and your
   10.17 +  knowledge of the human body and its dynamics guides your
   10.18 +  interpretation of the scene. For example, even if the skateboarder
   10.19 +  is partially occluded, you can infer the positions of his arms and
   10.20 +  body from your own knowledge of how your body would be positioned if
   10.21 +  you were skateboarding. If the skateboarder suffers an accident, you
   10.22 +  wince in sympathy, imagining the pain your own body would experience
   10.23 +  if it were in the same situation. This empathy with other people
   10.24 +  guides our understanding of whatever they are doing because it is a
   10.25 +  powerful constraint on what is probable and possible. In order to
   10.26 +  make use of this powerful empathy constraint, I need a system that
   10.27 +  can generate and make sense of sensory data from the many different
   10.28 +  senses that humans possess. The two key properties of such a system
   10.29 +  are /embodiment/ and /imagination/.
   10.30 +
   10.31 +** What is imagination?
   10.32 +
   10.33 +   One kind of imagination is /sympathetic/ imagination: you imagine
   10.34 +   yourself in the position of something/someone you are
   10.35 +   observing. This type of imagination comes into play when you follow
   10.36 +   along visually when watching someone perform actions, or when you
   10.37 +   sympathetically grimace when someone hurts themselves. This type of
   10.38 +   imagination uses the constraints you have learned about your own
   10.39 +   body to highly constrain the possibilities in whatever you are
   10.40 +   seeing. It uses all your senses, including your senses of touch,
   10.41 +   proprioception, etc. Humans are flexible when it comes to "putting
   10.42 +   themselves in another's shoes," and can sympathetically understand
   10.43 +   not only other humans, but entities ranging from animals to cartoon
   10.44 +   characters to [[http://www.youtube.com/watch?v=0jz4HcwTQmU][single dots]] on a screen!
   10.45 +
   10.46 +# and can infer intention from the actions of not only other humans,
   10.47 +# but also animals, cartoon characters, and even abstract moving dots
   10.48 +# on a screen!
   10.49 +
   10.50 +   Another kind of imagination is /predictive/ imagination: you
   10.51 +   construct scenes in your mind that are not entirely related to
   10.52 +   whatever you are observing, but instead are predictions of the
   10.53 +   future or simply flights of fancy. You use this type of imagination
   10.54 +   to plan out multi-step actions, or play out dangerous situations in
   10.55 +   your mind so as to avoid messing them up in reality.
   10.56 +
   10.57 +   Of course, sympathetic and predictive imagination blend into each
   10.58 +   other and are not completely separate concepts. One dimension along
   10.59 +   which you can distinguish types of imagination is dependence on raw
   10.60 +   sense data. Sympathetic imagination is highly constrained by your
   10.61 +   senses, while predictive imagination can be more or less dependent
   10.62 +   on your senses depending on how far ahead you imagine. Daydreaming
   10.63 +   is an extreme form of predictive imagination that wanders through
   10.64 +   different possibilities without concern for whether they are
   10.65 +   related to whatever is happening in reality.
   10.66 +
   10.67 +   For this thesis, I will mostly focus on sympathetic imagination and
   10.68 +   the constraint it provides for understanding sensory data.
   10.69 +   
   10.70 +** What problems can imagination solve?
   10.71 +
   10.72 +   Consider a video of a cat drinking some water.
   10.73 +
   10.74 +   #+caption: A cat drinking some water. Identifying this action is beyond the state of the art for computers.
   10.75 +   #+ATTR_LaTeX: width=5cm
   10.76 +   [[../images/cat-drinking.jpg]]
   10.77 +
   10.78 +   It is currently impossible for any computer program to reliably
   10.79 +   label such a video as "drinking". I think humans are able to label
   10.80 +   such video as "drinking" because they imagine /themselves/ as the
   10.81 +   cat, and imagine putting their face up against a stream of water
   10.82 +   and sticking out their tongue. In that imagined world, they can
   10.83 +   feel the cool water hitting their tongue, and feel the water
   10.84 +   entering their body, and are able to recognize that /feeling/ as
   10.85 +   drinking. So, the label of the action is not really in the pixels
   10.86 +   of the image, but is found clearly in a simulation inspired by
   10.87 +   those pixels. An imaginative system, having been trained on
   10.88 +   drinking and non-drinking examples and learning that the most
   10.89 +   important component of drinking is the feeling of water sliding
   10.90 +   down one's throat, would analyze a video of a cat drinking in the
   10.91 +   following manner:
   10.92 +   
   10.93 +   - Create a physical model of the video by putting a "fuzzy" model
   10.94 +     of its own body in place of the cat. Also, create a simulation of
   10.95 +     the stream of water.
   10.96 +
   10.97 +   - Play out this simulated scene and generate imagined sensory
   10.98 +     experience. This will include relevant muscle contractions, a
   10.99 +     close up view of the stream from the cat's perspective, and most
  10.100 +     importantly, the imagined feeling of water entering the mouth.
  10.101 +
  10.102 +   - The action is now easily identified as drinking by the sense of
  10.103 +     taste alone. The other senses (such as the tongue moving in and
  10.104 +     out) help to give plausibility to the simulated action. Note that
  10.105 +     the sense of vision, while critical in creating the simulation,
  10.106 +     is not critical for identifying the action from the simulation.
  10.107 +
  10.108 +   More generally, I expect imaginative systems to be particularly
  10.109 +   good at identifying embodied actions in videos.
  10.110 +
  10.111 +* Cortex
  10.112 +
  10.113 +  The previous example involves liquids, the sense of taste, and
  10.114 +  imagining oneself as a cat. For this thesis I constrain myself to
  10.115 +  simpler, more easily digitizable senses and situations.
  10.116 +
  10.117 +  My system, =CORTEX=, performs imagination in two different simplified
  10.118 +  worlds: /worm world/ and /stick-figure world/. In each of these
  10.119 +  worlds, entities capable of imagination recognize actions by
  10.120 +  simulating the experience from their own perspective, and then
  10.121 +  recognizing the action from a database of examples.
  10.122 +
  10.123 +  In order to serve as a framework for experiments in imagination,
  10.124 +  =CORTEX= requires simulated bodies, worlds, and senses like vision,
  10.125 +  hearing, touch, proprioception, etc.
  10.126 +
  10.127 +** A Video Game Engine takes care of some of the groundwork
  10.128 +
  10.129 +   When it comes to simulation environments, the engines used to
  10.130 +   create the worlds in video games offer top-notch physics and
  10.131 +   graphics support. These engines also have limited support for
  10.132 +   creating cameras and rendering 3D sound, which can be repurposed
  10.133 +   for vision and hearing respectively. Physics collision detection
  10.134 +   can be expanded to create a sense of touch.
  10.135 +   
  10.136 +   jMonkeyEngine3 is one such engine for creating video games in
  10.137 +   Java. It uses OpenGL to render to the screen and uses scenegraphs
  10.138 +   to avoid drawing things that do not appear on the screen. It has an
  10.139 +   active community and several games in the pipeline. The engine was
  10.140 +   not built to serve any particular game but is instead meant to be
  10.141 +   used for any 3D game. I chose jMonkeyEngine3 because it had the
  10.142 +   most features out of all the open projects I looked at, and because
  10.143 +   I could then write my code in Clojure, an implementation of LISP
  10.144 +   that runs on the JVM.
  10.145 +
  10.146 +** =CORTEX= Extends jMonkeyEngine3 to implement rich senses
  10.147 +
  10.148 +   Using the game-making primitives provided by jMonkeyEngine3, I have
  10.149 +   constructed every major human sense except for smell and
  10.150 +   taste. =CORTEX= also provides an interface for creating creatures
  10.151 +   in Blender, a 3D modeling environment, and then "rigging" the
  10.152 +   creatures with senses using 3D annotations in Blender. A creature
  10.153 +   can have any number of senses, and there can be any number of
  10.154 +   creatures in a simulation.
  10.155 +   
  10.156 +   The senses available in =CORTEX= are:
  10.157 +
  10.158 +   - [[../../cortex/html/vision.html][Vision]]
  10.159 +   - [[../../cortex/html/hearing.html][Hearing]]
  10.160 +   - [[../../cortex/html/touch.html][Touch]]
  10.161 +   - [[../../cortex/html/proprioception.html][Proprioception]]
  10.162 +   - [[../../cortex/html/movement.html][Muscle Tension]]
  10.163 +
  10.164 +* A roadmap for =CORTEX= experiments
  10.165 +
  10.166 +** Worm World
  10.167 +
  10.168 +   Worms in =CORTEX= are segmented creatures which vary in length and
  10.169 +   number of segments, and have the senses of vision, proprioception,
  10.170 +   touch, and muscle tension.
  10.171 +
  10.172 +#+attr_html: width=755
  10.173 +#+caption: This is the tactile-sensor-profile for the upper segment of a worm. It defines regions of high touch sensitivity (where there are many white pixels) and regions of low sensitivity (where white pixels are sparse).
  10.174 +[[../images/finger-UV.png]]
  10.175 +
  10.176 +
  10.177 +#+begin_html
  10.178 +<div class="figure">
  10.179 +  <center>
  10.180 +    <video controls="controls" width="550">
  10.181 +      <source src="../video/worm-touch.ogg" type="video/ogg"
  10.182 +	      preload="none" />
  10.183 +    </video>
  10.184 +    <br> <a href="http://youtu.be/RHx2wqzNVcU"> YouTube </a>
  10.185 +  </center>
  10.186 +  <p>The worm responds to touch.</p>
  10.187 +</div>
  10.188 +#+end_html
  10.189 +
  10.190 +#+begin_html
  10.191 +<div class="figure">
  10.192 +  <center>
  10.193 +    <video controls="controls" width="550">
  10.194 +      <source src="../video/test-proprioception.ogg" type="video/ogg"
  10.195 +	      preload="none" />
  10.196 +    </video>
  10.197 +    <br> <a href="http://youtu.be/JjdDmyM8b0w"> YouTube </a>
  10.198 +  </center>
  10.199 +  <p>Proprioception in a worm. The proprioceptive readout is
  10.200 +    in the upper left corner of the screen.</p>
  10.201 +</div>
  10.202 +#+end_html
  10.203 +
  10.204 +   A worm is trained in various actions such as sinusoidal movement,
  10.205 +   curling, flailing, and spinning by directly playing motor
  10.206 +   contractions while the worm "feels" the experience. These actions
  10.207 +   are recorded not only as vectors of muscle tension, touch, and
  10.208 +   proprioceptive data, but also in higher-level forms such as
  10.209 +   frequencies of the various contractions and a symbolic name for the
  10.210 +   action.
  10.211 +
  10.212 +   Then, the worm watches a video of another worm performing one of
  10.213 +   the actions, and must judge which action was performed. Normally
  10.214 +   this would be an extremely difficult problem, but the worm is able
  10.215 +   to greatly diminish the search space through sympathetic
  10.216 +   imagination. First, it creates an imagined copy of its body which
  10.217 +   it observes from a third person point of view. Then for each frame
  10.218 +   of the video, it maneuvers its simulated body to be in registration
  10.219 +   with the worm depicted in the video. The physical constraints
  10.220 +   imposed by the physics simulation greatly decrease the number of
  10.221 +   poses that have to be tried, making the search feasible. As the
  10.222 +   imaginary worm moves, it generates imaginary muscle tension and
  10.223 +   proprioceptive sensations. The worm determines the action not by
  10.224 +   vision, but by matching the imagined proprioceptive data with
  10.225 +   previous examples.
  10.226 +
  10.227 +   By using non-visual sensory data such as touch, the worms can also
  10.228 +   answer body related questions such as "did your head touch your
  10.229 +   tail?" and "did worm A touch worm B?"
  10.230 +
  10.231 +   The proprioceptive information used for action identification is
  10.232 +   body-centric, so only the registration step is dependent on point
  10.233 +   of view, not the identification step. Registration is not specific
  10.234 +   to any particular action. Thus, action identification can be
  10.235 +   divided into a point-of-view dependent generic registration step,
  10.236 +   and an action-specific step that is body-centered and invariant to
  10.237 +   point of view.
  10.238 +
  10.239 +** Stick Figure World
  10.240 +
  10.241 +   This environment is similar to Worm World, except the creatures are
  10.242 +   more complicated and the actions and questions more varied. It is
  10.243 +   an experiment to see how far imagination can go in interpreting
  10.244 +   actions.  
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/thesis/org/roadmap.org	Sat Mar 22 16:10:34 2014 -0400
    11.3 @@ -0,0 +1,220 @@
    11.4 +In order for this to be a reasonable thesis that I can be proud of,
    11.5 +what is the /minimum/ number of things I need to get done?
    11.6 +
    11.7 +
    11.8 +* worm OR hand registration
    11.9 +  - training from a few examples (2 to start out)
   11.10 +  - aligning the body with the scene
   11.11 +  - generating sensory data
   11.12 +  - matching previous labeled examples using dot-products or some
   11.13 +    other basic thing
   11.14 +  - showing that it works with different views
   11.15 +
   11.16 +* first draft
   11.17 +  - draft of thesis without bibliography or formatting
   11.18 +  - should have basic experiment and have full description of
   11.19 +    framework with code
   11.20 +  - review with Winston
   11.21 +  
   11.22 +* final draft
   11.23 +  - implement stretch goals from Winston if possible
   11.24 +  - complete final formatting and submit
   11.25 +
   11.26 +* CORTEX
   11.27 +  DEADLINE: <2014-05-09 Fri>
   11.28 +  SHIT THAT'S IN 67 DAYS!!!
   11.29 +
   11.30 +** program simple feature matching code for the worm's segments
   11.31 +
   11.32 +Subgoals:
   11.33 +*** DONE Get cortex working again, run tests, no jmonkeyengine updates
   11.34 +    CLOSED: [2014-03-03 Mon 22:07] SCHEDULED: <2014-03-03 Mon>
   11.35 +*** DONE get blender working again
   11.36 +    CLOSED: [2014-03-03 Mon 22:43] SCHEDULED: <2014-03-03 Mon>
   11.37 +*** DONE make sparse touch worm segment in blender
   11.38 +    CLOSED: [2014-03-03 Mon 23:16] SCHEDULED: <2014-03-03 Mon>
   11.39 +    CLOCK: [2014-03-03 Mon 22:44]--[2014-03-03 Mon 23:16] =>  0:32
   11.40 +*** DONE make multi-segment touch worm with touch sensors and display
   11.41 +    CLOSED: [2014-03-03 Mon 23:54] SCHEDULED: <2014-03-03 Mon>
   11.42 +
   11.43 +*** DONE Make a worm wiggle and curl
   11.44 +    CLOSED: [2014-03-04 Tue 23:03] SCHEDULED: <2014-03-04 Tue>
   11.45 +
   11.46 +
   11.47 +** First draft
   11.48 +
   11.49 +Subgoals:
   11.50 +*** Writeup new worm experiments.
   11.51 +*** Triage implementation code and get it into chapter form.
   11.52 +
   11.53 +
   11.54 +
   11.55 + 
   11.56 +
   11.57 +** for today
   11.58 +
   11.59 +- guided worm :: control the worm with the keyboard. Useful for
   11.60 +                 testing the body-centered recog scripts, and for
   11.61 +                 preparing a cool demo video.
   11.62 +
   11.63 +- body-centered recognition :: detect actions using hard coded
   11.64 +     body-centered scripts. 
   11.65 +
   11.66 +- cool demo video of the worm being moved and recognizing things ::
   11.67 +     will be a neat part of the thesis.
   11.68 +
   11.69 +- thesis export :: refactoring and organization of code so that it
   11.70 +                   spits out a thesis in addition to the web page.
   11.71 +
   11.72 +- video alignment :: analyze the frames of a video in order to align
   11.73 +     the worm. Requires body-centered recognition. Can "cheat".
   11.74 +
   11.75 +- smoother actions :: use debugging controls to directly influence the
   11.76 +     demo actions, and to generate recognition procedures.
   11.77 +
   11.78 +- degenerate video demonstration :: show the system recognizing a
   11.79 +     curled worm from dead on. Crowning achievement of thesis.
   11.80 +
   11.81 +** Ordered from easiest to hardest
   11.82 +
   11.83 +Just report the positions of everything. I don't think that this
   11.84 +necessarily shows anything useful.
   11.85 +
   11.86 +Worm-segment vision -- you initialize a view of the worm, but instead
   11.87 +of pixels you use labels via ray tracing. Has the advantage of still
   11.88 +allowing for visual occlusion, but reliably identifies the objects,
   11.89 +even without rainbow coloring. You can code this as an image. 
   11.90 +
   11.91 +Same as above, except just with worm/non-worm labels.
   11.92 +
   11.93 +Color code each worm segment and then recognize them using blob
   11.94 +detectors. Then you solve for the perspective and the action
   11.95 +simultaneously.
   11.96 +
   11.97 +The entire worm can be colored the same, high contrast color against a
   11.98 +nearly black background.
   11.99 +
  11.100 +"Rooted" vision. You give the exact coordinates of ONE piece of the
  11.101 +worm, but the algorithm figures out the rest.
  11.102 +
  11.103 +More rooted vision -- start off the entire worm with one position.
  11.104 +
  11.105 +The right way to do alignment is to use motion over multiple frames to
  11.106 +snap individual pieces of the model into place, sharing and
  11.107 +propagating the individual alignments over the whole model. We also
  11.108 +want to limit the alignment search to just those actions we are
  11.109 +prepared to identify. This might mean that I need some small "micro
  11.110 +actions" such as the individual movements of the worm pieces.
  11.111 +
  11.112 +Get just the centers of each segment projected onto the imaging
  11.113 +plane. (best so far).
  11.114 +
  11.115 +
  11.116 +Repertoire of actions  +  video frames -->
  11.117 +   directed multi-frame-search alg
  11.118 +
  11.119 +
  11.120 +
  11.121 +
  11.122 +
  11.123 +
  11.124 +!! Could also have a bounding box around the worm provided by
  11.125 +filtering the worm/non-worm render, and use bbbgs. As a bonus, I get
  11.126 +to include bbbgs in my thesis! Could finally do that recursive thing
  11.127 +where I make bounding boxes be those things that give results that
  11.128 +give good bounding boxes. If I did this I could use a disruptive
  11.129 +pattern on the worm.
  11.130 +
  11.131 +Re-imagining using default textures is very simple for this system,
  11.132 +but hard for others.
  11.133 +
  11.134 +
  11.135 +Want to demonstrate, at minimum, alignment of some model of the worm
  11.136 +to the video, and a lookup of the action by simulated perception.
  11.137 +
  11.138 +note: the purple/white points is a very beautiful texture, because
  11.139 +when it moves slightly, the white dots look like they're
  11.140 +twinkling. Would look even better if it was a darker purple. Also
  11.141 +would look better more spread out.
  11.142 +
  11.143 +
  11.144 +embed assumption of one frame of view, search by moving around in
  11.145 +simulated world.
  11.146 +
  11.147 +Allowed to limit search by setting limits to a hemisphere around the
  11.148 +imagined worm! This limits scale also.
  11.149 +
  11.150 +
  11.151 +
  11.152 +
  11.153 +
  11.154 +!! Limited search with worm/non-worm rendering. 
  11.155 +How much inverse kinematics do we have to do?
  11.156 +What about cached (allowed state-space) paths, derived from labeled
  11.157 +training. You have to lead from one to another.
  11.158 +
  11.159 +What about initial state? Could start the input videos at a specific
  11.160 +state, then just match that explicitly.
  11.161 +
  11.162 +!! The training doesn't have to be labeled -- you can just move around
  11.163 +for a while!!
  11.164 +
  11.165 +!! Limited search with motion based alignment.
  11.166 +
  11.167 +
  11.168 +
  11.169 +
  11.170 +"play arounds" can establish a chain of linked sensoriums. Future
  11.171 +matches must fall into one of the already experienced things, and once
  11.172 +they do, it greatly limits the things that are possible in the future.
  11.173 +
  11.174 +
  11.175 +frame differences help to detect muscle exertion.
  11.176 +
  11.177 +Can try to match on a few "representative" frames. Can also just have
  11.178 +a few "bodies" in various states which we try to match.
  11.179 +
  11.180 +
  11.181 +
  11.182 +Paths through state-space have the exact same signature as
  11.183 +simulation. BUT, these can be searched in parallel and don't interfere
  11.184 +with each other.
  11.185 +
  11.186 +
  11.187 +
  11.188 +
  11.189 +** Final stretch up to First Draft
  11.190 +
  11.191 +*** DONE complete debug control of worm
  11.192 +    CLOSED: [2014-03-17 Mon 17:29] SCHEDULED: <2014-03-17 Mon>
  11.193 +    CLOCK: [2014-03-17 Mon 14:01]--[2014-03-17 Mon 17:29] =>  3:28
  11.194 +*** DONE add phi-space output to debug control
  11.195 +    CLOSED: [2014-03-17 Mon 17:42] SCHEDULED: <2014-03-17 Mon>
  11.196 +    CLOCK: [2014-03-17 Mon 17:31]--[2014-03-17 Mon 17:42] =>  0:11
  11.197 +
  11.198 +*** DONE complete automatic touch partitioning
  11.199 +    CLOSED: [2014-03-18 Tue 21:43] SCHEDULED: <2014-03-18 Tue>
  11.200 +*** DONE complete cyclic predicate
  11.201 +    CLOSED: [2014-03-19 Wed 16:34] SCHEDULED: <2014-03-18 Tue>
  11.202 +    CLOCK: [2014-03-19 Wed 13:16]--[2014-03-19 Wed 16:34] =>  3:18
  11.203 +*** DONE complete three phi-stream action predicates; test them with debug control
  11.204 +    CLOSED: [2014-03-19 Wed 16:35] SCHEDULED: <2014-03-17 Mon>
  11.205 +    CLOCK: [2014-03-18 Tue 18:36]--[2014-03-18 Tue 21:43] =>  3:07
  11.206 +    CLOCK: [2014-03-18 Tue 18:34]--[2014-03-18 Tue 18:36] =>  0:02
  11.207 +    CLOCK: [2014-03-17 Mon 19:19]--[2014-03-17 Mon 21:19] =>  2:00
  11.208 +*** DONE build an automatic "do all the things" sequence.
  11.209 +    CLOSED: [2014-03-19 Wed 16:55] SCHEDULED: <2014-03-19 Wed>
  11.210 +    CLOCK: [2014-03-19 Wed 16:53]--[2014-03-19 Wed 16:55] =>  0:02
  11.211 +*** DONE implement proprioception based movement lookup in phi-space
  11.212 +    CLOSED: [2014-03-19 Wed 22:04] SCHEDULED: <2014-03-19 Wed>
  11.213 +    CLOCK: [2014-03-19 Wed 19:32]--[2014-03-19 Wed 22:04] =>  2:32
  11.214 +*** DONE make proprioception reference phi-space indexes
  11.215 +    CLOSED: [2014-03-19 Wed 22:47] SCHEDULED: <2014-03-19 Wed>
  11.216 +    CLOCK: [2014-03-19 Wed 22:07]
  11.217 +
  11.218 +
  11.219 +*** DONE create test videos, also record positions of worm segments
  11.220 +    CLOSED: [2014-03-20 Thu 22:02] SCHEDULED: <2014-03-19 Wed>
  11.221 +
  11.222 +*** TODO Collect intro, worm-learn and cortex creation into draft thesis. 
  11.223 +