<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>CORTEX</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
<meta name="title" content="CORTEX"/>
<meta name="generator" content="Org-mode"/>
<meta name="generated" content="2013-11-07 04:21:29 EST"/>
<meta name="author" content="Robert McIntyre"/>
<meta name="description" content="Using embodied AI to facilitate Artificial Imagination."/>
<meta name="keywords" content="AI, clojure, embodiment"/>
<style type="text/css">
<!--/*--><![CDATA[/*><!--*/
  html { font-family: Times, serif; font-size: 12pt; }
  .title { text-align: center; }
  .todo { color: red; }
  .done { color: green; }
  .tag { background-color: #add8e6; font-weight: normal; }
  .target { }
  .timestamp { color: #bebebe; }
  .timestamp-kwd { color: #5f9ea0; }
  .right { margin-left: auto; margin-right: 0px; text-align: right; }
  .left { margin-left: 0px; margin-right: auto; text-align: left; }
  .center { margin-left: auto; margin-right: auto; text-align: center; }
  p.verse { margin-left: 3%; }
  pre {
    border: 1pt solid #AEBDCC;
    background-color: #F3F5F7;
    padding: 5pt;
    font-family: courier, monospace;
    font-size: 90%;
    overflow: auto;
  }
  table { border-collapse: collapse; }
  td, th { vertical-align: top; }
  th.right { text-align: center; }
  th.left { text-align: center; }
  th.center { text-align: center; }
  td.right { text-align: right; }
  td.left { text-align: left; }
  td.center { text-align: center; }
  dt { font-weight: bold; }
  div.figure { padding: 0.5em; }
  div.figure p { text-align: center; }
  div.inlinetask {
    padding: 10px;
    border: 2px solid gray;
    margin: 10px;
    background: #ffffcc;
  }
  textarea { overflow-x: auto; }
  .linenr { font-size: smaller; }
  .code-highlighted { background-color: #ffff00; }
  .org-info-js_info-navigation { border-style: none; }
  #org-info-js_console-label { font-size: 10px; font-weight: bold;
    white-space: nowrap; }
  .org-info-js_search-highlight { background-color: #ffff00; color: #000000;
    font-weight: bold; }
/*]]>*/-->
</style>
<script type="text/javascript">var _gaq = _gaq || [];_gaq.push(['_setAccount', 'UA-31261312-1']);_gaq.push(['_trackPageview']);(function() {var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);})();</script>
<link rel="stylesheet" type="text/css" href="../../aurellem/css/argentum.css" />
<script type="text/javascript">
<!--/*--><![CDATA[/*><!--*/
 function CodeHighlightOn(elem, id)
 {
   var target = document.getElementById(id);
   if(null != target) {
     elem.cacheClassElem = elem.className;
     elem.cacheClassTarget = target.className;
     target.className = "code-highlighted";
     elem.className = "code-highlighted";
   }
 }
 function CodeHighlightOff(elem, id)
 {
   var target = document.getElementById(id);
   if(elem.cacheClassElem)
     elem.className = elem.cacheClassElem;
   if(elem.cacheClassTarget)
     target.className = elem.cacheClassTarget;
 }
/*]]>*///-->
</script>

</head>
<body>

<div id="content">
<h1 class="title"><code>CORTEX</code></h1>

<div class="header">
<div class="float-right">
<!--
<form>
<input type="text"/><input type="submit" value="search the blog »"/>
</form>
-->
</div>

<h1>aurellem <em>☉</em></h1>
<ul class="nav">
<li><a href="/">read the blog »</a></li>
<!-- li><a href="#">learn about us »</a></li-->
</ul>
</div>

<div class="author">Written by <author>Robert McIntyre</author></div>

<div id="outline-container-1" class="outline-2">
<h2 id="sec-1">Artificial Imagination</h2>
<div class="outline-text-2" id="text-1">
<p>
Imagine watching a video of someone skateboarding. When you watch
the video, you can imagine yourself skateboarding, and your
knowledge of the human body and its dynamics guides your
interpretation of the scene. For example, even if the skateboarder
is partially occluded, you can infer the positions of his arms and
body from your own knowledge of how your body would be positioned if
you were skateboarding. If the skateboarder suffers an accident, you
wince in sympathy, imagining the pain your own body would experience
if it were in the same situation. This empathy with other people
guides your understanding of whatever they are doing because it is a
powerful constraint on what is probable and possible. In order to
make use of this powerful empathy constraint, I need a system that
can generate and make sense of sensory data from the many different
senses that humans possess. The two key properties of such a system
are <i>embodiment</i> and <i>imagination</i>.
</p>

</div>

<div id="outline-container-1-1" class="outline-3">
<h3 id="sec-1-1">What is imagination?</h3>
<div class="outline-text-3" id="text-1-1">

<p>
One kind of imagination is <i>sympathetic</i> imagination: you imagine
yourself in the position of something/someone you are
observing. This type of imagination comes into play when you follow
along visually while watching someone perform actions, or when you
sympathetically grimace when someone hurts themselves. This type of
imagination uses the constraints you have learned about your own
body to highly constrain the possibilities in whatever you are
seeing. It uses all your senses, including your senses of touch,
proprioception, etc. Humans are flexible when it comes to "putting
themselves in another's shoes," and can sympathetically understand
not only other humans, but entities ranging from animals to cartoon
characters to <a href="http://www.youtube.com/watch?v=0jz4HcwTQmU">single dots</a> on a screen!
</p>
<p>
Another kind of imagination is <i>predictive</i> imagination: you
construct scenes in your mind that are not entirely related to
whatever you are observing, but instead are predictions of the
future or simply flights of fancy. You use this type of imagination
to plan out multi-step actions, or play out dangerous situations in
your mind so as to avoid messing them up in reality.
</p>
<p>
Of course, sympathetic and predictive imagination blend into each
other and are not completely separate concepts. One dimension along
which you can distinguish types of imagination is dependence on raw
sense data. Sympathetic imagination is highly constrained by your
senses, while predictive imagination can be more or less dependent
on your senses depending on how far ahead you imagine. Daydreaming
is an extreme form of predictive imagination that wanders through
different possibilities without concern for whether they are
related to whatever is happening in reality.
</p>
<p>
For this thesis, I will mostly focus on sympathetic imagination and
the constraint it provides for understanding sensory data.
</p>
</div>

</div>

<div id="outline-container-1-2" class="outline-3">
<h3 id="sec-1-2">What problems can imagination solve?</h3>
<div class="outline-text-3" id="text-1-2">

<p>
Consider a video of a cat drinking some water.
</p>

<div class="figure">
<p><img src="../images/cat-drinking.jpg" alt="../images/cat-drinking.jpg" /></p>
<p>A cat drinking some water. Identifying this action is beyond the state of the art for computers.</p>
</div>

<p>
It is currently impossible for any computer program to reliably
label such a video as "drinking". I think humans are able to label
such a video as "drinking" because they imagine <i>themselves</i> as the
cat, and imagine putting their face up against a stream of water
and sticking out their tongue. In that imagined world, they can
feel the cool water hitting their tongue, and feel the water
entering their body, and are able to recognize that <i>feeling</i> as
drinking. So, the label of the action is not really in the pixels
of the image, but is found clearly in a simulation inspired by
those pixels. An imaginative system, having been trained on
drinking and non-drinking examples and learning that the most
important component of drinking is the feeling of water sliding
down one's throat, would analyze a video of a cat drinking in the
following manner:
</p>
<ul>
<li>Create a physical model of the video by putting a "fuzzy" model
of its own body in place of the cat. Also, create a simulation of
the stream of water.
</li>
<li>Play out this simulated scene and generate imagined sensory
experience. This will include relevant muscle contractions, a
close-up view of the stream from the cat's perspective, and most
importantly, the imagined feeling of water entering the mouth.
</li>
<li>The action is now easily identified as drinking by the sense of
taste alone. The other senses (such as the tongue moving in and
out) help to give plausibility to the simulated action. Note that
the sense of vision, while critical in creating the simulation,
is not critical for identifying the action from the simulation.
</li>
</ul>
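
<p>
The three steps above can be thought of as a small pipeline. The
Clojure sketch below shows one way such a pipeline could be wired
together; every name in it (<code>build-model</code>, <code>play-scene</code>,
<code>feeling-distance</code>, and the <code>:label</code> and <code>:feelings</code> keys) is a
hypothetical placeholder rather than part of <code>Cortex</code>.
</p>

<pre class="src src-clojure">;; A sketch of the imagination pipeline described above.  The three
;; step functions are assumed to be supplied by the caller.
(defn label-action
  "Return the label of the stored example whose imagined feeling is
   closest to the feeling produced by re-enacting the video."
  [video {:keys [build-model play-scene feeling-distance examples]}]
  (let [scene    (build-model video)   ; step 1: fuzzy self-model plus simulated water
        feelings (play-scene scene)]   ; step 2: imagined sensory experience
    ;; step 3: identify the action by how it feels, not by its pixels
    (:label (apply min-key #(feeling-distance feelings (:feelings %)) examples))))</pre>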

<p>
More generally, I expect imaginative systems to be particularly
good at identifying embodied actions in videos.
</p>
</div>
</div>

</div>

<div id="outline-container-2" class="outline-2">
<h2 id="sec-2">Cortex</h2>
<div class="outline-text-2" id="text-2">

<p>
The previous example involves liquids, the sense of taste, and
imagining oneself as a cat. For this thesis I constrain myself to
simpler, more easily digitizable senses and situations.
</p>
<p>
My system, <code>Cortex</code>, performs imagination in two different simplified
worlds: <i>worm world</i> and <i>stick figure world</i>. In each of these
worlds, entities capable of imagination recognize actions by
simulating the experience from their own perspective, and then
recognizing the action from a database of examples.
</p>
<p>
In order to serve as a framework for experiments in imagination,
<code>Cortex</code> requires simulated bodies, worlds, and senses like vision,
hearing, touch, proprioception, etc.
</p>

</div>

<div id="outline-container-2-1" class="outline-3">
<h3 id="sec-2-1">A Video Game Engine takes care of some of the groundwork</h3>
<div class="outline-text-3" id="text-2-1">

<p>
When it comes to simulation environments, the engines used to
create the worlds in video games offer top-notch physics and
graphics support. These engines also have limited support for
creating cameras and rendering 3D sound, which can be repurposed
for vision and hearing respectively. Physics collision detection
can be expanded to create a sense of touch.
</p>
<p>
jMonkeyEngine3 is one such engine for creating video games in
Java. It uses OpenGL to render to the screen and uses scene graphs
to avoid drawing things that do not appear on the screen. It has an
active community and several games in the pipeline. The engine was
not built to serve any particular game but is instead meant to be
used for any 3D game. I chose jMonkeyEngine3 because it had the
most features out of all the open projects I looked at, and because
I could then write my code in Clojure, a dialect of Lisp that runs
on the JVM.
</p>
</div>

</div>

<div id="outline-container-2-2" class="outline-3">
<h3 id="sec-2-2"><code>CORTEX</code> Extends jMonkeyEngine3 to implement rich senses</h3>
<div class="outline-text-3" id="text-2-2">

<p>
Using the game-making primitives provided by jMonkeyEngine3, I have
constructed every major human sense except for smell and
taste. <code>Cortex</code> also provides an interface for creating creatures
in Blender, a 3D modeling environment, and then "rigging" the
creatures with senses using 3D annotations in Blender. A creature
can have any number of senses, and there can be any number of
creatures in a simulation.
</p>
<p>
The senses available in <code>Cortex</code> are:
</p>
<ul>
<li><a href="../../cortex/html/vision.html">Vision</a>
</li>
<li><a href="../../cortex/html/hearing.html">Hearing</a>
</li>
<li><a href="../../cortex/html/touch.html">Touch</a>
</li>
<li><a href="../../cortex/html/proprioception.html">Proprioception</a>
</li>
<li><a href="../../cortex/html/movement.html">Muscle Tension</a>
</li>
</ul>
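
<p>
As a rough picture of what a "rigged" creature amounts to at run
time, the Clojure sketch below treats a creature as a Blender model
plus a map of sense functions, each of which is polled once per
simulation step. The names and data shapes here are hypothetical
illustrations, not the actual <code>Cortex</code> interface; the real sense
annotations live in the Blender file itself.
</p>

<pre class="src src-clojure">;; Hypothetical runtime shape of a rigged creature: a Blender model
;; plus one function per sense.  The stand-in sense functions below
;; simply return empty data.
(def worm
  {:model  "worm.blend"
   :senses {:touch          (fn [_world] [])
            :proprioception (fn [_world] [])
            :muscle-tension (fn [_world] [])
            :vision         (fn [_world] [])}})

;; One simulation step: poll every sense of every creature and
;; collect the results into one map per creature.
(defn sense-all [world creatures]
  (for [creature creatures]
    (into {} (for [[sense-name sense-fn] (:senses creature)]
               [sense-name (sense-fn world)]))))</pre>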

</div>
</div>

</div>

<div id="outline-container-3" class="outline-2">
<h2 id="sec-3">A roadmap for <code>Cortex</code> experiments</h2>
<div class="outline-text-2" id="text-3">

</div>

<div id="outline-container-3-1" class="outline-3">
<h3 id="sec-3-1">Worm World</h3>
<div class="outline-text-3" id="text-3-1">

<p>
Worms in <code>Cortex</code> are segmented creatures which vary in length and
number of segments, and have the senses of vision, proprioception,
touch, and muscle tension.
</p>

<div class="figure">
<p><img src="../images/finger-UV.png" width="755" alt="../images/finger-UV.png" /></p>
<p>This is the tactile-sensor-profile for the upper segment of a worm. It defines regions of high touch sensitivity (where there are many white pixels) and regions of low sensitivity (where white pixels are sparse).</p>
</div>

<div class="figure">
<center>
<video controls="controls" width="550">
<source src="../video/worm-touch.ogg" type="video/ogg"
preload="none" />
</video>
<br/> <a href="http://youtu.be/RHx2wqzNVcU"> YouTube </a>
</center>
<p>The worm responds to touch.</p>
</div>

<div class="figure">
<center>
<video controls="controls" width="550">
<source src="../video/test-proprioception.ogg" type="video/ogg"
preload="none" />
</video>
<br/> <a href="http://youtu.be/JjdDmyM8b0w"> YouTube </a>
</center>
<p>Proprioception in a worm. The proprioceptive readout is
in the upper left corner of the screen.</p>
</div>

<p>
A worm is trained in various actions such as sinusoidal movement,
curling, flailing, and spinning by directly playing motor
contractions while the worm "feels" the experience. These actions
are recorded both as vectors of muscle tension, touch, and
proprioceptive data, and in higher-level forms such as the
frequencies of the various contractions and a symbolic name for the
action.
</p>
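
<p>
One convenient way to store such a recording is as a plain Clojure
map. The sketch below shows a hypothetical shape for one recorded
action; the field names and numbers are made up for illustration.
</p>

<pre class="src src-clojure">;; Hypothetical record of one trained action: raw per-frame sensory
;; vectors plus the higher-level summary described above.
(def example-action
  {:name        :curling
   :muscles     [[0.0 0.3 0.9] [0.0 0.5 0.7]]  ; per-frame muscle tensions
   :touch       [[0 0 1 1] [0 1 1 0]]          ; per-frame touch activations
   :proprio     [[0.1 0.4] [0.2 0.5]]          ; per-frame joint angles (radians)
   :frequencies {:contraction-hz 1.5}})</pre>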
<p>
Then, the worm watches a video of another worm performing one of
the actions, and must judge which action was performed. Normally
this would be an extremely difficult problem, but the worm is able
to greatly diminish the search space through sympathetic
imagination. First, it creates an imagined copy of its body, which
it observes from a third-person point of view. Then, for each frame
of the video, it maneuvers its simulated body to be in registration
with the worm depicted in the video. The physical constraints
imposed by the physics simulation greatly decrease the number of
poses that have to be tried, making the search feasible. As the
imaginary worm moves, it generates imaginary muscle tension and
proprioceptive sensations. The worm determines the action not by
vision, but by matching the imagined proprioceptive data with
previous examples.
</p>
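
<p>
A minimal sketch of that final matching step, assuming proprioceptive
data is stored as a sequence of joint-angle vectors as in the
hypothetical record above; the actual <code>Cortex</code> matching code may
differ.
</p>

<pre class="src src-clojure">;; Mean squared difference between two equally sampled sequences of
;; joint-angle vectors.
(defn proprio-distance [a b]
  (let [frame-diff (fn [u v]
                     (reduce + (map (fn [x y] (let [d (- x y)] (* d d))) u v)))
        diffs      (map frame-diff a b)]
    (/ (reduce + diffs) (max 1 (count diffs)))))

;; Return the name of the stored example whose proprioceptive data is
;; closest to the imagined data, e.g.
;;   (identify-action imagined-proprio [example-action]) => :curling
(defn identify-action [imagined examples]
  (:name (apply min-key #(proprio-distance imagined (:proprio %)) examples)))</pre>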
<p>
By using non-visual sensory data such as touch, the worms can also
answer body-related questions such as "did your head touch your
tail?" and "did worm A touch worm B?"
</p>
<p>
The proprioceptive information used for action identification is
body-centric, so only the registration step is dependent on point
of view, not the identification step. Registration is not specific
to any particular action. Thus, action identification can be
divided into a point-of-view-dependent generic registration step
and an action-specific step that is body-centered and invariant to
point of view.
</p>
</div>

</div>

<div id="outline-container-3-2" class="outline-3">
<h3 id="sec-3-2">Stick Figure World</h3>
<div class="outline-text-3" id="text-3-2">

<p>
This environment is similar to Worm World, except the creatures are
more complicated and the actions and questions more varied. It is
an experiment to see how far imagination can go in interpreting
actions.
</p></div>
</div>
</div>
</div>

<div id="postamble">
<p class="date">Date: 2013-11-07 04:21:29 EST</p>
<p class="author">Author: Robert McIntyre</p>
<p class="creator">Org version 7.7 with Emacs version 24</p>
<a href="http://validator.w3.org/check?uri=referer">Validate XHTML 1.0</a>

</div>
</body>
</html>