comparison src/rlm/dna_melting.clj @ 0:78a630e650d2

initial import
author Robert McIntyre <rlm@mit.edu>
date Tue, 18 Oct 2011 00:57:08 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:78a630e650d2
1 (ns rlm.dna-melting
2 (:refer-clojure :only [])
3 (:require rlm.ns-rlm mobius.base))
4 (rlm.ns-rlm/ns-clone mobius.base)
5 (use :reload-all 'rlm.web-stuff)
6
7
8 (def pre "/home/r/MIT/6/6.122/DNA/samples/")
9
10
11
12 (defrecord dna-data [temp glow name temp-units glow-units]
13 clojure.lang.IFn
14 (invoke [this k] (get this k))
15 (invoke [this k not-found] (get this k not-found))
16 (toString [this] "whatever"))
17
18 (defmethod print-method dna-data [d w]
19 (.write w (.toString d)))
20
21 (defmethod print-dup dna-data [d w]
22 (print-method d w))
23
24 (defn load-data [source]
25 (let [data (read-dataset source :delim \tab)
26 temp (vec ($ :col0 data))
27 glow (vec ($ :col1 data))]
28 (dna-data. temp glow
29 (last (re-split #"/" source))
30 "Raw Temperature (Volts)"
31 "Raw Glow (Volts)")))
32
33
34 (def ?-1 (load-data (str pre "sample4-1.txt")))
35
36 ;; I accidentally removed the cuvette while it was still recording!
37 ;; drop last 5 seconds of invalid data!
38 (def ?-2* (load-data (str pre "sample4-2.txt")))
39 (def ?-2 (assoc ?-2*
40 :temp (drop-last 500 (:temp ?-2*))
41 :glow (drop-last 500 (:glow ?-2*))))
42
43 (def ?-3 (load-data (str pre "sample4-3.txt")))
44 (def A-1 (load-data (str pre "sampleA-1.txt")))
45 (def A-2 (load-data (str pre "sampleA-2.txt")))
46 (def A-3 (load-data (str pre "sampleA-3.txt")))
47 (def B-2 (load-data (str pre "sampleB-2.txt")))
48 (def B-3 (load-data (str pre "sampleB-3.txt")))
49 (def C-1 (load-data (str pre "sampleC-1.txt")))
50 (def C-2 (load-data (str pre "sampleC-2.txt")))
51 (def C-3 (load-data (str pre "sampleC-3.txt")))
52
53
54 (def pure-bleaching (load-data (str pre "sampleB-1(failed-to-heat).txt")))
55 (def double-cycle (load-data (str pre "sampleC-3(double).txt")))
56 (def triple-cycle (load-data (str pre "sampleC-3(triple).txt")))
57
58 (def all-samples
59 [?-1 ?-3 A-1 A-2 A-3 B-2 B-3 C-1 C-2 C-3])
60
61 (def everything
62 (concat [pure-bleaching double-cycle triple-cycle] all-samples))
63
64
65
66 (defn dna-plot [d]
67 (view
68 (->
69 (visual (:temp d))
70 (set-y-label (:temp-units d))
71 (set-x-label "Time (deci-seconds)")
72 (set-title (:name d))))
73 (view
74 (->
75 (visual (:glow d))
76 (set-y-label (:glow-units d))
77 (set-x-label "Time (deci-seconds)")
78 (set-title (:name d))))
79 d)
80
81 (defn dna-melting [d]
82
83 (->
84 (scatter-plot (:temp d) (:glow d))
85 (set-y-label (:glow-units d))
86 (set-x-label (:temp-units d))
87 (set-title (:name d))))
88
89
90 (defmethod visual clojure.lang.LazySeq [ls]
91 (visual (vec ls)))
92
93 (defmethod visual clojure.lang.PersistentVector [v]
94 (set-y-range (xy-plot (vec (range (count v))) v)
95 (apply min v) (apply max v)))
96
97 (defmethod visual dna-data [d]
98 (dna-plot d))
99
100
101
102
103
104
105 ;;; Filter V_RTD and V_F to reduce noise
106
107
108
109 ;(let [a (int 1.8e4) b (int 2.2e4)]
110 ; (visual (map (vec (temperature triple-cycle)) (range a b)))
111 ; (visual (map (vec (dna-fraction triple-cycle)) (range a b))))
112
113 ;; Differentiate the resulting function with respect to temperature.
114
115 (def test-temp-range (vec (range 260 380 0.1)))
116
117 (defn trans-print [& args] (apply println args) args)
118
119 ;; (def search
120 ;; #(for [x (range -1000 1000 100) y (range -1000 1000 100)]
121 ;; (do
122 ;; (print [x y])
123 ;; (try (trans-print (non-linear-model
124 ;; (ideal-dna-fraction 30e-6)
125 ;; test-dna-fraction test-temperature [x y]
126 ;; :method :newton-raphson))
127
128 ;; (catch Exception e (trans-print nil))))))
129
130
131 ;;(non-linear-model (ideal-dna-fraction 30e-6) test-dna-fraction test-temperature [-184 -71e3])
132
133 ;;(time (def answer (non-linear-model (ideal-dna-fraction 30e-6) test-dna-fraction test-temperature [-200 -90e3])))
134
135 ;;we're assuming that the SYBR Green 1 doesn't glow at all when there is only
136 ;;single stranded DNA, but it actually does. V_f only ever gets down to ~0.1 V
137 ;;even with everything melted.
138
139
140 ;;correct for thermal capacatance
141
142 (defn find-max [v]
143 (first (reduce (fn [a b] (if (> (last b) (last a)) b a)) (indexed v))))
144
145 (defn find-min [v]
146 (first (reduce (fn [a b] (if (< (last b) (last a)) b a)) (indexed v))))
147
148 (defn plot-thermal-delay []
149 (let [a (int 2.5e3) b (int 5e3)]
150 (dna-plot (assoc A-1
151 :temp (map (vec (:temp A-1)) (range a b))
152 :glow (map (vec (:glow A-1)) (range a b))))))
153
154 (defvar thermal-delay
155 (let [a (int 2.5e3) b (int 5e3)]
156 (-
157 (find-min (map (vec (:glow A-1)) (range a b)))
158 (find-max (map (vec (:temp A-1)) (range a b)))))
159 "There are two sources of thermal delay: thermal capacatance
160 of the cuvette holder and thermal capacatance from the time
161 it takes DNA to unwind.
162
163 Use A-1 to find the thermal delay. There was almost no delay
164 between heating and cooling for A-1, and thus the extremum of
165 temperature and glow should temporally coincide if there were
166 no delays. It turns out that there is about a 30 second lag!" )
167
168 (defn thermal-delay-correction*
169 "There is a certain delay between the RTD reporting changes in
170 temperature and the temperature of the DNA actually changing.
171 This shifts the temperature values so that the delay is eliminated."
172 [thermal-delay #^dna-data d]
173 (assoc d
174 :name (str (:name d) " |thermal-correction| ")
175 :temp (drop-last thermal-delay (:temp d))
176 :glow (drop thermal-delay (:glow d))))
177
178 (def thermal-delay-correction (partial thermal-delay-correction* thermal-delay))
179
180 ;;correct for photo-bleaching
181
182 (defn general-linear [[A B] x] (+ (* A x) B))
183
184 (defvar bleaching-coef nil
185 ;; (let [[A B]
186 ;; (:coefs
187 ;; (let [a (int 3e4) b (length (:glow pure-bleaching))]
188 ;; (non-linear-model
189 ;; general-linear
190 ;; (map (vec (:glow pure-bleaching)) (range a b))
191 ;; (range a b)
192 ;; [1 1])))]
193 ;; A)
194 "This function corrects V_f for photo-bleaching over a period of time.
195 Photo-bleaching was found to be linear(!) over quite a long period of time.
196 Data is taken from sample B-1, which I left in the holder for an hour
197 with no heating")
198
199 (defn bleaching-correction*
200 [bleaching-coef #^dna-data d]
201 ;;the correction is clibrated to apply to the raw data only
202 (assert (= (:glow-units d) "Raw Glow (Volts)"))
203 ;;we need to modulate this correction factor
204
205 (let [baseline (first (drop 3e4 (:temp pure-bleaching)))]
206 (assoc d
207 :name (str (:name d) " |bleaching-correction| ")
208 :glow (map (fn [[i n]] (- n (*
209 (/ n baseline)
210 ((partial general-linear [bleaching-coef 0]) i))))
211 (indexed (:glow d))))))
212
213 (def bleaching-correction (partial bleaching-correction* bleaching-coef))
214
215 ;; compensate for residual fluorescence when everything is melted
216
217 (defn residual-fluorescence
218 "the SYBR1 green dye doesn't completly stop glowing once everything
219 is melted."
220 [#^dna-data d]
221 (let [low (apply min (:glow d))
222 high (apply max (:glow d))]
223 (assoc d
224 :name (str (:name d) " |residual-fluorescence| ")
225 :glow (map (fn [f]
226 (- f (* (- 1 (/ f high)) low))) (:glow d)))))
227
228
229
230
231 ;; extract the cooling phase
232
233 (defn cooling-phase
234 "first stretch where the temperature is going down for a while
235 equivalent to where the f curve starts to recover"
236 [#^dna-data d]
237 (let [t (:temp d)
238 f (:glow d)
239 dividing-line (reduce (fn [a b] (if (< (a 1) (b 1)) a b)) (indexed f))
240 t-new (drop (dividing-line 0) t)
241 f-new (drop (dividing-line 0) f)]
242 (assoc d
243 :name (str (:name d) " |extract cooling| ")
244 :temp (vec t-new) :glow (vec f-new))))
245
246
247
248 ;; filter noise
249 ;;(http://clojure-log.n01se.net/date/2009-10-13.html by ambient
250 ;;this is such a cool transform!!!
251
252
253 (defn lo-pass
254 [d coll]
255 (reductions #(+ (* %2 d) (* %1 (- 1.0 d))) coll))
256
257
258 ;;)
259
260 (def reasonable-low-pass (partial lo-pass 1e-2))
261
262 (defn low-pass-correction
263 "filter the raw data through a low pass filter to remove
264 electronic noise"
265 [#^dna-data d]
266 (assert (= (:glow-units d) "Raw Glow (Volts)"))
267 (assert (= (:temp-units d) "Raw Temperature (Volts)"))
268 (assoc d
269 :name (str (:name d) " |low-pass| ")
270 :temp (reasonable-low-pass (:temp d))
271 :glow (reasonable-low-pass (:glow d))))
272
273
274
275
276
277 ;; correct for units
278
279 (defn RTD-voltage->temperature
280 "transform RTD voltage to temperature.
281 R_RTD = 1000 + 3.85 * (T - 273)
282 T = ((R_RTD - 1000) / 3.85 ) + 273
283 V_RTD = 15 * R_RTD / (R + R_RTD)
284 R_RTD = R / (15/V_RTD - 1)"
285 [coll]
286 (let [R 14.97e3
287 V 15]
288 (letfn [(T [R_RTD] (+ 273 (/ (- R_RTD 1000) 3.85))) ;; from lab notrd
289 (R_RTD [V_RTD] (/ R (- (/ V V_RTD) 1)))]
290 (map (comp T R_RTD) coll))))
291
292 (defn normalize
293 "Transform photodiode current to relative fluorescence."
294 [coll]
295 (let [low (apply min coll)
296 high (apply max coll)]
297 (map #(/ (- % low) (- high low)) coll)))
298
299 (defn unit-transform
300 "turn raw voltage into temperature and dna-concentration"
301 [#^dna-data d]
302 (assert (= (:glow-units d) "Raw Glow (Volts)"))
303 (assert (= (:temp-units d) "Raw Temperature (Volts)"))
304 (assoc d
305 :name (str (:name d) " |units| ")
306 :temp-units "Temperature (Kelvin)"
307 :glow-units "DNA-Fracton"
308 :temp (RTD-voltage->temperature (:temp d))
309 :glow (normalize (:glow d))))
310
311
312 (defn temp-unit-transform
313 "turn raw voltage into temperature and dna-concentration"
314 [#^dna-data d]
315 (assert (= (:temp-units d) "Raw Temperature (Volts)"))
316 (assoc d
317 :name (str (:name d) " |temp-units| ")
318 :temp-units "Temperature (Kelvin)"
319 :temp (RTD-voltage->temperature (:temp d))))
320
321 (defn glow-unit-transform
322 "turn raw voltage into temperature and dna-concentration"
323 [#^dna-data d]
324 (assert (= (:glow-units d) "Raw Glow (Volts)"))
325 (assoc d
326 :name (str (:name d) " |glow-units| ")
327 :glow-units "DNA-Fracton"
328 :glow (normalize (:glow d))))
329
330
331 (defn thermal-efficiency
332 [#^dna-data d]
333 (assert (= (:temp-units d) "Temperature (Kelvin)"))
334 (assert (= (:glow-units d) "Raw Glow (Volts)"))
335 (assoc d
336 :name (str (:name d) " |thermal-efficiency| ")
337 :glow nil))
338
339
340
341 (defn remove-first-values
342 "remove the first minute or so of data so that the initial temperature of
343 the cuvette itself doesn't matter"
344 [#^dna-data d]
345 (assoc d
346 :name (str (:name d) " |remove-first| ")
347 :temp (drop 600 (:temp d))
348 :glow (drop 600 (:glow d))))
349
350 (defn diff [coll] (map - (rest coll) coll ))
351 (defn temp-transform
352 "my assumption will be that at the very end of the temperature data, the
353 cuvette is exactly the same temperature as the RTD. Then, I use newton's
354 heat transfer and numerical integration to derive the heat of the cuvette
355 for previous times"
356 [k-value temperature-data]
357 (let [T1 (vec (reverse temperature-data))
358 ;; want fast indexed access, and instead of doing the problem in reverse
359 ;; just reverse the vector at the start, treat it normally, and
360 ;; then reverse at the end
361 initial-cuvette-temp (first T1)
362 initial-RTD-temp (first T1)
363 step (fn [prev-T2 current-T1]
364 (let [new-T2 (+ prev-T2 (* k-value (- current-T1 prev-T2)))]
365 new-T2))]
366 (reverse (reductions step initial-cuvette-temp T1))))
367
368 (def k-value 0.1)
369
370 (defn temp-corr-2 [k-value T1*]
371 (let [T1 (reverse T1*)]
372 (reverse (cons (first T1) (map + (map (partial * k-value) (diff T1)) (rest T1))))))
373
374 (defn temp-corr-3 [initial-cuvette-temp k-value T1]
375 (let [step
376 (fn [prev-T2 current-T1]
377 (+ prev-T2 (* k-value (- current-T1 prev-T2))))]
378 (rest (reductions step initial-cuvette-temp T1))))
379
380 (defn newton-temp-correction* [k-value #^dna-data d]
381 (assoc d
382 :name (str (:name d) " |newton-temp| ")
383 :temp (temp-transform k-value (:temp d))))
384 (defn newton-temp-correction2* [k-value #^dna-data d]
385 (assoc d
386 :name (str (:name d) " |newton-temp-reverse| ")
387 :temp (temp-corr-2 k-value (:temp d))))
388 (defn newton-temp-correction3* [init-temp k-value #^dna-data d]
389 (assoc d
390 :name (str (:name d) " |newton-temp-reverse| ")
391 :temp (temp-corr-3 init-temp k-value (:temp d))))
392
393 (defn turn-e [n] (format "%5.2e" n))
394
395 (defn newton-temp-correction4*
396 "now with AIR!!!"
397 [k-block-sample k-sample-air initial-sample-temp room-temp #^dna-data d]
398 (let [old-temp (:temp d)
399 step (fn [prev-T2 current-T1]
400 (+ prev-T2
401 (* k-block-sample (- current-T1 prev-T2))
402 (* k-sample-air (- room-temp prev-T2))))
403 new-temp (rest (reductions step initial-sample-temp old-temp))]
404 (assoc d
405 :name (str (:name d)
406 " |temp-adjusted {"
407 "initial-sample:" (turn initial-sample-temp) " "
408 "room-temp:" (turn room-temp) " "
409 "k-block-sample:" (turn-e k-block-sample) " "
410 "k-sample-air:" (turn-e k-sample-air) "}")
411 :temp new-temp)))
412
413 (defn plot-adjusted-temp
414 "visualize the temperature correction"
415 [adjuster #^dna-data d]
416 (view
417 (add-lines
418 (visual (:temp d))
419 (vec (range (count (:temp d))))
420 (vec (:temp (adjuster d))))))
421
422
423
424
425
426 ;; process data
427
428 (defn process [#^dna-data d]
429 (cooling-phase
430 (unit-transform
431 (residual-fluorescence
432 (bleaching-correction
433 (low-pass-correction
434 ;(thermal-delay-correction
435 d))))))
436
437
438 (defn first-pass
439 "after this, the only thing stopping the melting curve from
440 being a straight line is thermal capacatance"
441 [#^dna-data data]
442 (low-pass-correction
443 ;(remove-first-values
444 (bleaching-correction
445 data)))
446
447
448
449 (defn explore [k-value]
450 (dna-melting (newton-temp-correction* k-value (first-pass A-2))))
451
452
453 (defn explore2 [k-value]
454 (dna-melting (newton-temp-correction2* k-value (first-pass A-2))))
455
456
457 (defn explore3 [init-temp k-value]
458 (dna-melting (newton-temp-correction3* init-temp k-value (unit-transform (first-pass A-2)))))
459
460
461
462 ;; get the three characteristic values we want.
463
464 (defn ideal-dna-fraction* [concentration [delta-S delta-H] temperature]
465 (let [R 8.31447215
466 t temperature
467 Ct concentration
468 K_eq (exp (- (/ delta-S R) (/ delta-H (* R t))))
469 f (/
470 (+ 1 (* Ct K_eq)
471 (- (sqrt (+ 1 (* 2 Ct K_eq)))))
472 (* Ct K_eq))]
473 f))
474
475 (def ideal-dna-fraction (partial ideal-dna-fraction* 30e-6))
476
477 (defvar starting-values
478 ;; [-184 -71e3]
479 [-904 -32e4]
480 "chosen from theoritical estimation")
481
482
483 (defn analyze* [iterations #^dna-data d]
484 (let [best-fit-results
485 (non-linear-model
486 ideal-dna-fraction
487 (:glow d)
488 (:temp d)
489 starting-values
490 :max-iter iterations)]
491 best-fit-results))
492
493
494 (def analyze (partial analyze* 200))
495
496 (defn plot-results [d]
497 (let [ans (analyze d)]
498 (view (doto (xy-plot (:x ans) (:y ans))
499 (set-title (:name d))
500 (add-lines (:x ans) (:fitted ans))))
501 ans))
502
503 (defn zzz [d [A B]]
504 (let [gar (vec (map (partial ideal-dna-fraction [A B]) (:temp d)))]
505 (view (doto (xy-plot (:temp d) (:glow d))
506 (set-title (:name d))
507 (add-lines (:temp d) gar)))))
508
509
510
511
512 (defn index-filter [pred coll]
513 (when pred
514 (for [[idx elt] (indexed coll) :when (pred elt)] idx)))
515
516
517
518
519 ;; we're going to estimate two correction factors,
520 ;; a constant capacative temperature delay,
521 ;; and a linear decrease in brightness. We'll do this
522 ;; before unit-transforms.
523
524 (defn zipmap-collect
525 "Returns a map with the keys mapped to the corresponding vals.
526 adds extra values to a list"
527 [keys vals]
528 (loop [map {}
529 ks (seq keys)
530 vs (seq vals)]
531 (if (and ks vs)
532 (recur (if (contains? map (first ks))
533 (assoc map (first ks) (cons (first vs) (map (first ks))))
534 (assoc map (first ks) (list (first vs))))
535 (next ks)
536 (next vs))
537 map)))
538
539
540 (defn little-process [thermal-cap bleaching #^dna-data d]
541 ((partial thermal-delay-correction thermal-cap)
542 ((partial bleaching-correction bleaching) d)))
543
544 (def little-process (memoize little-process))
545
546 ;(defn lookup-map [#^dna-data d]
547
548
549 (defn converge-function
550 "this is the vertical discrepancy after applying the two transforms"
551 [#^dna-data d [thermal-cap bleaching] x]
552 (let [new-d (little-process thermal-cap bleaching d)
553 spreads
554 (map (vec (:glow new-d))
555 (index-filter (partial = x) (vec (:temp new-d))))
556 spread (- (apply max spreads) (apply min spreads))]
557 spread))
558
559
560
561
562
563
564
565
566
567
568 (defn blah [k] (comp
569 ( partial newton-temp-correction3* 280 k ) temp-unit-transform first-pass))
570
571 ;(def ggg (comp
572 ; dna-melting ( partial thermal-delay-correction* 120 ) (blah 0.0018)))
573
574
575
576
577
578 ;; Ensure that the melting function is uniquely valued by
579 ;; combining samples with identical temperature values.
580 (defn cooling-function
581 "just put it in a map for this!"
582 [source]
583 (let [data (cooling-phase source)
584 lookup (apply sorted-map
585 (interleave
586 (:temperature data) (:dna-fraction data)))]
587 {:temperature (vec (keys lookup))
588 :dna-fraction (vec (map lookup (keys lookup)))}))
589
590
591 (defn turn [n]
592 (format "%5.2f" n))
593
594 (defn bleach-correction-2 [bleaching-constant #^dna-data d]
595 (assert (= (:glow-units d) "Raw Glow (Volts)"))
596 (let [old-glow (vec (:glow d))
597 integral-old-glow (vec (reductions + old-glow))
598 correction (fn [t] (+
599 (old-glow t)
600 (* bleaching-constant (integral-old-glow t))))
601 new-glow (map + (map correction (range 0 (count old-glow))) old-glow)]
602 ;;glow is related to the number of particles,
603 ;;and the number destroyed is some fraction of
604 ;;the number present and glowing.
605 (assoc d
606 :name (str (:name d) " |bleaching-corrected constant = " (turn bleaching-constant) "| ")
607 :glow new-glow)))
608
609
610
611 (defn ultimate-correction
612 ([k-block-sample k-sample-air initial-sample-temp room-temp bleach-constant
613 #^dna-data d
614 ]
615 ((comp
616 (partial newton-temp-correction4*
617 k-block-sample k-sample-air initial-sample-temp room-temp)
618
619 temp-unit-transform
620 (partial bleach-correction-2 bleach-constant)
621 low-pass-correction) d))
622
623 ([{:keys [test-k-sample-air
624 test-k-block-sample
625 test-sample-temp
626 test-room-temp
627 test-bleach]}
628 #^dna-data d]
629 (ultimate-correction
630 (mean test-k-block-sample) (mean test-k-sample-air)
631 (mean test-sample-temp) (mean test-room-temp) (mean test-bleach) d)))
632
633
634 ;;((uui 0.0020 0.0001 280 293) (bleach-correction-2 1.1e-5 ?-3))
635
636
637 ;;; this is for ?-3 after 35 iterations
638 (def opt {:test-k-sample-air '(9.375630358472117E-4 9.375630359053611E-4), :test-k-block-sample '(0.0010343754438043107 0.0010343754455447197), :test-room-temp '(294.9804782861611 294.98047828674316), :test-sample-temp '(293.36120605294127 293.3612060546875), :test-bleach '(1.771081481128931E-5 1.7710814863094127E-5), :align 1.5796052678842228E-4})
639
640
641 ;;; C-1 after 20 iterations
642 (def opt2
643 {:test-k-sample-air '(9.936472587585448E-4 9.936491641998292E-4), :test-k-block-sample '(2.9390106201171873E-4 2.939580917358398E-4), :test-room-temp '(294.76531982421875 294.7653388977051), :test-sample-temp '(294.765625 294.765682220459), :test-bleach '(2.39045524597168E-5 2.3906250000000002E-5), :align 1.7562642331021098E-4})
644
645
646 ;;(time (def result-?-3 (doall (take 10 (iterate (partial improve ?-3) start-values-overestimates)))))
647
648
649 ;;(doall (map (comp dna-melting #(ultimate-correction % ?-3)) result-?-3))
650
651
652 (defn subsample
653 "subsample v every n points"
654 [n v]
655
656 (vec (map (vec v) (vec (range 0 (count (vec v)) n)))))
657
658
659 (defn max-difference [v] (- (apply max v) (apply min v)))
660
661 (defn alignment [#^dna-data d]
662 (let [distribution (vec (vals
663 (apply sorted-map
664 (interleave (:temp d) (:glow d)))))]
665 (mean (map variance (partition 10 distribution)))))
666
667 (defn garigh [d]
668 (let [distribution (vec (vals
669 (apply sorted-map
670 (interleave (:temp d) (:glow d)))))]
671 (view (visual (vec (map max-difference (partition 100 distribution)))))))
672
673
674 (defn alignment-4 [#^dna-data d]
675 (let [distribution (vec (vals
676 (apply sorted-map
677 (interleave (:temp d) (:glow d)))))]
678 (reduce + (map sq (map max-difference (partition 100 distribution))))))
679
680
681 (defn alignment-2 [#^dna-data d]
682 (let [distribution
683 (vec
684 (vals
685 (apply sorted-map
686 (interleave (subsample 2 (:temp d)) (subsample 2(:glow d))))))]
687 (reduce + (map max-difference (partition 30 distribution)))))
688 (defn alignment-3 [#^dna-data d]
689 (let [distribution
690 (vec
691 (vals
692 (apply sorted-map
693 (interleave (subsample 2 (:temp d)) (subsample 2(:glow d))))))]
694 (reduce max (map max-difference (partition 30 distribution)))))
695
696
697 (comment
698 ;; this takes a while
699 (def opts (zipmap all-samples (map #(last (take 20 (iterate (partial improve %) start-values))) all-samples))))
700
701 ;(def opts (zipmap all-samples opts-vals))
702
703 (defn mean-map [v s]
704 (map mean (map (partial vector v) s)))
705
706
707 (def start-values
708 {:test-k-sample-air [0.000001 0.001]
709 :test-k-block-sample [0.001 0.03]
710 :test-room-temp [290 300]
711 :test-sample-temp [285 310]
712 :test-bleach [0.00001 0.0009]})
713
714 (def start-values-2
715 {:test-k-sample-air [1e-15 0.0005]
716 :test-k-block-sample [0.001 0.002]
717 :test-room-temp [290 300]
718 :test-sample-temp [290 300]
719 :test-bleach [1e-7 0.0005]})
720
721
722
723 (def start-values-3
724 {:test-k-block-sample [0.0005 0.00125 0.002]
725 :test-k-sample-air [0.0000001 0.000001 0.0000019]
726 :test-sample-temp [280 300]
727 :test-room-temp [300 306]
728 :test-bleach [0.000001 0.00001]})
729
730 (defn condense [[a b :as v]]
731 (if (< (/ (abs (- (apply max v) (apply min v))) (apply max v)) 1e-2) (vector (mean v)) v))
732
733 (defn mass-shift
734 [value v]
735 (let [frac 2/3]
736 (cond (>= (count v) 2)
737 (let [[a b] v]
738 (condense (vec (map #(float (+ (* frac %) (* (- 1 frac) value))) v))))
739 (= (count v) 1)
740 v)))
741
742
743 (defn improve* [#^dna-data d
744 {:keys [test-k-sample-air
745 test-k-block-sample
746 test-room-temp
747 test-sample-temp
748 test-bleach] :as domain}]
749 (do (print \newline)
750 (reduce (fn [m1 m2] (if (< (:align m1) (:align m2)) m1 m2))
751
752
753 (for [k-sample-air (condense test-k-sample-air)
754 k-block-sample (condense test-k-block-sample)
755 room-temp (condense test-room-temp)
756 initial-sample-temp (condense test-sample-temp)
757 bleach-constant (condense test-bleach)]
758
759 (do
760 (print ".")
761 (let [align
762 (alignment-2
763 (ultimate-correction
764 k-block-sample k-sample-air initial-sample-temp room-temp
765 bleach-constant d))
766 align (if (Double/isNaN align) Double/POSITIVE_INFINITY align)]
767 {:test-k-sample-air (mass-shift k-sample-air test-k-sample-air)
768 :test-k-block-sample(mass-shift k-block-sample test-k-block-sample)
769 :test-room-temp (mass-shift room-temp test-room-temp)
770 :test-sample-temp (mass-shift initial-sample-temp test-sample-temp)
771 :test-bleach (mass-shift bleach-constant test-bleach)
772 :align align}))))))
773
774
775
776 (def println-repl (bound-fn [& args] (apply println args)))
777 (def print-repl (bound-fn [& args] (apply print args)))
778
779
780
781 (defn improve [#^dna-data d
782 {:keys [test-k-block-sample
783 test-k-sample-air
784 test-sample-temp
785 test-room-temp
786 test-bleach] :as domain}]
787 (reduce
788 (fn [m1 m2] (if (< (:align m1) (:align m2)) m1 m2))
789 (let [return-hash
790 (fn
791 [[k-block-sample k-sample-air initial-sample-temp
792 room-temp bleach-constant]]
793 (do
794 (print-repl ".")
795 (let [align
796 (alignment-4
797 (ultimate-correction
798 k-block-sample k-sample-air initial-sample-temp room-temp
799 bleach-constant d))
800 align (if (Double/isNaN align) Double/POSITIVE_INFINITY align)]
801 {:test-k-sample-air (mass-shift k-sample-air test-k-sample-air)
802 :test-k-block-sample(mass-shift k-block-sample test-k-block-sample)
803 :test-room-temp (mass-shift room-temp test-room-temp)
804 :test-sample-temp (mass-shift initial-sample-temp test-sample-temp)
805 :test-bleach (mass-shift bleach-constant test-bleach)
806 :align align})))]
807 (pmap return-hash
808 (cartesian-product
809 test-k-block-sample
810 test-k-sample-air
811 test-sample-temp
812 test-room-temp
813 test-bleach)))))
814
815 (def improve (memoize improve))
816
817 (defn iterate-test [n #^dna-data d]
818 (last (take n (iterate (partial improve d) start-values-3))))
819
820
821 (defn nested-vals [key coll]
822 (for [x (tree-seq coll? seq coll) :when (contains? x key)]
823 (get x key)))
824
825
826 (defn quantum-efficiency* [q-constant #^dna-data d]
827 ;; 0.8 at room temperature
828 ;; quantum efficiency is somehow
829 ;; inversely proportional to temperature
830 (let [correction
831
832
833
834 (fn [t g] (/ g (* 0.8 (exp (* q-constant (- 292 t))))))]
835 (assoc d
836 :name (str (:name d) " |quantum-correction| ")
837 :glow (map correction (:temp d) (:glow d)))))
838
839 (def quantum-efficiency (partial quantum-efficiency* 1.2e-2))
840
841 (defn exp-decay [[A] x]
842 (* 0.8 (exp (* A (- 298 x)))))
843
844 ;(defn final-join [#^dna-data d]
845
846
847
848 (def join-opts
849 {
850 A-1 {:test-k-sample-air [1.760184242508937E-6], :test-k-block-sample [0.0012930832259977858], :test-room-temp [304.41977945963544], :test-sample-temp [281.31689453125], :test-bleach [9.956068424799014E-6], :align 0.800815268639094}
851 A-2 {:test-k-sample-air [3.017639376897326E-7], :test-k-block-sample [0.0011550507430608075], :test-room-temp [304.6666717529297], :test-sample-temp [282.6337890625], :test-bleach [9.5610075732111E-6], :align 0.8530570499185829}
852 A-3 {:test-k-sample-air [2.6829059556281816E-6], :test-k-block-sample [0.002413294818252325], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6]}
853
854 B-2 {:test-k-sample-air [1.6274943088016396E-6], :test-k-block-sample [0.0013714926317334175], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 0.7631166194402621}
855 B-3
856 {:test-k-sample-air [1.455345833771086E-6], :test-k-block-sample [0.0014841814991086721], :test-room-temp [304.6666717529297], :test-sample-temp [287.9835662841797], :test-bleach [9.965317076421343E-6], :align 0.5358800697386289}
857
858 C-1
859 {:test-k-sample-air [1.6907095717518434E-6], :test-k-block-sample [0.001278722076676786], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 0.7249279378017799}
860 C-2
861 {:test-k-sample-air [1.77524979487013E-6], :test-k-block-sample [0.001278722076676786], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 2.0474987460209793}
862 C-3
863 {:test-k-sample-air [1.010541116860016E-6], :test-k-block-sample [0.001601272417853276], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 0.7191738524207253}
864
865 ?-1
866 {:test-k-sample-air [1.5533593114014366E-6], :test-k-block-sample [0.0013761443551629782], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 1.3838506291264723}
867 ?-3
868 {:test-k-sample-air [2.940294715851148E-7], :test-k-block-sample [0.0018434864080821474], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 0.48549623932134006}
869
870 })
871
872
873 (defn final-join [#^dna-data d]
874
875 (let [
876 m (apply sorted-map (interleave (:temp d) (:glow d)))
877 distribution (vec (vals m))
878 ordinates (vec (keys m))
879 ]
880 (assoc d
881 :name (str (:name d) " |joined| ")
882 :temp (map (fn [region] (mean [(apply min region) (apply max region)]))
883 (partition 30 ordinates))
884
885 :glow (map (fn [region] (mean [(apply min region) (apply max region)]))
886 (partition 30 distribution)))))
887
888
889 (defn melting-point [#^dna-data d]
890 (let [t (drop 150 (:temp d))
891 g (drop 150 (:glow d))]
892 (nth t (find-min (map / (diff g) (diff t) )))))
893
894
895 (def target-dir (file-str "/home/r/MIT/6/6.122/DNA/output"))
896
897 (defn x-form-dna [#^dna-data d]
898 (final-join (ultimate-correction (join-opts d) d)))
899
900 (defn save-raw-dna-curve [#^dna-data d]
901 (save (dna-melting d) (str target-dir File/separator (:name d) ".png")))
902
903 (defn save-dna-curve-with-temp [#^dna-data d*]
904 (let [
905
906 melt (melting-point d*)
907 d (assoc d* :name (str (:name d*) " |M.P. = " (turn melt) " |"))
908 ]
909 (save (add-lines (dna-melting d) (repeat (count (:glow d)) melt) (:glow d))
910 (str target-dir File/separator (:name d) ".png"))))
911
912
913
914 (defn process-all []
915 (doall (pmap save-dna-curve all-samples))
916 (doall (pmap (comp save-dna-curve-with-temp x-form-dna) all-samples)))
917
918
919 (def ?-temps (vec (map (comp melting-point x-form-dna) [?-1 ?-3])))
920 (def A-temps (vec (map (comp melting-point x-form-dna) [A-1 A-2 A-3])))
921 (def B-temps (vec (map (comp melting-point x-form-dna) [B-2 B-3])))
922 (def C-temps (vec (map (comp melting-point x-form-dna) [C-1 C-2 C-3])))
923
924 (defn report [temps]
925
926 (sorted-map :data temps :std (sqrt (variance temps)) :mean (mean temps)))
927
928
929 ;;; now it's time to start data reporting
930
931 ;(defn process [#^dna-data d]
932 ; (ultimate-correction (opts d) d)
933
934
935
936 (report A-temps)
937 {:data [348.8771481779248 349.3989434763391 348.76079920968573],
938 :mean 349.0122969546499,
939 :std 0.33986161910548834}
940
941 (report B-temps)
942 {:data [349.0655539116902 349.92155530431506],
943 :mean 349.4935546080026,
944 :std 0.6052843894485812}
945
946 (report C-temps)
947 {:data [349.92341967511976 349.68832687337056 348.2872189461977],
948 :mean 349.299655164896,
949 :std 0.884639745344559}
950
951 (report ?-temps)
952 {:data [350.4525305192239 349.87377093318975],
953 :mean 350.1631507262068,
954 :std 0.4092448279511269}
955
956