diff src/rlm/dna_melting.clj @ 0:78a630e650d2

initial import
author Robert McIntyre <rlm@mit.edu>
date Tue, 18 Oct 2011 00:57:08 -0700
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/rlm/dna_melting.clj	Tue Oct 18 00:57:08 2011 -0700
     1.3 @@ -0,0 +1,956 @@
     1.4 +(ns rlm.dna-melting
     1.5 +  (:refer-clojure :only [])
     1.6 +  (:require rlm.ns-rlm mobius.base))
     1.7 +(rlm.ns-rlm/ns-clone mobius.base)
     1.8 +(use :reload-all 'rlm.web-stuff)
     1.9 +				
    1.10 +
    1.11 +(def pre "/home/r/MIT/6/6.122/DNA/samples/")
    1.12 +
    1.13 +
    1.14 +
    1.15 +(defrecord dna-data [temp glow name temp-units glow-units]
    1.16 +  clojure.lang.IFn
    1.17 +  (invoke [this k] (get this k))
    1.18 +  (invoke [this k not-found] (get this k not-found))
    1.19 +  (toString [this] "whatever"))
    1.20 +
    1.21 +(defmethod print-method dna-data [d w]
    1.22 +	   (.write w (.toString d)))
    1.23 +
    1.24 +(defmethod print-dup dna-data [d w]
    1.25 +	   (print-method d w))
    1.26 +
    1.27 +(defn load-data [source]
    1.28 +  (let [data (read-dataset source :delim \tab)
    1.29 +	temp (vec ($ :col0 data))
    1.30 +	glow (vec ($ :col1 data))]
    1.31 +    (dna-data. temp glow
    1.32 +	        (last (re-split #"/" source))
    1.33 +		"Raw Temperature (Volts)"
    1.34 +		"Raw Glow (Volts)")))
    1.35 +
    1.36 +
    1.37 +(def ?-1 (load-data (str pre "sample4-1.txt")))
    1.38 +
    1.39 +;; I accidentally removed the cuvette while it was still recording!
    1.40 +;; drop last 5 seconds of invalid data!
    1.41 +(def ?-2* (load-data (str pre "sample4-2.txt")))
    1.42 +(def ?-2 (assoc ?-2*
    1.43 +	   :temp (drop-last 500 (:temp ?-2*))
    1.44 +	   :glow (drop-last 500 (:glow ?-2*))))
    1.45 +
    1.46 +(def ?-3 (load-data (str pre "sample4-3.txt")))
    1.47 +(def A-1 (load-data (str pre "sampleA-1.txt")))
    1.48 +(def A-2 (load-data (str pre "sampleA-2.txt")))
    1.49 +(def A-3 (load-data (str pre "sampleA-3.txt")))
    1.50 +(def B-2 (load-data (str pre "sampleB-2.txt")))
    1.51 +(def B-3 (load-data (str pre "sampleB-3.txt")))
    1.52 +(def C-1 (load-data (str pre "sampleC-1.txt")))
    1.53 +(def C-2 (load-data (str pre "sampleC-2.txt")))
    1.54 +(def C-3 (load-data (str pre "sampleC-3.txt")))
    1.55 +
    1.56 +
    1.57 +(def pure-bleaching (load-data (str pre "sampleB-1(failed-to-heat).txt")))
    1.58 +(def double-cycle (load-data (str pre "sampleC-3(double).txt")))
    1.59 +(def triple-cycle (load-data (str pre "sampleC-3(triple).txt")))
    1.60 +
    1.61 +(def all-samples
    1.62 +     [?-1 ?-3 A-1 A-2 A-3 B-2 B-3 C-1 C-2 C-3])
    1.63 +
    1.64 +(def everything
    1.65 +     (concat [pure-bleaching double-cycle triple-cycle] all-samples))
    1.66 +
    1.67 +
    1.68 +
    1.69 +(defn dna-plot [d]
    1.70 +    (view
    1.71 +     (->
    1.72 +      (visual (:temp d))
    1.73 +      (set-y-label (:temp-units d))
    1.74 +      (set-x-label "Time (deci-seconds)")
    1.75 +      (set-title  (:name d))))
    1.76 +    (view
    1.77 +     (->
    1.78 +      (visual (:glow d))
    1.79 +      (set-y-label (:glow-units d))
    1.80 +      (set-x-label "Time (deci-seconds)")
    1.81 +      (set-title  (:name d))))
    1.82 +    d)
    1.83 +
    1.84 +(defn dna-melting [d]
    1.85 +  
    1.86 +   (->
    1.87 +    (scatter-plot (:temp d) (:glow d))
    1.88 +    (set-y-label (:glow-units d))
    1.89 +    (set-x-label (:temp-units d))
    1.90 +    (set-title (:name d))))
    1.91 +
    1.92 +
    1.93 +(defmethod visual clojure.lang.LazySeq [ls]
    1.94 +	   (visual (vec ls)))
    1.95 +
    1.96 +(defmethod visual clojure.lang.PersistentVector [v]
    1.97 +	   (set-y-range (xy-plot (vec (range (count v)))  v)
    1.98 +			      (apply min v) (apply max v)))
    1.99 +
   1.100 +(defmethod visual dna-data [d]
   1.101 +	   (dna-plot d))
   1.102 +
   1.103 +
   1.104 +
   1.105 +
   1.106 +    
   1.107 +
   1.108 +;;; Filter V_RTD and V_F to reduce noise
   1.109 +
   1.110 +
   1.111 +
   1.112 +;(let [a (int 1.8e4) b (int 2.2e4)]
   1.113 +;  (visual (map (vec (temperature triple-cycle)) (range a b)))
   1.114 +;  (visual (map (vec (dna-fraction triple-cycle)) (range a b))))
   1.115 +
   1.116 +;; Differentiate the resulting function with respect to temperature. 
   1.117 +
   1.118 +(def test-temp-range (vec (range 260 380 0.1)))
   1.119 +
   1.120 +(defn trans-print [& args] (apply println args) args)
   1.121 +
   1.122 +;; (def search
   1.123 +;;      #(for [x (range -1000 1000 100) y (range -1000 1000 100)]
   1.124 +;; 	(do
   1.125 +;; 	  (print [x y])
   1.126 +;; 	  (try (trans-print (non-linear-model
   1.127 +;; 			     (ideal-dna-fraction 30e-6)
   1.128 +;; 			      test-dna-fraction test-temperature [x y]
   1.129 +;; 			     :method :newton-raphson))
   1.130 +	       
   1.131 +;; 	       (catch Exception e (trans-print nil))))))
   1.132 +
   1.133 +
   1.134 +;;(non-linear-model (ideal-dna-fraction 30e-6) test-dna-fraction test-temperature [-184  -71e3])
   1.135 +
   1.136 +;;(time (def answer (non-linear-model (ideal-dna-fraction 30e-6) test-dna-fraction test-temperature [-200  -90e3])))
   1.137 +
   1.138 +;;we're assuming that the SYBR Green 1 doesn't glow at all when there is only
   1.139 +;;single stranded DNA, but it actually does. V_f only ever gets down to ~0.1 V
   1.140 +;;even with everything melted.
   1.141 +
   1.142 +
   1.143 +;;correct for thermal capacatance
   1.144 +
   1.145 +(defn find-max [v]
   1.146 +  (first (reduce (fn [a b] (if (> (last b) (last a)) b a)) (indexed v))))
   1.147 +
   1.148 +(defn find-min [v]
   1.149 +  (first (reduce (fn [a b] (if (< (last b) (last a)) b a)) (indexed v))))
   1.150 +
   1.151 +(defn plot-thermal-delay []
   1.152 +   (let [a (int 2.5e3) b (int 5e3)]
   1.153 +     (dna-plot (assoc A-1
   1.154 +		 :temp (map (vec (:temp A-1)) (range a b))
   1.155 +		 :glow (map (vec (:glow A-1)) (range a b))))))
   1.156 +     
   1.157 +(defvar thermal-delay
   1.158 +     (let [a (int 2.5e3) b (int 5e3)]
   1.159 +       (- 
   1.160 +	(find-min (map (vec (:glow A-1)) (range a b)))
   1.161 +	(find-max (map (vec (:temp A-1)) (range a b)))))
   1.162 +     "There are two sources of thermal delay: thermal capacatance
   1.163 +      of the cuvette holder and thermal capacatance from the time
   1.164 +      it takes DNA to unwind.
   1.165 +
   1.166 +      Use A-1 to find the thermal delay. There was almost no delay
   1.167 +      between heating and cooling for A-1, and thus the extremum of
   1.168 +      temperature and glow should temporally coincide if there were
   1.169 +      no delays. It turns out that there is about a 30 second lag!" )
   1.170 +
   1.171 +(defn thermal-delay-correction*
   1.172 +  "There is a certain delay between the RTD reporting changes in
   1.173 +   temperature and the temperature of the DNA actually changing.
   1.174 +   This shifts the temperature values so that the delay is eliminated."
   1.175 +  [thermal-delay #^dna-data d]
   1.176 +  (assoc d
   1.177 +    :name (str (:name d) " |thermal-correction| ")
   1.178 +    :temp (drop-last thermal-delay (:temp d))
   1.179 +    :glow (drop thermal-delay (:glow d))))
   1.180 +  
   1.181 +(def thermal-delay-correction (partial thermal-delay-correction* thermal-delay))
   1.182 +
   1.183 +;;correct for photo-bleaching
   1.184 +
   1.185 +(defn general-linear [[A B] x] (+ (* A x) B))
   1.186 +
   1.187 +(defvar bleaching-coef nil
   1.188 +   ;; (let [[A B]
   1.189 +   ;; 	 (:coefs				   
   1.190 +   ;; 	  (let [a (int 3e4) b (length (:glow pure-bleaching))]
   1.191 +   ;; 	    (non-linear-model
   1.192 +   ;; 	     general-linear
   1.193 +   ;; 	     (map (vec (:glow pure-bleaching)) (range a b))
   1.194 +   ;; 	     (range a b)
   1.195 +   ;; 	     [1 1])))]
   1.196 +   ;;   A)
   1.197 +  "This function corrects V_f for photo-bleaching over a period of time.
   1.198 +     Photo-bleaching was found to be linear(!) over quite a long period of time.
   1.199 +     Data is taken from sample B-1, which I left in the holder for an hour
   1.200 +     with no heating")
   1.201 +
   1.202 +(defn bleaching-correction*
   1.203 +  [bleaching-coef #^dna-data d]
   1.204 +  ;;the correction is clibrated to apply to the raw data only
   1.205 +  (assert (= (:glow-units d) "Raw Glow (Volts)"))
   1.206 +  ;;we need to modulate this correction factor
   1.207 +
   1.208 +  (let [baseline (first (drop 3e4 (:temp pure-bleaching)))]
   1.209 +    (assoc d
   1.210 +      :name (str (:name d) " |bleaching-correction| ")
   1.211 +      :glow (map (fn [[i n]] (- n (*
   1.212 +				   (/ n baseline)
   1.213 +				   ((partial general-linear [bleaching-coef 0]) i))))
   1.214 +		 (indexed (:glow d))))))
   1.215 +
   1.216 +(def bleaching-correction (partial bleaching-correction* bleaching-coef))
   1.217 +
   1.218 +;; compensate for residual fluorescence when everything is melted
   1.219 +
   1.220 +(defn residual-fluorescence
   1.221 +  "the SYBR1 green dye doesn't completly stop glowing once everything
   1.222 +   is melted."
   1.223 +  [#^dna-data d]
   1.224 +  (let [low (apply min (:glow d))
   1.225 +	high (apply max (:glow d))]
   1.226 +    (assoc d
   1.227 +      :name (str (:name d) " |residual-fluorescence| ")
   1.228 +      :glow (map (fn [f]
   1.229 +		   (- f (* (- 1 (/ f high)) low))) (:glow d)))))
   1.230 +
   1.231 +
   1.232 +
   1.233 +
   1.234 +;; extract the cooling phase
   1.235 +
   1.236 +(defn cooling-phase
   1.237 +  "first stretch where the temperature is going down for a while
   1.238 +   equivalent to where the f curve starts to recover"
   1.239 +  [#^dna-data d]
   1.240 +  (let [t (:temp d)
   1.241 +	f (:glow d)
   1.242 +	dividing-line (reduce (fn [a b] (if (< (a 1) (b 1)) a b)) (indexed f))
   1.243 +	t-new (drop (dividing-line 0) t)
   1.244 +	f-new (drop (dividing-line 0) f)]
   1.245 +    (assoc d
   1.246 +      :name (str (:name d) " |extract cooling| ")
   1.247 +      :temp (vec t-new) :glow (vec f-new))))
   1.248 +
   1.249 +
   1.250 +
   1.251 +;; filter noise
   1.252 +;;(http://clojure-log.n01se.net/date/2009-10-13.html by ambient
   1.253 +;;this is such a cool transform!!!
   1.254 +
   1.255 +
   1.256 +(defn lo-pass
   1.257 +  [d coll]
   1.258 +  (reductions #(+ (* %2 d) (* %1 (- 1.0 d))) coll))
   1.259 +
   1.260 +
   1.261 +;;)
   1.262 +
   1.263 +(def reasonable-low-pass (partial lo-pass 1e-2))
   1.264 +
   1.265 +(defn low-pass-correction
   1.266 +  "filter the raw data through a low pass filter to remove
   1.267 +   electronic noise"
   1.268 +  [#^dna-data d]
   1.269 +  (assert (= (:glow-units d) "Raw Glow (Volts)"))
   1.270 +  (assert (= (:temp-units d) "Raw Temperature (Volts)"))
   1.271 +  (assoc d
   1.272 +    :name (str (:name d) " |low-pass| ")
   1.273 +    :temp (reasonable-low-pass (:temp d))
   1.274 +    :glow (reasonable-low-pass (:glow d))))
   1.275 +  
   1.276 +
   1.277 +
   1.278 +
   1.279 +
   1.280 +;; correct for units
   1.281 +
   1.282 +(defn RTD-voltage->temperature
   1.283 +  "transform RTD voltage to temperature.
   1.284 +  R_RTD = 1000 + 3.85 * (T - 273)
   1.285 +  T = ((R_RTD - 1000) / 3.85 ) + 273
   1.286 +  V_RTD = 15 * R_RTD / (R + R_RTD)
   1.287 +  R_RTD = R / (15/V_RTD - 1)"
   1.288 +  [coll]
   1.289 +  (let [R 14.97e3
   1.290 +	V 15]
   1.291 +    (letfn [(T [R_RTD] (+ 273 (/ (- R_RTD 1000)  3.85))) ;; from lab notrd
   1.292 +	    (R_RTD [V_RTD] (/ R (- (/ V V_RTD) 1)))]
   1.293 +      (map (comp T R_RTD) coll))))
   1.294 +
   1.295 +(defn normalize
   1.296 +  "Transform photodiode current to relative fluorescence."
   1.297 +  [coll]
   1.298 +  (let [low (apply min coll)
   1.299 +	high (apply max coll)]
   1.300 +    (map #(/ (- % low) (- high low)) coll)))
   1.301 +
   1.302 +(defn unit-transform
   1.303 +  "turn raw voltage into temperature and dna-concentration"
   1.304 +  [#^dna-data d]
   1.305 +  (assert (= (:glow-units d) "Raw Glow (Volts)"))
   1.306 +  (assert (= (:temp-units d) "Raw Temperature (Volts)"))
   1.307 +  (assoc d
   1.308 +    :name (str (:name d) " |units| ")
   1.309 +    :temp-units "Temperature (Kelvin)"
   1.310 +    :glow-units "DNA-Fracton"
   1.311 +    :temp (RTD-voltage->temperature (:temp d))
   1.312 +    :glow (normalize (:glow d))))
   1.313 +
   1.314 +
   1.315 +(defn temp-unit-transform
   1.316 +  "turn raw voltage into temperature and dna-concentration"
   1.317 +  [#^dna-data d]
   1.318 +  (assert (= (:temp-units d) "Raw Temperature (Volts)"))
   1.319 +  (assoc d
   1.320 +    :name (str (:name d) " |temp-units| ")
   1.321 +    :temp-units "Temperature (Kelvin)"
   1.322 +    :temp (RTD-voltage->temperature (:temp d))))
   1.323 +
   1.324 +(defn glow-unit-transform
   1.325 +  "turn raw voltage into temperature and dna-concentration"
   1.326 +  [#^dna-data d]
   1.327 +  (assert (= (:glow-units d) "Raw Glow (Volts)"))
   1.328 +  (assoc d
   1.329 +    :name (str (:name d) " |glow-units| ")
   1.330 +    :glow-units "DNA-Fracton"
   1.331 +    :glow (normalize (:glow d))))
   1.332 +
   1.333 +
   1.334 +(defn thermal-efficiency
   1.335 +  [#^dna-data d]
   1.336 +  (assert (= (:temp-units d) "Temperature (Kelvin)"))
   1.337 +  (assert (= (:glow-units d) "Raw Glow (Volts)"))
   1.338 +  (assoc d
   1.339 +    :name (str (:name d) " |thermal-efficiency| ")
   1.340 +    :glow nil))
   1.341 +
   1.342 +
   1.343 +
   1.344 +(defn remove-first-values
   1.345 +  "remove the first minute or so of data so that the initial temperature of
   1.346 +   the cuvette itself doesn't matter"
   1.347 +  [#^dna-data d]
   1.348 +  (assoc d
   1.349 +    :name (str (:name d) " |remove-first| ")
   1.350 +    :temp (drop 600 (:temp d)) 
   1.351 +    :glow (drop 600 (:glow d))))
   1.352 +
   1.353 +(defn diff [coll]  (map - (rest coll) coll ))
   1.354 +(defn temp-transform
   1.355 +  "my assumption will be that at the very end of the temperature data, the
   1.356 +   cuvette is exactly the same temperature as the RTD.  Then, I use newton's
   1.357 +   heat transfer and numerical integration to derive the heat of the cuvette
   1.358 +   for previous times"
   1.359 +  [k-value temperature-data]
   1.360 +  (let [T1 (vec (reverse temperature-data))
   1.361 +    ;; want fast indexed access, and instead of doing the problem in reverse
   1.362 +    ;; just reverse the vector at the start, treat it normally, and
   1.363 +    ;; then reverse at the end
   1.364 +	initial-cuvette-temp (first T1)
   1.365 +	initial-RTD-temp     (first T1)
   1.366 +	step (fn [prev-T2 current-T1]
   1.367 +	  (let [new-T2 (+ prev-T2 (* k-value (- current-T1 prev-T2)))]
   1.368 +	    new-T2))]
   1.369 +    (reverse (reductions step initial-cuvette-temp T1))))
   1.370 +
   1.371 +(def k-value 0.1)
   1.372 +
   1.373 +(defn temp-corr-2 [k-value T1*]
   1.374 +  (let [T1 (reverse T1*)]
   1.375 +    (reverse  (cons (first T1)  (map + (map (partial * k-value) (diff T1)) (rest T1))))))
   1.376 +
   1.377 +(defn temp-corr-3 [initial-cuvette-temp k-value T1]
   1.378 +  (let [step
   1.379 +	(fn [prev-T2 current-T1]
   1.380 +	  (+ prev-T2 (* k-value (- current-T1 prev-T2))))]
   1.381 +    (rest (reductions step initial-cuvette-temp T1))))
   1.382 +
   1.383 +(defn newton-temp-correction* [k-value #^dna-data d]
   1.384 +  (assoc d
   1.385 +    :name (str (:name d) " |newton-temp| ")
   1.386 +    :temp (temp-transform k-value (:temp d))))
   1.387 +(defn newton-temp-correction2* [k-value #^dna-data d]
   1.388 +  (assoc d
   1.389 +    :name (str (:name d) " |newton-temp-reverse| ")
   1.390 +    :temp (temp-corr-2 k-value (:temp d))))
   1.391 +(defn newton-temp-correction3* [init-temp k-value #^dna-data d]
   1.392 +  (assoc d
   1.393 +    :name (str (:name d) " |newton-temp-reverse| ")
   1.394 +    :temp (temp-corr-3 init-temp k-value (:temp d))))
   1.395 +
   1.396 +(defn turn-e [n] (format "%5.2e" n))
   1.397 +
   1.398 +(defn newton-temp-correction4*
   1.399 +  "now with AIR!!!"
   1.400 +  [k-block-sample k-sample-air initial-sample-temp room-temp #^dna-data d]
   1.401 +  (let [old-temp (:temp d)
   1.402 +	step (fn [prev-T2 current-T1]
   1.403 +	       (+ prev-T2
   1.404 +		  (* k-block-sample (- current-T1 prev-T2))
   1.405 +		  (* k-sample-air   (- room-temp  prev-T2))))
   1.406 +	new-temp (rest (reductions step initial-sample-temp old-temp))]
   1.407 +    (assoc d
   1.408 +      :name (str (:name d)
   1.409 +		 " |temp-adjusted {" 
   1.410 +		 "initial-sample:" (turn initial-sample-temp) " "
   1.411 +		 "room-temp:" (turn room-temp) " "
   1.412 +		 "k-block-sample:" (turn-e k-block-sample) " "
   1.413 +		 "k-sample-air:" (turn-e k-sample-air) "}")
   1.414 +      :temp new-temp)))
   1.415 +
   1.416 +(defn plot-adjusted-temp
   1.417 +  "visualize the temperature correction"
   1.418 +  [adjuster #^dna-data d]
   1.419 +  (view
   1.420 +   (add-lines
   1.421 +    (visual (:temp d))
   1.422 +    (vec (range (count (:temp d))))
   1.423 +    (vec (:temp (adjuster d))))))
   1.424 +  
   1.425 +    
   1.426 +
   1.427 +
   1.428 +
   1.429 +;; process data
   1.430 +
   1.431 +(defn process [#^dna-data d]
   1.432 +  (cooling-phase
   1.433 +   (unit-transform
   1.434 +    (residual-fluorescence
   1.435 +     (bleaching-correction
   1.436 +      (low-pass-correction
   1.437 +       ;(thermal-delay-correction
   1.438 +	d))))))
   1.439 +
   1.440 +
   1.441 +(defn first-pass
   1.442 +  "after this, the only thing stopping the melting curve from
   1.443 +   being a straight line is thermal capacatance"
   1.444 +  [#^dna-data data]
   1.445 +  (low-pass-correction
   1.446 +   ;(remove-first-values
   1.447 +    (bleaching-correction
   1.448 +     data)))
   1.449 +
   1.450 +
   1.451 +
   1.452 +(defn explore [k-value]
   1.453 +  (dna-melting (newton-temp-correction* k-value (first-pass A-2))))
   1.454 +
   1.455 +
   1.456 +(defn explore2 [k-value]
   1.457 +  (dna-melting (newton-temp-correction2* k-value (first-pass A-2))))
   1.458 +  
   1.459 +
   1.460 +(defn explore3 [init-temp k-value]
   1.461 +  (dna-melting (newton-temp-correction3* init-temp  k-value (unit-transform (first-pass A-2)))))
   1.462 +  
   1.463 +
   1.464 +
   1.465 +;; get the three characteristic values we want.
   1.466 +
   1.467 +(defn ideal-dna-fraction* [concentration [delta-S delta-H] temperature]
   1.468 +    (let [R 8.31447215
   1.469 +	  t temperature
   1.470 +	  Ct concentration
   1.471 +	  K_eq (exp (- (/ delta-S R) (/ delta-H (* R t))))
   1.472 +	  f (/
   1.473 +	     (+ 1 (* Ct K_eq)
   1.474 +		(- (sqrt (+ 1 (* 2 Ct K_eq)))))
   1.475 +	     (* Ct K_eq))]
   1.476 +  f))
   1.477 +
   1.478 +(def ideal-dna-fraction (partial ideal-dna-fraction* 30e-6))
   1.479 +
   1.480 +(defvar starting-values
   1.481 +  ;;  [-184 -71e3]
   1.482 +  [-904 -32e4]
   1.483 +  "chosen from theoritical estimation") 
   1.484 +
   1.485 +
   1.486 +(defn analyze* [iterations #^dna-data d]
   1.487 +  (let [best-fit-results
   1.488 +	(non-linear-model
   1.489 +	 ideal-dna-fraction
   1.490 +	 (:glow d)
   1.491 +	 (:temp d)
   1.492 +	 starting-values
   1.493 +	:max-iter iterations)]
   1.494 +    best-fit-results))
   1.495 +		    
   1.496 +
   1.497 +(def analyze (partial analyze* 200))
   1.498 +
   1.499 +(defn plot-results [d]
   1.500 +  (let [ans (analyze d)]
   1.501 +    (view (doto (xy-plot (:x ans) (:y ans))
   1.502 +      (set-title (:name d))
   1.503 +      (add-lines (:x ans) (:fitted ans))))
   1.504 +    ans))
   1.505 +
   1.506 +(defn zzz [d [A B]]
   1.507 +  (let [gar (vec (map (partial ideal-dna-fraction [A B]) (:temp d)))]
   1.508 +    (view (doto (xy-plot (:temp d) (:glow d))
   1.509 +      (set-title (:name d))
   1.510 +      (add-lines (:temp d) gar)))))
   1.511 +  
   1.512 +
   1.513 +
   1.514 +
   1.515 +(defn index-filter [pred coll]
   1.516 +  (when pred
   1.517 +    (for [[idx elt] (indexed coll) :when (pred elt)] idx)))
   1.518 +
   1.519 +
   1.520 +
   1.521 +
   1.522 +;; we're going to estimate two correction factors,
   1.523 +;; a constant capacative temperature delay,
   1.524 +;; and a linear decrease in brightness.  We'll do this
   1.525 +;; before unit-transforms.
   1.526 +
   1.527 +(defn zipmap-collect
   1.528 +  "Returns a map with the keys mapped to the corresponding vals.
   1.529 +   adds extra values to a list"
   1.530 +  [keys vals]
   1.531 +    (loop [map {}
   1.532 +           ks (seq keys)
   1.533 +           vs (seq vals)]
   1.534 +      (if (and ks vs)
   1.535 +        (recur (if (contains? map (first ks))
   1.536 +		 (assoc map (first ks) (cons (first vs) (map (first ks))))
   1.537 +		 (assoc map (first ks) (list (first vs))))
   1.538 +               (next ks)
   1.539 +               (next vs))
   1.540 +        map)))
   1.541 +
   1.542 +
   1.543 +(defn little-process [thermal-cap bleaching #^dna-data d]
   1.544 +  ((partial thermal-delay-correction thermal-cap)
   1.545 +   ((partial bleaching-correction bleaching) d)))
   1.546 +
   1.547 +(def little-process (memoize little-process))
   1.548 + 
   1.549 +;(defn lookup-map [#^dna-data d]
   1.550 +  
   1.551 +
   1.552 +(defn converge-function
   1.553 +  "this is the vertical discrepancy after applying the two transforms"
   1.554 +  [#^dna-data d [thermal-cap bleaching] x]
   1.555 +  (let [new-d (little-process thermal-cap bleaching d)
   1.556 +	spreads
   1.557 +	(map (vec (:glow new-d))
   1.558 +	     (index-filter (partial = x) (vec (:temp new-d))))
   1.559 +	spread (- (apply max spreads) (apply min spreads))]
   1.560 +    spread))
   1.561 +
   1.562 +
   1.563 +
   1.564 +
   1.565 +
   1.566 +
   1.567 +
   1.568 +
   1.569 +
   1.570 +
   1.571 +(defn  blah  [k] (comp 
   1.572 +	   ( partial newton-temp-correction3* 280 k ) temp-unit-transform first-pass))
   1.573 +
   1.574 +;(def ggg (comp 
   1.575 +;	  dna-melting ( partial thermal-delay-correction* 120 ) (blah 0.0018)))
   1.576 +
   1.577 +
   1.578 +
   1.579 +
   1.580 +
   1.581 +;; Ensure that the melting function is uniquely valued by
   1.582 +;;   combining samples with identical temperature values.
   1.583 +(defn cooling-function
   1.584 +  "just put it in a map for this!"
   1.585 +  [source]
   1.586 +  (let [data (cooling-phase source)
   1.587 +	lookup (apply sorted-map
   1.588 +		    (interleave
   1.589 +		     (:temperature data) (:dna-fraction data)))]
   1.590 +    {:temperature (vec (keys lookup))
   1.591 +     :dna-fraction (vec (map lookup (keys lookup)))}))
   1.592 +
   1.593 +
   1.594 +(defn turn [n]
   1.595 +  (format "%5.2f" n))
   1.596 +
   1.597 +(defn bleach-correction-2 [bleaching-constant #^dna-data d] 
   1.598 +  (assert (= (:glow-units d) "Raw Glow (Volts)"))
   1.599 +  (let [old-glow (vec (:glow d))
   1.600 +	integral-old-glow (vec (reductions + old-glow))
   1.601 +	correction (fn [t] (+
   1.602 +			    (old-glow t)
   1.603 +			    (* bleaching-constant (integral-old-glow t))))
   1.604 +	new-glow (map + (map correction (range 0 (count old-glow))) old-glow)]
   1.605 +    ;;glow is related to the number of particles,
   1.606 +    ;;and the number destroyed is some fraction of
   1.607 +    ;;the number present and glowing.
   1.608 +    (assoc d
   1.609 +      :name (str (:name d) " |bleaching-corrected constant = " (turn bleaching-constant) "| ")
   1.610 +      :glow new-glow)))
   1.611 +    
   1.612 +    
   1.613 +
   1.614 +(defn ultimate-correction
   1.615 +  ([k-block-sample k-sample-air initial-sample-temp room-temp bleach-constant
   1.616 +    #^dna-data d
   1.617 +    ]
   1.618 +     ((comp
   1.619 +      (partial newton-temp-correction4*
   1.620 +	       k-block-sample k-sample-air initial-sample-temp room-temp)
   1.621 +
   1.622 +      temp-unit-transform
   1.623 +      (partial bleach-correction-2 bleach-constant)
   1.624 +      low-pass-correction) d))
   1.625 +
   1.626 +  ([{:keys [test-k-sample-air
   1.627 +	    test-k-block-sample
   1.628 +	    test-sample-temp
   1.629 +	    test-room-temp
   1.630 +	    test-bleach]}
   1.631 +		#^dna-data d]
   1.632 +  (ultimate-correction
   1.633 +    (mean test-k-block-sample) (mean test-k-sample-air)
   1.634 +    (mean test-sample-temp) (mean test-room-temp) (mean test-bleach) d)))
   1.635 +
   1.636 +
   1.637 +;;((uui 0.0020 0.0001 280 293) (bleach-correction-2 1.1e-5 ?-3))
   1.638 +
   1.639 +
   1.640 +;;; this is for ?-3 after 35 iterations
   1.641 +(def opt {:test-k-sample-air '(9.375630358472117E-4 9.375630359053611E-4), :test-k-block-sample '(0.0010343754438043107 0.0010343754455447197), :test-room-temp '(294.9804782861611 294.98047828674316), :test-sample-temp '(293.36120605294127 293.3612060546875), :test-bleach '(1.771081481128931E-5 1.7710814863094127E-5), :align 1.5796052678842228E-4})
   1.642 +
   1.643 +
   1.644 +;;; C-1 after 20 iterations
   1.645 +(def opt2
   1.646 +     {:test-k-sample-air '(9.936472587585448E-4 9.936491641998292E-4), :test-k-block-sample '(2.9390106201171873E-4 2.939580917358398E-4), :test-room-temp '(294.76531982421875 294.7653388977051), :test-sample-temp '(294.765625 294.765682220459), :test-bleach '(2.39045524597168E-5 2.3906250000000002E-5), :align 1.7562642331021098E-4})
   1.647 +
   1.648 +
   1.649 +;;(time (def result-?-3 (doall (take 10 (iterate (partial improve ?-3) start-values-overestimates)))))
   1.650 +
   1.651 +
   1.652 +;;(doall (map (comp dna-melting #(ultimate-correction % ?-3)) result-?-3))
   1.653 +
   1.654 +
   1.655 +(defn subsample
   1.656 +  "subsample v every n points"
   1.657 +  [n v]
   1.658 +
   1.659 +    (vec (map (vec v) (vec (range 0 (count (vec v)) n)))))
   1.660 +
   1.661 +
   1.662 +(defn max-difference [v] (- (apply max v) (apply min v)))
   1.663 +   
   1.664 +(defn alignment [#^dna-data d]
   1.665 +  (let [distribution (vec (vals
   1.666 +			  (apply sorted-map
   1.667 +				 (interleave (:temp d) (:glow d)))))]
   1.668 +    (mean (map variance (partition 10 distribution)))))
   1.669 +
   1.670 +(defn garigh [d]
   1.671 +  (let [distribution (vec (vals
   1.672 +			   (apply sorted-map
   1.673 +				  (interleave (:temp d) (:glow d)))))]
   1.674 +    (view (visual (vec  (map max-difference (partition 100 distribution)))))))
   1.675 +
   1.676 +
   1.677 +(defn alignment-4 [#^dna-data d]
   1.678 +  (let [distribution (vec (vals
   1.679 +			  (apply sorted-map
   1.680 +				 (interleave (:temp d) (:glow d)))))]
   1.681 +    (reduce + (map sq (map max-difference (partition 100 distribution))))))
   1.682 +
   1.683 +
   1.684 +(defn alignment-2 [#^dna-data d]
   1.685 +  (let [distribution
   1.686 +	(vec
   1.687 +	 (vals
   1.688 +	  (apply sorted-map
   1.689 +		 (interleave (subsample 2 (:temp d)) (subsample 2(:glow d))))))]
   1.690 +    (reduce +     (map max-difference (partition 30 distribution)))))
   1.691 +(defn alignment-3 [#^dna-data d]
   1.692 +  (let [distribution
   1.693 +	(vec
   1.694 +	 (vals
   1.695 +	  (apply sorted-map
   1.696 +		 (interleave (subsample 2 (:temp d)) (subsample 2(:glow d))))))]
   1.697 +    (reduce max     (map max-difference (partition 30 distribution)))))
   1.698 +
   1.699 +
   1.700 +(comment
   1.701 +  ;; this takes a while
   1.702 +  (def opts (zipmap all-samples (map #(last (take 20 (iterate (partial improve %) start-values))) all-samples))))
   1.703 +
   1.704 +;(def opts (zipmap all-samples opts-vals))
   1.705 +
   1.706 +(defn mean-map [v s]
   1.707 +  (map mean (map (partial vector v) s)))
   1.708 +
   1.709 +  
   1.710 +(def start-values
   1.711 +     {:test-k-sample-air [0.000001 0.001]
   1.712 +      :test-k-block-sample   [0.001 0.03]
   1.713 +      :test-room-temp  [290 300]
   1.714 +      :test-sample-temp  [285 310]
   1.715 +      :test-bleach [0.00001 0.0009]})
   1.716 +
   1.717 +(def start-values-2 
   1.718 +     {:test-k-sample-air [1e-15 0.0005]
   1.719 +      :test-k-block-sample   [0.001 0.002]
   1.720 +      :test-room-temp  [290 300]
   1.721 +      :test-sample-temp  [290 300]
   1.722 +      :test-bleach [1e-7 0.0005]})
   1.723 +
   1.724 +    
   1.725 +
   1.726 +(def start-values-3
   1.727 +     {:test-k-block-sample   [0.0005  0.00125 0.002]
   1.728 +      :test-k-sample-air [0.0000001 0.000001 0.0000019]
   1.729 +      :test-sample-temp  [280 300]
   1.730 +      :test-room-temp  [300 306]
   1.731 +      :test-bleach [0.000001 0.00001]})
   1.732 +         
   1.733 +(defn condense [[a b :as v]]
   1.734 +  (if (< (/ (abs (- (apply max v) (apply min v))) (apply max v)) 1e-2) (vector (mean v)) v))
   1.735 +
   1.736 +(defn mass-shift
   1.737 +  [value v]
   1.738 +  (let [frac 2/3]
   1.739 +    (cond (>= (count v) 2)
   1.740 +	  (let [[a b] v]
   1.741 +	    (condense (vec (map #(float (+ (* frac %) (* (- 1 frac) value))) v))))
   1.742 +	  (= (count v) 1)
   1.743 +	  v)))
   1.744 +
   1.745 +
   1.746 +(defn improve* [#^dna-data d
   1.747 +	       {:keys [test-k-sample-air
   1.748 +		       test-k-block-sample
   1.749 +		       test-room-temp
   1.750 +		       test-sample-temp
   1.751 +		       test-bleach] :as domain}]
   1.752 +  (do (print \newline)
   1.753 +      (reduce (fn [m1 m2] (if (< (:align m1) (:align m2)) m1 m2))
   1.754 +
   1.755 +	      
   1.756 +	      (for [k-sample-air (condense test-k-sample-air)
   1.757 +		    k-block-sample (condense test-k-block-sample)
   1.758 +		    room-temp (condense test-room-temp)
   1.759 +		    initial-sample-temp (condense test-sample-temp)
   1.760 +		    bleach-constant (condense test-bleach)]
   1.761 +		
   1.762 +		(do
   1.763 +		  (print ".")
   1.764 +		  (let [align
   1.765 +			(alignment-2
   1.766 +			 (ultimate-correction
   1.767 +			  k-block-sample k-sample-air initial-sample-temp room-temp
   1.768 +			  bleach-constant d))
   1.769 +			align (if (Double/isNaN align) Double/POSITIVE_INFINITY  align)]
   1.770 +		    {:test-k-sample-air (mass-shift k-sample-air test-k-sample-air)
   1.771 +		     :test-k-block-sample(mass-shift k-block-sample test-k-block-sample)
   1.772 +		     :test-room-temp (mass-shift room-temp test-room-temp)
   1.773 +		     :test-sample-temp (mass-shift initial-sample-temp test-sample-temp)
   1.774 +		     :test-bleach (mass-shift bleach-constant test-bleach)
   1.775 +		     :align align}))))))
   1.776 +
   1.777 +
   1.778 +
   1.779 +(def println-repl (bound-fn [& args] (apply println args)))
   1.780 +(def print-repl (bound-fn [& args] (apply print args)))
   1.781 +
   1.782 +
   1.783 +
   1.784 +(defn improve [#^dna-data d
   1.785 +	       {:keys [test-k-block-sample
   1.786 +		       test-k-sample-air
   1.787 +		       test-sample-temp
   1.788 +		       test-room-temp
   1.789 +		       test-bleach] :as domain}]
   1.790 +       (reduce
   1.791 +       (fn [m1 m2] (if (< (:align m1) (:align m2)) m1 m2))
   1.792 +       (let [return-hash
   1.793 +	     (fn
   1.794 +	       [[k-block-sample k-sample-air initial-sample-temp
   1.795 +		room-temp bleach-constant]]
   1.796 +	       (do
   1.797 +		 (print-repl ".")
   1.798 +		 (let [align
   1.799 +		       (alignment-4
   1.800 +			(ultimate-correction
   1.801 +			 k-block-sample k-sample-air initial-sample-temp room-temp
   1.802 +			 bleach-constant d))
   1.803 +		       align (if (Double/isNaN align) Double/POSITIVE_INFINITY  align)]
   1.804 +		   {:test-k-sample-air (mass-shift k-sample-air test-k-sample-air)
   1.805 +		    :test-k-block-sample(mass-shift k-block-sample test-k-block-sample)
   1.806 +		    :test-room-temp (mass-shift room-temp test-room-temp)
   1.807 +		    :test-sample-temp (mass-shift initial-sample-temp test-sample-temp)
   1.808 +		    :test-bleach (mass-shift bleach-constant test-bleach)
   1.809 +		    :align align})))]
   1.810 +	 (pmap return-hash 
   1.811 +	       (cartesian-product
   1.812 +		 test-k-block-sample
   1.813 +		 test-k-sample-air
   1.814 +		 test-sample-temp
   1.815 +		 test-room-temp
   1.816 +		 test-bleach)))))
   1.817 +
   1.818 +(def improve (memoize improve))
   1.819 +
   1.820 +(defn iterate-test [n #^dna-data d]
   1.821 +  (last (take n (iterate (partial improve d) start-values-3))))
   1.822 +
   1.823 +
   1.824 +(defn nested-vals [key coll]
   1.825 +  (for [x (tree-seq coll? seq coll) :when (contains? x key)]
   1.826 +       (get x key)))
   1.827 +
   1.828 +
   1.829 +(defn quantum-efficiency* [q-constant #^dna-data d]
   1.830 +    ;; 0.8 at room temperature
   1.831 +    ;; quantum efficiency is somehow
   1.832 +    ;; inversely proportional to temperature
   1.833 +  (let [correction
   1.834 +
   1.835 +
   1.836 +
   1.837 +	(fn [t g] (/ g   (* 0.8 (exp (* q-constant (- 292 t))))))]
   1.838 +      (assoc d
   1.839 +	:name (str (:name d) " |quantum-correction| ")
   1.840 +	:glow (map correction (:temp d) (:glow d)))))
   1.841 +
   1.842 +(def quantum-efficiency (partial quantum-efficiency* 1.2e-2))
   1.843 +
   1.844 +(defn exp-decay [[A] x]
   1.845 +  (* 0.8 (exp (* A (- 298 x)))))
   1.846 +
   1.847 +;(defn final-join [#^dna-data d]
   1.848 +  
   1.849 +  
   1.850 +
   1.851 +(def join-opts
   1.852 +     {
   1.853 +      A-1 {:test-k-sample-air [1.760184242508937E-6], :test-k-block-sample [0.0012930832259977858], :test-room-temp [304.41977945963544], :test-sample-temp [281.31689453125], :test-bleach [9.956068424799014E-6], :align 0.800815268639094}
   1.854 +      A-2 {:test-k-sample-air [3.017639376897326E-7], :test-k-block-sample [0.0011550507430608075], :test-room-temp [304.6666717529297], :test-sample-temp [282.6337890625], :test-bleach [9.5610075732111E-6], :align 0.8530570499185829}
   1.855 +      A-3 {:test-k-sample-air [2.6829059556281816E-6], :test-k-block-sample [0.002413294818252325], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6]}
   1.856 +
   1.857 +      B-2 {:test-k-sample-air [1.6274943088016396E-6], :test-k-block-sample [0.0013714926317334175], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 0.7631166194402621}
   1.858 +      B-3
   1.859 +      {:test-k-sample-air [1.455345833771086E-6], :test-k-block-sample [0.0014841814991086721], :test-room-temp [304.6666717529297], :test-sample-temp [287.9835662841797], :test-bleach [9.965317076421343E-6], :align 0.5358800697386289}
   1.860 +
   1.861 +      C-1
   1.862 +      {:test-k-sample-air [1.6907095717518434E-6], :test-k-block-sample [0.001278722076676786], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 0.7249279378017799}
   1.863 +      C-2
   1.864 +      {:test-k-sample-air [1.77524979487013E-6], :test-k-block-sample [0.001278722076676786], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 2.0474987460209793}
   1.865 +      C-3
   1.866 +      {:test-k-sample-air [1.010541116860016E-6], :test-k-block-sample [0.001601272417853276], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 0.7191738524207253}
   1.867 +
   1.868 +      ?-1
   1.869 +      {:test-k-sample-air [1.5533593114014366E-6], :test-k-block-sample [0.0013761443551629782], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 1.3838506291264723}
   1.870 +      ?-3
   1.871 +      {:test-k-sample-air [2.940294715851148E-7], :test-k-block-sample [0.0018434864080821474], :test-room-temp [304.6666717529297], :test-sample-temp [281.31689453125], :test-bleach [9.965317076421343E-6], :align 0.48549623932134006}
   1.872 +
   1.873 +      })
   1.874 +
   1.875 +
   1.876 +(defn final-join [#^dna-data d]
   1.877 +  
   1.878 +  (let [
   1.879 +	m (apply sorted-map (interleave (:temp d) (:glow d)))
   1.880 +	distribution (vec (vals m))
   1.881 +	ordinates (vec (keys m))
   1.882 +			   ]
   1.883 +    (assoc d
   1.884 +      :name (str (:name d) " |joined| ")
   1.885 +      :temp (map (fn [region] (mean [(apply min region) (apply max region)]))
   1.886 +		 (partition 30 ordinates))
   1.887 +
   1.888 +      :glow (map (fn [region] (mean [(apply min region) (apply max region)]))
   1.889 +		 (partition 30 distribution)))))
   1.890 +
   1.891 +
   1.892 +(defn melting-point [#^dna-data d]
   1.893 +  (let [t (drop 150 (:temp d))
   1.894 +	g (drop 150 (:glow d))]
   1.895 +    (nth t (find-min (map / (diff g) (diff t) )))))
   1.896 +
   1.897 +
   1.898 +(def target-dir (file-str "/home/r/MIT/6/6.122/DNA/output"))
   1.899 +
   1.900 +(defn x-form-dna [#^dna-data d]
   1.901 +  (final-join (ultimate-correction (join-opts d) d)))
   1.902 +
   1.903 +(defn save-raw-dna-curve [#^dna-data d]
   1.904 +  (save (dna-melting d) (str target-dir File/separator (:name d) ".png")))
   1.905 +
   1.906 +(defn save-dna-curve-with-temp [#^dna-data d*]
   1.907 +  (let [
   1.908 +
   1.909 +	melt (melting-point d*)
   1.910 +	d (assoc d* :name (str (:name d*) " |M.P. = " (turn melt) " |"))
   1.911 +	]
   1.912 +    (save (add-lines (dna-melting d) (repeat (count (:glow d)) melt) (:glow d))
   1.913 +	  (str target-dir File/separator (:name d) ".png"))))
   1.914 +
   1.915 +  
   1.916 +  
   1.917 +(defn process-all []
   1.918 +  (doall (pmap  save-dna-curve all-samples))
   1.919 +  (doall (pmap (comp save-dna-curve-with-temp x-form-dna) all-samples)))
   1.920 +
   1.921 +
   1.922 +(def ?-temps (vec (map (comp melting-point x-form-dna) [?-1 ?-3])))
   1.923 +(def A-temps (vec (map (comp melting-point x-form-dna) [A-1 A-2 A-3])))
   1.924 +(def B-temps (vec (map (comp melting-point x-form-dna) [B-2 B-3])))
   1.925 +(def C-temps (vec (map (comp melting-point x-form-dna) [C-1 C-2 C-3])))
   1.926 +
   1.927 +(defn report [temps]
   1.928 +  
   1.929 +  (sorted-map :data temps :std (sqrt (variance temps)) :mean (mean temps)))
   1.930 +
   1.931 +
   1.932 +;;; now it's time to start data reporting
   1.933 +
   1.934 +;(defn process [#^dna-data d]
   1.935 +;  (ultimate-correction (opts d) d)
   1.936 +
   1.937 +
   1.938 +
   1.939 +(report A-temps)
   1.940 +{:data [348.8771481779248 349.3989434763391 348.76079920968573],
   1.941 + :mean 349.0122969546499,
   1.942 + :std 0.33986161910548834}
   1.943 +
   1.944 +(report B-temps)
   1.945 +{:data [349.0655539116902 349.92155530431506],
   1.946 + :mean 349.4935546080026,
   1.947 + :std 0.6052843894485812}
   1.948 +
   1.949 +(report C-temps)
   1.950 +{:data [349.92341967511976 349.68832687337056 348.2872189461977],
   1.951 + :mean 349.299655164896,
   1.952 + :std 0.884639745344559}
   1.953 +
   1.954 +(report ?-temps)
   1.955 +{:data [350.4525305192239 349.87377093318975],
   1.956 + :mean 350.1631507262068,
   1.957 + :std 0.4092448279511269}
   1.958 +
   1.959 +