changeset 5:fca75c0e8f40

added stories.clj
author Robert McIntyre <rlm@mit.edu>
date Thu, 01 Mar 2012 05:47:37 -0700
parents 12d1367cf1aa
children b8bbb0dbda7b
files src/fanfiction/stories.clj
diffstat 1 files changed, 196 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/fanfiction/stories.clj	Thu Mar 01 05:47:37 2012 -0700
     1.3 @@ -0,0 +1,196 @@
     1.4 +(ns fanfiction.stories)
     1.5 +
     1.6 +(use 'clojure.java.io)
     1.7 +
     1.8 +(import org.htmlcleaner.HtmlCleaner)
     1.9 +(import org.htmlcleaner.TagNode)
    1.10 +(import java.io.File)
    1.11 +(import java.net.URL)
    1.12 +(use 'clojure.contrib.def)
    1.13 +
    1.14 +(def mlp  ; My Little Pony category page (presumably a start-url for best-stories)
    1.15 +  (URL. "http://www.fanfiction.net/cartoon/My_Little_Pony"))
    1.16 +;; It Takes a Village 
    1.17 +;;  http://www.fanfiction.net/s/7490980/1/It_Takes_a_Village 
    1.18 +;;    Reviews:  1058 
    1.19 +;;    Chapters: 17
    1.20 +;; Progress 
    1.21 +;;  http://www.fanfiction.net/s/6982210/1/Progress 
    1.22 +;;    Reviews:  612 
    1.23 +;;    Chapters: 27
    1.24 +;; On a Cross and Arrow 
    1.25 +;;  http://www.fanfiction.net/s/7211729/1/On_a_Cross_and_Arrow 
    1.26 +;;    Reviews:  324 
    1.27 +;;    Chapters: 12
    1.28 +;; Growing Pains 
    1.29 +;;  http://www.fanfiction.net/s/7252679/1/Growing_Pains 
    1.30 +;;    Reviews:  223 
    1.31 +;;    Chapters: 23
    1.32 +;; My Little Pony: FiM The Romancing Quest 
    1.33 +;;  http://www.fanfiction.net/s/7171004/1/My_Little_Pony_FiM_The_Romancing_Quest 
    1.34 +;;    Reviews:  174 
    1.35 +;;    Chapters: 13
    1.36 +;; Don't Let the Sun Catch You Crying 
    1.37 +;;  http://www.fanfiction.net/s/7032385/1/Dont_Let_the_Sun_Catch_You_Crying 
    1.38 +;;    Reviews:  165 
    1.39 +;;    Chapters: 6
    1.40 +
    1.41 +
    1.42 +(def chobits (URL. "http://www.fanfiction.net/anime/Chobits/"))  ; Chobits category page (presumably a start-url for best-stories)
    1.43 +;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
    1.44 +
    1.45 +(def ccs (URL. "http://www.fanfiction.net/anime/Card_Captor_Sakura/"))  ; Card Captor Sakura category page (presumably a start-url for best-stories)
    1.46 +;; Jagged Amber 
    1.47 +;;  http://www.fanfiction.net/s/1348047/1/Jagged_Amber 
    1.48 +;;    Reviews:  4767 
    1.49 +;;    Chapters: 25
    1.50 +;; Misty Dreams 
    1.51 +;;  http://www.fanfiction.net/s/1250008/1/Misty_Dreams 
    1.52 +;;    Reviews:  4460 
    1.53 +;;    Chapters: 27
    1.54 +;; Butterflies 
    1.55 +;;  http://www.fanfiction.net/s/1667519/1/Butterflies 
    1.56 +;;    Reviews:  3029 
    1.57 +;;    Chapters: 36
    1.58 +;; Mischievous Love 
    1.59 +;;  http://www.fanfiction.net/s/1020198/1/Mischievous_Love 
    1.60 +;;    Reviews:  3013 
    1.61 +;;    Chapters: 25
    1.62 +;; Black Wings 
    1.63 +;;  http://www.fanfiction.net/s/2737015/1/Black_Wings 
    1.64 +;;    Reviews:  2822 
    1.65 +;;    Chapters: 23
    1.66 +;; Cherry, My Love 
    1.67 +;;  http://www.fanfiction.net/s/968234/1/Cherry_My_Love 
    1.68 +;;    Reviews:  2612 
    1.69 +;;    Chapters: 26
    1.70 +;; Deeper 
    1.71 +;;  http://www.fanfiction.net/s/4027405/1/Deeper 
    1.72 +;;    Reviews:  2260 
    1.73 +;;    Chapters: 26
    1.74 +;; Through A Looking Glass 
    1.75 +;;  http://www.fanfiction.net/s/2729427/1/Through_A_Looking_Glass 
    1.76 +;;    Reviews:  2009 
    1.77 +;;    Chapters: 9
    1.78 +;; Butterflies: In Spring 
    1.79 +;;  http://www.fanfiction.net/s/2284162/1/Butterflies_In_Spring 
    1.80 +;;    Reviews:  1815 
    1.81 +;;    Chapters: 17
    1.82 +;; The New Trials of Card Captor Sakura and Friends 
    1.83 +;;  http://www.fanfiction.net/s/197453/1/The_New_Trials_of_Card_Captor_Sakura_and_Friends 
    1.84 +;;    Reviews:  1794 
    1.85 +;;    Chapters: 111
    1.86 +;; Ice Queen 
    1.87 +;;  http://www.fanfiction.net/s/1927065/1/Ice_Queen 
    1.88 +;;    Reviews:  1725 
    1.89 +;;    Chapters: 38
    1.90 +;; Crystal Tears 
    1.91 +;;  http://www.fanfiction.net/s/751067/1/Crystal_Tears 
    1.92 +;;    Reviews:  1623 
    1.93 +;;    Chapters: 26
    1.94 +
    1.95 +
    1.96 +(def bible (URL. "http://www.fanfiction.net/book/Bible/"))  ; Bible category page (presumably a start-url for best-stories)
    1.97 +;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
    1.98 +
    1.99 +(def dark (URL. "http://www.fanfiction.net/book/His_Dark_Materials/"))  ; His Dark Materials category page (presumably a start-url for best-stories)
   1.100 +;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
   1.101 +
   1.102 +(def junjo (URL. "http://www.fanfiction.net/anime/Junjo_Romantica/"))  ; Junjo Romantica category page (presumably a start-url for best-stories)
   1.103 +;;The Talk 
   1.104 +;; http://www.fanfiction.net/s/4351277/1/The_Talk 
   1.105 +;;   Reviews:  250 
   1.106 +;;   Chapters: 1
   1.107 +
   1.108 +(def rye (URL. "http://www.fanfiction.net/book/Catcher_in_the_Rye/"))  ; Catcher in the Rye category page (presumably a start-url for best-stories)
   1.109 +;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
   1.110 +
   1.111 +
   1.112 +(defn tags-by-name
   1.113 +  "Seq of the elements named element found recursively under node (nil if none)."
   1.114 +  [#^TagNode node #^String element] (-> node (.getElementListByName element true) seq))
   1.115 +
   1.116 +(defn-memo parse
   1.117 +  "Parse the web page at url with HtmlCleaner and return its root node; nil for a nil url."
   1.118 +  [#^URL url]
   1.119 +  (when-not (nil? url)                   ; guard before .getPath, which throws on nil
   1.120 +    (println "parsing" (.getPath url))
   1.121 +    (with-open [in (input-stream url)] (.clean (HtmlCleaner.) in))))   ; close the stream once parsed
   1.122 +
   1.123 +(defn attributes
   1.124 +  "Copy the attributes of a TagNode into a Clojure map (attribute name -> value)."
   1.125 +  [#^TagNode node]
   1.126 +  (let [attrs (.getAttributes node)] (into {} attrs)))
   1.127 +
   1.128 +(def fanfiction-base "http://www.fanfiction.net")  ; prefix joined onto the hrefs read from parsed pages
   1.129 +
   1.130 +(defn next-fanfiction-url
   1.131 +  "Url of the page that follows url: fanfiction-base + the href of the first link
   1.132 +  whose text matches .*Next.*; nil when the page has no such link."
   1.133 +  [#^URL url]
   1.134 +  (let [anchors (tags-by-name (parse url) "a")
   1.135 +        next-link? (fn [anchor] (re-matches #".*Next.*" (.getText anchor)))
   1.136 +        next-node (first (filter next-link? anchors))]
   1.137 +    ;; the href is appended to fanfiction-base, so it is expected to be site-relative
   1.138 +    (when next-node
   1.139 +      (->> ((attributes next-node) "href")
   1.140 +           (str fanfiction-base)
   1.141 +           (URL.)))))
   1.142 +
   1.143 +(defn get-stories
   1.144 +  "Lazy seq of the divs of class \"z-list\" on the page at url (the nodes the story-* fns read)."
   1.145 +  [#^URL url]
   1.146 +  (for [div (tags-by-name (parse url) "div") :when (= "z-list" ((attributes div) "class"))] div))
   1.147 +
   1.148 +
   1.149 +(defn story-url
   1.150 +  "Url string of a story: fanfiction-base + the href of the node's first link."
   1.151 +  [#^TagNode node]
   1.152 +  (let [link (first (tags-by-name node "a"))] (str fanfiction-base ((attributes link) "href"))))
   1.153 +
   1.154 +(defn story-desc "Text of the second div found under a story node."
   1.155 +  [#^TagNode node] (-> (tags-by-name node "div") second .getText))
   1.156 +
   1.157 +(defn story-reviews
   1.158 +  "Review count parsed from the story description; 0 when no \"Reviews: N\" is found."
   1.159 +  [#^TagNode node]
   1.160 +  (let [[_ digits] (re-matches #".*Reviews: (\d+).*" (story-desc node))]
   1.161 +    (Integer/parseInt (or digits "0"))))
   1.162 +
   1.163 +(defn story-chapters
   1.164 +  "Chapter count parsed from the story description; 1 when no \"Chapters: N\" is found."
   1.165 +  [#^TagNode node]
   1.166 +  (let [[_ digits] (re-matches #".*Chapters: (\d+).*" (story-desc node))]
   1.167 +    (Integer/parseInt (or digits "1"))))
   1.168 +
   1.169 +(defn story-title "Text of a story node's first link, as a String."
   1.170 +  [#^TagNode node] (-> (tags-by-name node "a") first .getText str))
   1.171 +
   1.172 +(defn all-stories
   1.173 +  "Collect the story nodes (see get-stories) of start-url and of the pages reached by
   1.174 +  following \"Next\" links; stops once n links have been followed or none is left."
   1.175 +  [n #^URL start-url]
   1.176 +  (loop [stories [], index 0, current-url start-url]
   1.177 +    ;; into a vector, not concat: nested lazy concats can overflow the stack
   1.178 +    (let [stories* (into stories (get-stories current-url))
   1.179 +          next-url (next-fanfiction-url current-url)]
   1.180 +      (if (or (= index n) (nil? next-url))
   1.181 +        stories*
   1.182 +        (recur stories* (inc index) next-url)))))
   1.183 +
   1.184 +(defn best-stories
   1.185 +  "Print title, url, review count and chapter count for the n stories with the
   1.186 +  highest reviews / (chapters + 1) ratio among those gathered by
   1.187 +  (all-stories search-limit start-url). Returns nil."
   1.188 +  [n search-limit #^URL start-url]
   1.189 +  (let [score #(/ (story-reviews %) (inc (story-chapters %)))
   1.190 +        ranked (->> (all-stories search-limit start-url)
   1.191 +                    (sort-by score) reverse (take n))]   ; ascending sort, flipped: best first
   1.192 +    (doseq [story ranked]
   1.193 +      (println (story-title story) "\n"
   1.194 +               (story-url story) "\n"
   1.195 +               "  Reviews: " (story-reviews story) "\n"
   1.196 +               "  Chapters:" (story-chapters story)))))
   1.197 +           
   1.198 +  
   1.199 +  
   1.200 \ No newline at end of file