annotate src/fanfiction/stories.clj @ 5:fca75c0e8f40

added stories.clj
author Robert McIntyre <rlm@mit.edu>
date Thu, 01 Mar 2012 05:47:37 -0700
parents
children
rev   line source
rlm@5 1 (ns fanfiction.stories)
rlm@5 2
rlm@5 3 (use 'clojure.java.io)
rlm@5 4
rlm@5 5 (import org.htmlcleaner.HtmlCleaner)
rlm@5 6 (import org.htmlcleaner.TagNode)
rlm@5 7 (import java.io.File)
rlm@5 8 (import java.net.URL)
rlm@5 9 (use 'clojure.contrib.def)
rlm@5 10
;; Category listing page for My Little Pony stories; usable as the
;; `start-url` argument of the crawling functions defined in this file.
(def mlp (URL. "http://www.fanfiction.net/cartoon/My_Little_Pony"))
rlm@5 13 ;; It Takes a Village
rlm@5 14 ;; http://www.fanfiction.net/s/7490980/1/It_Takes_a_Village
rlm@5 15 ;; Reviews: 1058
rlm@5 16 ;; Chapters: 17
rlm@5 17 ;; Progress
rlm@5 18 ;; http://www.fanfiction.net/s/6982210/1/Progress
rlm@5 19 ;; Reviews: 612
rlm@5 20 ;; Chapters: 27
rlm@5 21 ;; On a Cross and Arrow
rlm@5 22 ;; http://www.fanfiction.net/s/7211729/1/On_a_Cross_and_Arrow
rlm@5 23 ;; Reviews: 324
rlm@5 24 ;; Chapters: 12
rlm@5 25 ;; Growing Pains
rlm@5 26 ;; http://www.fanfiction.net/s/7252679/1/Growing_Pains
rlm@5 27 ;; Reviews: 223
rlm@5 28 ;; Chapters: 23
rlm@5 29 ;; My Little Pony: FiM The Romancing Quest
rlm@5 30 ;; http://www.fanfiction.net/s/7171004/1/My_Little_Pony_FiM_The_Romancing_Quest
rlm@5 31 ;; Reviews: 174
rlm@5 32 ;; Chapters: 13
rlm@5 33 ;; Don't Let the Sun Catch You Crying
rlm@5 34 ;; http://www.fanfiction.net/s/7032385/1/Dont_Let_the_Sun_Catch_You_Crying
rlm@5 35 ;; Reviews: 165
rlm@5 36 ;; Chapters: 6
rlm@5 37
rlm@5 38
;; Category listing page for Chobits stories.
(def chobits
  (URL. "http://www.fanfiction.net/anime/Chobits/"))
rlm@5 40 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
rlm@5 41
;; Category listing page for Card Captor Sakura stories.
(def ccs
  (URL. "http://www.fanfiction.net/anime/Card_Captor_Sakura/"))
rlm@5 43 ;; Jagged Amber
rlm@5 44 ;; http://www.fanfiction.net/s/1348047/1/Jagged_Amber
rlm@5 45 ;; Reviews: 4767
rlm@5 46 ;; Chapters: 25
rlm@5 47 ;; Misty Dreams
rlm@5 48 ;; http://www.fanfiction.net/s/1250008/1/Misty_Dreams
rlm@5 49 ;; Reviews: 4460
rlm@5 50 ;; Chapters: 27
rlm@5 51 ;; Butterflies
rlm@5 52 ;; http://www.fanfiction.net/s/1667519/1/Butterflies
rlm@5 53 ;; Reviews: 3029
rlm@5 54 ;; Chapters: 36
rlm@5 55 ;; Mischievous Love
rlm@5 56 ;; http://www.fanfiction.net/s/1020198/1/Mischievous_Love
rlm@5 57 ;; Reviews: 3013
rlm@5 58 ;; Chapters: 25
rlm@5 59 ;; Black Wings
rlm@5 60 ;; http://www.fanfiction.net/s/2737015/1/Black_Wings
rlm@5 61 ;; Reviews: 2822
rlm@5 62 ;; Chapters: 23
rlm@5 63 ;; Cherry, My Love
rlm@5 64 ;; http://www.fanfiction.net/s/968234/1/Cherry_My_Love
rlm@5 65 ;; Reviews: 2612
rlm@5 66 ;; Chapters: 26
rlm@5 67 ;; Deeper
rlm@5 68 ;; http://www.fanfiction.net/s/4027405/1/Deeper
rlm@5 69 ;; Reviews: 2260
rlm@5 70 ;; Chapters: 26
rlm@5 71 ;; Through A Looking Glass
rlm@5 72 ;; http://www.fanfiction.net/s/2729427/1/Through_A_Looking_Glass
rlm@5 73 ;; Reviews: 2009
rlm@5 74 ;; Chapters: 9
rlm@5 75 ;; Butterflies: In Spring
rlm@5 76 ;; http://www.fanfiction.net/s/2284162/1/Butterflies_In_Spring
rlm@5 77 ;; Reviews: 1815
rlm@5 78 ;; Chapters: 17
rlm@5 79 ;; The New Trials of Card Captor Sakura and Friends
rlm@5 80 ;; http://www.fanfiction.net/s/197453/1/The_New_Trials_of_Card_Captor_Sakura_and_Friends
rlm@5 81 ;; Reviews: 1794
rlm@5 82 ;; Chapters: 111
rlm@5 83 ;; Ice Queen
rlm@5 84 ;; http://www.fanfiction.net/s/1927065/1/Ice_Queen
rlm@5 85 ;; Reviews: 1725
rlm@5 86 ;; Chapters: 38
rlm@5 87 ;; Crystal Tears
rlm@5 88 ;; http://www.fanfiction.net/s/751067/1/Crystal_Tears
rlm@5 89 ;; Reviews: 1623
rlm@5 90 ;; Chapters: 26
rlm@5 91
rlm@5 92
;; Category listing page for Bible stories.
(def bible
  (URL. "http://www.fanfiction.net/book/Bible/"))
rlm@5 94 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
rlm@5 95
;; Category listing page for His Dark Materials stories.
(def dark
  (URL. "http://www.fanfiction.net/book/His_Dark_Materials/"))
rlm@5 97 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
rlm@5 98
;; Category listing page for Junjo Romantica stories.
(def junjo
  (URL. "http://www.fanfiction.net/anime/Junjo_Romantica/"))
rlm@5 100 ;;The Talk
rlm@5 101 ;; http://www.fanfiction.net/s/4351277/1/The_Talk
rlm@5 102 ;; Reviews: 250
rlm@5 103 ;; Chapters: 1
rlm@5 104
;; Category listing page for Catcher in the Rye stories.
(def rye
  (URL. "http://www.fanfiction.net/book/Catcher_in_the_Rye/"))
rlm@5 106 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
rlm@5 107
rlm@5 108
(defn tags-by-name
  "Return a seq of the elements beneath `node` whose tag name is
  `element`, searching the whole subtree (recursive lookup).
  Returns nil when no element matches."
  [#^TagNode node #^String element]
  (let [recursive? true
        matches    (.getElementListByName node element recursive?)]
    ;; `seq` turns an empty Java list into nil so callers can nil-pun.
    (seq matches)))
rlm@5 112
(defn-memo parse
  "Download `url` and parse it with HtmlCleaner, returning the root
  TagNode of the cleaned document.

  Returns nil (without logging or touching the network) when `url` is
  nil. Results are memoized by `defn-memo`, so each distinct URL is
  fetched at most once per session; a progress line is printed only on
  an actual fetch."
  [#^URL url]
  ;; Check for nil *before* dereferencing the URL for the log line;
  ;; otherwise a nil argument throws instead of yielding nil.
  (when-not (nil? url)
    (println "parsing" (.getPath url))
    ;; `.clean` consumes the stream while building the tree, so the
    ;; stream can (and should) be closed as soon as it returns.
    (with-open [in (input-stream url)]
      (.clean (HtmlCleaner.) in))))
rlm@5 119
(defn attributes
  "Copy the attributes of `node` into a Clojure hash map, so a value can
  be looked up by attribute name, e.g. ((attributes node) \"href\")."
  [#^TagNode node]
  (let [attr-map (.getAttributes node)]
    (into {} attr-map)))
rlm@5 124
;; Scheme and host prepended to the site-relative hrefs found in pages.
(def fanfiction-base
  "http://www.fanfiction.net")
rlm@5 126
(defn next-fanfiction-url
  "Return the URL of the page that follows `url` in a paginated listing,
  or nil when there is none.

  The page is parsed (via the memoized `parse`) and the first <a>
  element whose text contains \"Next\" is taken as the pagination link.
  Its href is resolved against `fanfiction-base`, so site-relative paths
  (\"/anime/...\") behave as before while absolute hrefs are also
  handled correctly. Returns nil when no such link exists or when the
  link carries no usable href."
  [#^URL url]
  (let [links      (tags-by-name (parse url) "a")
        ;; `re-find` rather than a whole-string `.*Next.*` match, so link
        ;; text containing a line break is still recognised.
        next-link? (fn [link] (re-find #"Next" (str (.getText link))))
        next-node  (first (filter next-link? links))
        href       (when next-node
                     ((attributes next-node) "href"))]
    ;; A missing or empty href must not degrade into the bare site root,
    ;; which would send the crawler to an unrelated page.
    (when (seq href)
      (URL. (URL. fanfiction-base) href))))
rlm@5 139
(defn get-stories
  "Return a lazy seq of the story entries on the listing page at `url`:
  every <div> whose class attribute is exactly \"z-list\"."
  [#^URL url]
  (let [page       (parse url)
        story-div? (fn [div]
                     (= "z-list" (get (attributes div) "class")))]
    (filter story-div? (tags-by-name page "div"))))
rlm@5 144
rlm@5 145
(defn story-url
  "Return the address of a story as a string: `fanfiction-base` followed
  by the href of the first <a> element found inside the story `node`."
  [#^TagNode node]
  (let [anchor (first (tags-by-name node "a"))
        href   (get (attributes anchor) "href")]
    (str fanfiction-base href)))
rlm@5 150
(defn story-desc
  "Return the text of the second <div> found inside the story `node`.
  The other story-* accessors search this text for the \"Reviews:\" and
  \"Chapters:\" counts."
  [#^TagNode node]
  (let [[_ desc-div] (tags-by-name node "div")]
    (.getText desc-div)))
rlm@5 153
(defn story-reviews
  "Return the review count of a story `node` as an int, read from the
  \"Reviews: N\" field of its description text. Returns 0 when the
  field is absent.

  The pattern runs in DOTALL mode so a description containing line
  breaks still matches, and thousands separators (\"1,058\") are
  accepted and stripped before parsing."
  [#^TagNode node]
  (let [[_ digits] (re-matches #"(?s).*Reviews: (\d[\d,]*).*"
                               (str (story-desc node)))]
    (Integer/parseInt
     ;; Drop grouping commas; fall back to \"0\" when nothing matched.
     (apply str (remove #{\,} (or digits "0"))))))
rlm@5 159
(defn story-chapters
  "Return the chapter count of a story `node` as an int, read from the
  \"Chapters: N\" field of its description text. Returns 1 when the
  field is absent.

  The pattern runs in DOTALL mode so a description containing line
  breaks still matches, and thousands separators are accepted and
  stripped before parsing."
  [#^TagNode node]
  (let [[_ digits] (re-matches #"(?s).*Chapters: (\d[\d,]*).*"
                               (str (story-desc node)))]
    (Integer/parseInt
     ;; Drop grouping commas; fall back to \"1\" when nothing matched.
     (apply str (remove #{\,} (or digits "1"))))))
rlm@5 165
(defn story-title
  "Return the title of a story `node` as a string: the text of the first
  <a> element found inside it."
  [#^TagNode node]
  (let [anchor (first (tags-by-name node "a"))]
    (str (.getText anchor))))
rlm@5 168
(defn all-stories
  "Collect the story nodes from a paginated listing, starting at
  `start-url` and following \"Next\" links.

  Pages are numbered from 0 and crawling stops after page number `n`
  has been collected (so at most n+1 pages are visited), or earlier
  when a page has no next link. A negative `n` never reaches the limit,
  so the crawl then runs until the listing ends.

  Returns a vector of story nodes in page order."
  [n #^URL start-url]
  (loop [stories     []
         index       0
         current-url start-url]
    ;; `into` a vector instead of repeated `concat`: chaining one lazy
    ;; concat per page can overflow the stack when finally realised.
    (let [stories* (into stories (get-stories current-url))
          ;; Only look for the next link while under the page limit.
          next-url (when (not= index n)
                     (next-fanfiction-url current-url))]
      (if next-url
        (recur stories* (inc index) next-url)
        stories*))))
rlm@5 180
(defn best-stories
  "Print the `n` highest-ranked stories found by crawling from
  `start-url` with page limit `search-limit` (see `all-stories`).

  Stories are ranked by reviews / (chapters + 1), highest first. For
  each one the title, URL, review count and chapter count are printed.
  Returns nil."
  [n search-limit #^URL start-url]
  (let [;; Ranking key; plain `story-reviews` is an alternative key.
        score  (fn [story]
                 (/ (story-reviews story)
                    (inc (story-chapters story))))
        ranked (->> (all-stories search-limit start-url)
                    (sort-by score)
                    reverse
                    (take n))]
    (doseq [story ranked]
      (println (story-title story) "\n"
               (story-url story) "\n"
               " Reviews: " (story-reviews story) "\n"
               " Chapters:" (story-chapters story)))))
rlm@5 194
rlm@5 195
rlm@5 196