Mercurial > rlm
diff src/fanfiction/stories.clj @ 5:fca75c0e8f40
added stories.clj
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Thu, 01 Mar 2012 05:47:37 -0700 |
parents | |
children |
line wrap: on
line diff
(ns fanfiction.stories
  "Scrape fanfiction.net category listings with HtmlCleaner and rank the
  stories found there by reviews per chapter."
  (:use clojure.java.io
        clojure.contrib.def)
  (:import org.htmlcleaner.HtmlCleaner
           org.htmlcleaner.TagNode
           java.io.File
           java.net.URL))

;;; Category listing pages, each followed by the best stories that
;;; `best-stories` reported for it.

(def mlp
  (URL. "http://www.fanfiction.net/cartoon/My_Little_Pony"))
;; It Takes a Village
;;   http://www.fanfiction.net/s/7490980/1/It_Takes_a_Village
;;   Reviews: 1058
;;   Chapters: 17
;; Progress
;;   http://www.fanfiction.net/s/6982210/1/Progress
;;   Reviews: 612
;;   Chapters: 27
;; On a Cross and Arrow
;;   http://www.fanfiction.net/s/7211729/1/On_a_Cross_and_Arrow
;;   Reviews: 324
;;   Chapters: 12
;; Growing Pains
;;   http://www.fanfiction.net/s/7252679/1/Growing_Pains
;;   Reviews: 223
;;   Chapters: 23
;; My Little Pony: FiM The Romancing Quest
;;   http://www.fanfiction.net/s/7171004/1/My_Little_Pony_FiM_The_Romancing_Quest
;;   Reviews: 174
;;   Chapters: 13
;; Don't Let the Sun Catch You Crying
;;   http://www.fanfiction.net/s/7032385/1/Dont_Let_the_Sun_Catch_You_Crying
;;   Reviews: 165
;;   Chapters: 6


(def chobits (URL. "http://www.fanfiction.net/anime/Chobits/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!

(def ccs (URL. "http://www.fanfiction.net/anime/Card_Captor_Sakura/"))
;; Jagged Amber
;;   http://www.fanfiction.net/s/1348047/1/Jagged_Amber
;;   Reviews: 4767
;;   Chapters: 25
;; Misty Dreams
;;   http://www.fanfiction.net/s/1250008/1/Misty_Dreams
;;   Reviews: 4460
;;   Chapters: 27
;; Butterflies
;;   http://www.fanfiction.net/s/1667519/1/Butterflies
;;   Reviews: 3029
;;   Chapters: 36
;; Mischievous Love
;;   http://www.fanfiction.net/s/1020198/1/Mischievous_Love
;;   Reviews: 3013
;;   Chapters: 25
;; Black Wings
;;   http://www.fanfiction.net/s/2737015/1/Black_Wings
;;   Reviews: 2822
;;   Chapters: 23
;; Cherry, My Love
;;   http://www.fanfiction.net/s/968234/1/Cherry_My_Love
;;   Reviews: 2612
;;   Chapters: 26
;; Deeper
;;   http://www.fanfiction.net/s/4027405/1/Deeper
;;   Reviews: 2260
;;   Chapters: 26
;; Through A Looking Glass
;;   http://www.fanfiction.net/s/2729427/1/Through_A_Looking_Glass
;;   Reviews: 2009
;;   Chapters: 9
;; Butterflies: In Spring
;;   http://www.fanfiction.net/s/2284162/1/Butterflies_In_Spring
;;   Reviews: 1815
;;   Chapters: 17
;; The New Trials of Card Captor Sakura and Friends
;;   http://www.fanfiction.net/s/197453/1/The_New_Trials_of_Card_Captor_Sakura_and_Friends
;;   Reviews: 1794
;;   Chapters: 111
;; Ice Queen
;;   http://www.fanfiction.net/s/1927065/1/Ice_Queen
;;   Reviews: 1725
;;   Chapters: 38
;; Crystal Tears
;;   http://www.fanfiction.net/s/751067/1/Crystal_Tears
;;   Reviews: 1623
;;   Chapters: 26


(def bible (URL. "http://www.fanfiction.net/book/Bible/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!

(def dark (URL. "http://www.fanfiction.net/book/His_Dark_Materials/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!

(def junjo (URL. "http://www.fanfiction.net/anime/Junjo_Romantica/"))
;; The Talk
;;   http://www.fanfiction.net/s/4351277/1/The_Talk
;;   Reviews: 250
;;   Chapters: 1

(def rye (URL. "http://www.fanfiction.net/book/Catcher_in_the_Rye/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!


(defn tags-by-name
  "Return a seq of every element named `element` found by a recursive
  search under `node`, or nil when there are none."
  [#^TagNode node #^String element]
  (seq (.getElementListByName node element true)))

(defn-memo parse
  "Parse the web page at `url` using HtmlCleaner and return its root
  TagNode.  Results are memoized per url, so each page is fetched at
  most once.  Returns nil when `url` is nil."
  [#^URL url]
  ;; The nil guard must come before any method call on url; the stream
  ;; is closed as soon as HtmlCleaner has consumed it.
  (when url
    (println "parsing" (.getPath url))
    (with-open [in (input-stream url)]
      (.clean (HtmlCleaner.) in))))

(defn attributes
  "Get a hash map of the attributes of an element."
  [#^TagNode node]
  (into {} (.getAttributes node)))

(def fanfiction-base "http://www.fanfiction.net")

(defn next-fanfiction-url
  "Get the next url to visit from the page at `url`: the target of the
  first link whose text contains \"Next\".  Returns nil when the page
  has no such link or the link carries no href."
  [#^URL url]
  (let [;; extract all links
        links     (tags-by-name (parse url) "a")
        ;; extract the "Next" link; re-find (unlike re-matches with
        ;; dot-star) still matches when the link text spans several lines
        next-node (first (filter #(re-find #"Next" (.getText %)) links))]
    (when next-node
      ;; without an href there is nowhere to go; do not fall back to the
      ;; site root
      (when-let [href ((attributes next-node) "href")]
        (URL. (str fanfiction-base href))))))

(defn get-stories
  "Return the div elements of the page at `url` whose class attribute is
  exactly \"z-list\"; each one is treated as a story entry."
  [#^URL url]
  (filter
   #(= "z-list" ((attributes %) "class"))
   (tags-by-name (parse url) "div")))


(defn story-url
  "Absolute url of a story: `fanfiction-base` joined with the href of the
  first link inside the story node."
  [#^TagNode node]
  (str fanfiction-base
       ((attributes
         (first (tags-by-name node "a"))) "href")))

(defn story-desc
  "Text of the second div found under the story node, or the empty
  string when the node has fewer than two divs."
  [#^TagNode node]
  (if-let [desc-node (second (tags-by-name node "div"))]
    (.getText desc-node)
    ""))

(defn- desc-int
  "Parse the first capture group of `pattern` found in the description of
  `node` as an int, using the string `default` when there is no match."
  [#^TagNode node pattern default]
  (Integer/parseInt
   (get (re-find pattern (story-desc node)) 1 default)))

(defn story-reviews
  "Number of reviews listed in the story description; 0 when absent."
  [#^TagNode node]
  (desc-int node #"Reviews: (\d+)" "0"))

(defn story-chapters
  "Number of chapters listed in the story description; 1 when absent."
  [#^TagNode node]
  (desc-int node #"Chapters: (\d+)" "1"))

(defn story-title
  "Title of a story: the text of the first link inside the story node."
  [#^TagNode node]
  (str (.getText (first (tags-by-name node "a")))))

(defn all-stories
  "Collect the story nodes from `start-url` and the pages reached by
  following \"Next\" links.  At most `n` links are followed, so at most
  n+1 pages are read.  Returns a vector of story nodes."
  [n #^URL start-url]
  (loop [stories     []
         index       0
         current-url start-url]
    ;; into on a vector avoids the nested lazy seqs that repeated concat
    ;; would build up over a long crawl
    (let [stories* (into stories (get-stories current-url))
          next-url (next-fanfiction-url current-url)]
      (if (and (not= index n)
               (not (nil? next-url)))
        (recur stories* (inc index) next-url)
        stories*))))

(defn best-stories
  "Print the title, url, review count and chapter count of the `n`
  stories with the highest reviews-per-chapter score among those found
  by following up to `search-limit` \"Next\" links from `start-url`.
  Returns nil."
  [n search-limit #^URL start-url]
  (doseq [story (take n (reverse
                         (sort-by
                          #(/ (story-reviews %) (inc (story-chapters %)))
                          ;;story-reviews
                          (all-stories search-limit start-url))))]
    (println (story-title story) "\n"
             (story-url story) "\n"
             " Reviews: " (story-reviews story) "\n"
             " Chapters:" (story-chapters story))))