Mercurial > rlm
view src/fanfiction/stories.clj @ 5:fca75c0e8f40
added stories.clj
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Thu, 01 Mar 2012 05:47:37 -0700 |
parents | |
children |
line wrap: on
line source
(ns fanfiction.stories)

(use 'clojure.java.io)

(import org.htmlcleaner.HtmlCleaner)
(import org.htmlcleaner.TagNode)
(import java.io.File)
(import java.net.URL)
(use 'clojure.contrib.def)

;; Each category URL below is followed by the top results recorded for
;; that category (title, story URL, review count, chapter count).

(def mlp
  (URL. "http://www.fanfiction.net/cartoon/My_Little_Pony"))
;; It Takes a Village
;;   http://www.fanfiction.net/s/7490980/1/It_Takes_a_Village
;;   Reviews: 1058
;;   Chapters: 17
;; Progress
;;   http://www.fanfiction.net/s/6982210/1/Progress
;;   Reviews: 612
;;   Chapters: 27
;; On a Cross and Arrow
;;   http://www.fanfiction.net/s/7211729/1/On_a_Cross_and_Arrow
;;   Reviews: 324
;;   Chapters: 12
;; Growing Pains
;;   http://www.fanfiction.net/s/7252679/1/Growing_Pains
;;   Reviews: 223
;;   Chapters: 23
;; My Little Pony: FiM The Romancing Quest
;;   http://www.fanfiction.net/s/7171004/1/My_Little_Pony_FiM_The_Romancing_Quest
;;   Reviews: 174
;;   Chapters: 13
;; Don't Let the Sun Catch You Crying
;;   http://www.fanfiction.net/s/7032385/1/Dont_Let_the_Sun_Catch_You_Crying
;;   Reviews: 165
;;   Chapters: 6


(def chobits (URL. "http://www.fanfiction.net/anime/Chobits/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!

(def ccs (URL. "http://www.fanfiction.net/anime/Card_Captor_Sakura/"))
;; Jagged Amber
;;   http://www.fanfiction.net/s/1348047/1/Jagged_Amber
;;   Reviews: 4767
;;   Chapters: 25
;; Misty Dreams
;;   http://www.fanfiction.net/s/1250008/1/Misty_Dreams
;;   Reviews: 4460
;;   Chapters: 27
;; Butterflies
;;   http://www.fanfiction.net/s/1667519/1/Butterflies
;;   Reviews: 3029
;;   Chapters: 36
;; Mischievous Love
;;   http://www.fanfiction.net/s/1020198/1/Mischievous_Love
;;   Reviews: 3013
;;   Chapters: 25
;; Black Wings
;;   http://www.fanfiction.net/s/2737015/1/Black_Wings
;;   Reviews: 2822
;;   Chapters: 23
;; Cherry, My Love
;;   http://www.fanfiction.net/s/968234/1/Cherry_My_Love
;;   Reviews: 2612
;;   Chapters: 26
;; Deeper
;;   http://www.fanfiction.net/s/4027405/1/Deeper
;;   Reviews: 2260
;;   Chapters: 26
;; Through A Looking Glass
;;   http://www.fanfiction.net/s/2729427/1/Through_A_Looking_Glass
;;   Reviews: 2009
;;   Chapters: 9
;; Butterflies: In Spring
;;   http://www.fanfiction.net/s/2284162/1/Butterflies_In_Spring
;;   Reviews: 1815
;;   Chapters: 17
;; The New Trials of Card Captor Sakura and Friends
;;   http://www.fanfiction.net/s/197453/1/The_New_Trials_of_Card_Captor_Sakura_and_Friends
;;   Reviews: 1794
;;   Chapters: 111
;; Ice Queen
;;   http://www.fanfiction.net/s/1927065/1/Ice_Queen
;;   Reviews: 1725
;;   Chapters: 38
;; Crystal Tears
;;   http://www.fanfiction.net/s/751067/1/Crystal_Tears
;;   Reviews: 1623
;;   Chapters: 26


(def bible (URL. "http://www.fanfiction.net/book/Bible/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!

(def dark (URL. "http://www.fanfiction.net/book/His_Dark_Materials/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!

(def junjo (URL. "http://www.fanfiction.net/anime/Junjo_Romantica/"))
;; The Talk
;;   http://www.fanfiction.net/s/4351277/1/The_Talk
;;   Reviews: 250
;;   Chapters: 1

(def rye (URL. "http://www.fanfiction.net/book/Catcher_in_the_Rye/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!


(defn tags-by-name
  "Return a seq of every element named `element` anywhere beneath `node`
  (the search is recursive), or nil when there are none."
  [#^TagNode node #^String element]
  (seq (.getElementListByName node element true)))

(defn-memo parse
  "Parse the web page at `url` with HtmlCleaner and return its root TagNode.
  Returns nil when `url` is nil. Defined with defn-memo, so each distinct
  URL is fetched and parsed only once; later calls reuse the cached tree."
  [#^URL url]
  ;; The nil check must come before anything touches `url`; logging the
  ;; path first would throw on a nil URL and make the check pointless.
  (when-not (nil? url)
    (println "parsing" (.getPath url))
    ;; Close the network stream once the document has been built.
    (with-open [in (input-stream url)]
      (.clean (HtmlCleaner.) in))))

(defn attributes
  "Return the attributes of `node` as a hash map of name -> value."
  [#^TagNode node]
  (into {} (.getAttributes node)))

(def fanfiction-base "http://www.fanfiction.net")

(defn next-fanfiction-url
  "Return the URL of the page linked by the first anchor on `url` whose
  text contains \"Next\", or nil when no such anchor (or no href on it)
  exists. The href is appended to `fanfiction-base`."
  [#^URL url]
  (let [;; every link on the page
        links     (tags-by-name (parse url) "a")
        ;; the first link whose text mentions \"Next\"; re-find is used so
        ;; that anchor text containing line breaks still matches
        next-node (first (filter #(re-find #"Next" (str (.getText %)))
                                 links))]
    (when-not (nil? next-node)
      ;; Without an href there is nowhere to go; do not fall back to the
      ;; bare site root.
      (when-let [href ((attributes next-node) "href")]
        (URL. (str fanfiction-base href))))))

(defn get-stories
  "Return the <div> elements on the page at `url` whose class attribute
  is exactly \"z-list\" (one per story listing)."
  [#^URL url]
  (filter
   #(= "z-list" ((attributes %) "class"))
   (tags-by-name (parse url) "div")))

(defn story-url
  "Return the absolute URL of a story: `fanfiction-base` followed by the
  href of the first anchor inside `node`."
  [#^TagNode node]
  (str fanfiction-base
       ((attributes
         (first (tags-by-name node "a"))) "href")))

(defn story-desc
  "Return the text of the second <div> found beneath `node`, or nil when
  `node` contains fewer than two <div>s."
  [#^TagNode node]
  (when-let [desc (second (tags-by-name node "div"))]
    (.getText desc)))

(defn- desc-int
  "Match `pattern` against the whole description text of `node` and parse
  capture group 1 as an integer; `default` (a string) is parsed instead
  when the pattern does not match."
  [#^TagNode node pattern default]
  (Integer/parseInt
   (get (re-matches pattern (str (story-desc node))) 1 default)))

(defn story-reviews
  "Return the number following \"Reviews: \" in the story description,
  or 0 when the description does not mention reviews."
  [#^TagNode node]
  ;; (?s) lets `.` span line terminators so multi-line descriptions match.
  (desc-int node #"(?s).*Reviews: (\d+).*" "0"))

(defn story-chapters
  "Return the number following \"Chapters: \" in the story description,
  or 1 when the description does not mention chapters."
  [#^TagNode node]
  ;; (?s) lets `.` span line terminators so multi-line descriptions match.
  (desc-int node #"(?s).*Chapters: (\d+).*" "1"))

(defn story-title
  "Return the text of the first anchor inside `node` as a string."
  [#^TagNode node]
  (str (.getText (first (tags-by-name node "a")))))

(defn all-stories
  "Collect the story nodes from `start-url` and from the pages reached by
  following \"Next\" links. Following stops once `n` links have been
  followed (so at most n+1 pages are read) or when a page has no next
  link. Returns the stories in page order."
  [n #^URL start-url]
  (loop [stories     []
         index       0
         current-url start-url]
    (let [;; `into` accumulates eagerly; stacking lazy `concat`s across
          ;; loop iterations can overflow the stack when finally realized.
          stories* (into stories (get-stories current-url))
          next-url (next-fanfiction-url current-url)]
      (if (and (not= index n)
               (not (nil? next-url)))
        (recur stories* (inc index) next-url)
        stories*))))

(defn best-stories
  "Print the title, URL, review count and chapter count of the `n`
  highest-scoring stories reachable from `start-url`, following at most
  `search-limit` \"Next\" links. A story's score is its review count
  divided by (chapter count + 1). Returns nil."
  [n search-limit #^URL start-url]
  (doseq [story (take n
                      (reverse
                       (sort-by
                        #(/ (story-reviews %) (inc (story-chapters %)))
                        ;;story-reviews
                        (all-stories search-limit start-url))))]
    (println (story-title story) "\n"
             (story-url story) "\n"
             " Reviews: " (story-reviews story) "\n"
             " Chapters:" (story-chapters story))))