view src/fanfiction/stories.clj @ 5:fca75c0e8f40

added stories.clj
author Robert McIntyre <rlm@mit.edu>
date Thu, 01 Mar 2012 05:47:37 -0700
parents
children
line wrap: on
line source
1 (ns fanfiction.stories)
3 (use 'clojure.java.io)
5 (import org.htmlcleaner.HtmlCleaner)
6 (import org.htmlcleaner.TagNode)
7 (import java.io.File)
8 (import java.net.URL)
9 (use 'clojure.contrib.def)
;; Listing page for the My Little Pony category; stories picked out from it
;; are recorded in the comments that follow.
(def mlp (URL. "http://www.fanfiction.net/cartoon/My_Little_Pony"))
13 ;; It Takes a Village
14 ;; http://www.fanfiction.net/s/7490980/1/It_Takes_a_Village
15 ;; Reviews: 1058
16 ;; Chapters: 17
17 ;; Progress
18 ;; http://www.fanfiction.net/s/6982210/1/Progress
19 ;; Reviews: 612
20 ;; Chapters: 27
21 ;; On a Cross and Arrow
22 ;; http://www.fanfiction.net/s/7211729/1/On_a_Cross_and_Arrow
23 ;; Reviews: 324
24 ;; Chapters: 12
25 ;; Growing Pains
26 ;; http://www.fanfiction.net/s/7252679/1/Growing_Pains
27 ;; Reviews: 223
28 ;; Chapters: 23
29 ;; My Little Pony: FiM The Romancing Quest
30 ;; http://www.fanfiction.net/s/7171004/1/My_Little_Pony_FiM_The_Romancing_Quest
31 ;; Reviews: 174
32 ;; Chapters: 13
33 ;; Don't Let the Sun Catch You Crying
34 ;; http://www.fanfiction.net/s/7032385/1/Dont_Let_the_Sun_Catch_You_Crying
35 ;; Reviews: 165
36 ;; Chapters: 6
;; Listing page for the Chobits category.
(def chobits
  (URL. "http://www.fanfiction.net/anime/Chobits/"))
40 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
;; Listing page for the Card Captor Sakura category; stories picked out from
;; it are recorded in the comments that follow.
(def ccs
  (URL. "http://www.fanfiction.net/anime/Card_Captor_Sakura/"))
43 ;; Jagged Amber
44 ;; http://www.fanfiction.net/s/1348047/1/Jagged_Amber
45 ;; Reviews: 4767
46 ;; Chapters: 25
47 ;; Misty Dreams
48 ;; http://www.fanfiction.net/s/1250008/1/Misty_Dreams
49 ;; Reviews: 4460
50 ;; Chapters: 27
51 ;; Butterflies
52 ;; http://www.fanfiction.net/s/1667519/1/Butterflies
53 ;; Reviews: 3029
54 ;; Chapters: 36
55 ;; Mischievous Love
56 ;; http://www.fanfiction.net/s/1020198/1/Mischievous_Love
57 ;; Reviews: 3013
58 ;; Chapters: 25
59 ;; Black Wings
60 ;; http://www.fanfiction.net/s/2737015/1/Black_Wings
61 ;; Reviews: 2822
62 ;; Chapters: 23
63 ;; Cherry, My Love
64 ;; http://www.fanfiction.net/s/968234/1/Cherry_My_Love
65 ;; Reviews: 2612
66 ;; Chapters: 26
67 ;; Deeper
68 ;; http://www.fanfiction.net/s/4027405/1/Deeper
69 ;; Reviews: 2260
70 ;; Chapters: 26
71 ;; Through A Looking Glass
72 ;; http://www.fanfiction.net/s/2729427/1/Through_A_Looking_Glass
73 ;; Reviews: 2009
74 ;; Chapters: 9
75 ;; Butterflies: In Spring
76 ;; http://www.fanfiction.net/s/2284162/1/Butterflies_In_Spring
77 ;; Reviews: 1815
78 ;; Chapters: 17
79 ;; The New Trials of Card Captor Sakura and Friends
80 ;; http://www.fanfiction.net/s/197453/1/The_New_Trials_of_Card_Captor_Sakura_and_Friends
81 ;; Reviews: 1794
82 ;; Chapters: 111
83 ;; Ice Queen
84 ;; http://www.fanfiction.net/s/1927065/1/Ice_Queen
85 ;; Reviews: 1725
86 ;; Chapters: 38
87 ;; Crystal Tears
88 ;; http://www.fanfiction.net/s/751067/1/Crystal_Tears
89 ;; Reviews: 1623
90 ;; Chapters: 26
;; Listing page for the Bible category.
(def bible
  (URL. "http://www.fanfiction.net/book/Bible/"))
94 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
;; Listing page for the His Dark Materials category.
(def dark
  (URL. "http://www.fanfiction.net/book/His_Dark_Materials/"))
97 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
;; Listing page for the Junjo Romantica category; the story picked out from
;; it is recorded in the comments that follow.
(def junjo
  (URL. "http://www.fanfiction.net/anime/Junjo_Romantica/"))
100 ;;The Talk
101 ;; http://www.fanfiction.net/s/4351277/1/The_Talk
102 ;; Reviews: 250
103 ;; Chapters: 1
;; Listing page for the Catcher in the Rye category.
(def rye
  (URL. "http://www.fanfiction.net/book/Catcher_in_the_Rye/"))
106 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
(defn tags-by-name
  "Return a seq of the elements named `element` found under `node`
  (searched recursively), or nil when there are none."
  [^TagNode node ^String element]
  (let [recursive? true
        matches    (.getElementListByName node element recursive?)]
    (seq matches)))
(defn-memo parse
  "Parse the web page at `url` with HtmlCleaner and return the resulting
  TagNode, or nil when `url` is nil. Defined with defn-memo, so a repeated
  call with an equal URL reuses the cached result instead of fetching again."
  [^URL url]
  ;; Check for nil before touching url: logging (.getPath url) first threw a
  ;; NullPointerException, so the nil guard could never take effect.
  (when url
    (println "parsing" (.getPath url))
    ;; clean builds the whole tree before returning, so the stream can be
    ;; closed right after instead of being left open.
    (with-open [in (input-stream url)]
      (.clean (HtmlCleaner.) in))))
(defn attributes
  "Copy the attributes of `node` into a Clojure hash map."
  [^TagNode node]
  (let [attrs (.getAttributes node)]
    (into {} attrs)))
;; Scheme and host that get prepended to the hrefs scraped from pages.
(def fanfiction-base
  "http://www.fanfiction.net")
(defn next-fanfiction-url
  "Return the URL of the page that follows the one at `url`: the href of the
  first link whose text matches #\".*Next.*\", appended to fanfiction-base.
  Returns nil when the page has no such link."
  [^URL url]
  (let [next-link? (fn [anchor] (re-matches #".*Next.*" (.getText anchor)))
        ;; every <a> element on the page
        anchors    (tags-by-name (parse url) "a")]
    (when-let [next-anchor (first (filter next-link? anchors))]
      (URL. (str fanfiction-base
                 (get (attributes next-anchor) "href"))))))
(defn get-stories
  "Return the <div> elements of the page at `url` whose class attribute is
  exactly \"z-list\". These are the nodes the story-* functions operate on."
  [^URL url]
  (let [divs       (tags-by-name (parse url) "div")
        story-div? (fn [div] (= "z-list" (get (attributes div) "class")))]
    (filter story-div? divs)))
(defn story-url
  "Return the story's address as a string: fanfiction-base followed by the
  href of the first <a> found under `node`."
  [^TagNode node]
  (let [first-link (first (tags-by-name node "a"))
        href       (get (attributes first-link) "href")]
    (str fanfiction-base href)))
(defn story-desc
  "Return the text of the second <div> found under `node`."
  [^TagNode node]
  (let [[_ desc-div] (tags-by-name node "div")]
    (.getText desc-div)))
(defn story-reviews
  "Return the number parsed from the \"Reviews: N\" field of the story's
  description text, or 0 when the field is absent."
  [^TagNode node]
  ;; (?s) lets `.` cross line terminators. Without it re-matches rejected any
  ;; description containing a line break and the story silently scored 0.
  (let [[_ digits] (re-matches #"(?s).*Reviews: (\d+).*" (story-desc node))]
    (Integer/parseInt (or digits "0"))))
(defn story-chapters
  "Return the number parsed from the \"Chapters: N\" field of the story's
  description text, or 1 when the field is absent."
  [^TagNode node]
  ;; (?s) lets `.` cross line terminators. Without it re-matches rejected any
  ;; description containing a line break and the story fell back to 1 chapter.
  (let [[_ digits] (re-matches #"(?s).*Chapters: (\d+).*" (story-desc node))]
    (Integer/parseInt (or digits "1"))))
(defn story-title
  "Return, as a String, the text of the first <a> found under `node`."
  [^TagNode node]
  (let [title-link (first (tags-by-name node "a"))]
    (str (.getText title-link))))
(defn all-stories
  "Collect the story nodes from the listing page at `start-url` and from the
  pages reached by following \"Next\" links from it. For non-negative `n`, at
  most `n` Next links are followed (so up to n+1 pages are read); a negative
  `n` never matches the page index and so imposes no limit. Collection also
  stops at the first page that has no Next link.
  Returns a vector of story nodes in page order."
  [n ^URL start-url]
  (loop [stories     []
         index       0
         current-url start-url]
    ;; Accumulate eagerly with into. One lazy concat per page nests the seqs
    ;; ever deeper and can overflow the stack when the result is realized.
    (let [stories* (into stories (get-stories current-url))
          ;; named next-url so clojure.core/next is not shadowed
          next-url (next-fanfiction-url current-url)]
      (if (and (not= index n) next-url)
        (recur stories* (inc index) next-url)
        stories*))))
(defn best-stories
  "Print the title, URL, review count and chapter count of the `n`
  top-ranked stories gathered by (all-stories search-limit start-url).
  Stories are ranked by reviews / (chapters + 1), highest first.
  Returns nil."
  [n search-limit ^URL start-url]
  (let [;; alternative ranking key: story-reviews
        score  (fn [story]
                 (/ (story-reviews story) (inc (story-chapters story))))
        ranked (reverse (sort-by score (all-stories search-limit start-url)))]
    (doseq [story (take n ranked)]
      (println (story-title story) "\n"
               (story-url story) "\n"
               " Reviews: " (story-reviews story) "\n"
               " Chapters:" (story-chapters story)))))