Mercurial > rlm
comparison src/fanfiction/stories.clj @ 5:fca75c0e8f40
added stories.clj
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Thu, 01 Mar 2012 05:47:37 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:12d1367cf1aa | 5:fca75c0e8f40 |
---|---|
1 (ns fanfiction.stories) | |
2 | |
3 (use 'clojure.java.io) | |
4 | |
5 (import org.htmlcleaner.HtmlCleaner) | |
6 (import org.htmlcleaner.TagNode) | |
7 (import java.io.File) | |
8 (import java.net.URL) | |
9 (use 'clojure.contrib.def) | |
10 | |
;; fanfiction.net category URL for My Little Pony (cartoon section).
;; Presumably meant to be passed as the start-url of best-stories.
(def mlp
  (as-url "http://www.fanfiction.net/cartoon/My_Little_Pony"))
13 ;; It Takes a Village | |
14 ;; http://www.fanfiction.net/s/7490980/1/It_Takes_a_Village | |
15 ;; Reviews: 1058 | |
16 ;; Chapters: 17 | |
17 ;; Progress | |
18 ;; http://www.fanfiction.net/s/6982210/1/Progress | |
19 ;; Reviews: 612 | |
20 ;; Chapters: 27 | |
21 ;; On a Cross and Arrow | |
22 ;; http://www.fanfiction.net/s/7211729/1/On_a_Cross_and_Arrow | |
23 ;; Reviews: 324 | |
24 ;; Chapters: 12 | |
25 ;; Growing Pains | |
26 ;; http://www.fanfiction.net/s/7252679/1/Growing_Pains | |
27 ;; Reviews: 223 | |
28 ;; Chapters: 23 | |
29 ;; My Little Pony: FiM The Romancing Quest | |
30 ;; http://www.fanfiction.net/s/7171004/1/My_Little_Pony_FiM_The_Romancing_Quest | |
31 ;; Reviews: 174 | |
32 ;; Chapters: 13 | |
33 ;; Don't Let the Sun Catch You Crying | |
34 ;; http://www.fanfiction.net/s/7032385/1/Dont_Let_the_Sun_Catch_You_Crying | |
35 ;; Reviews: 165 | |
36 ;; Chapters: 6 | |
37 | |
38 | |
;; fanfiction.net category URL for Chobits (anime section).
(def chobits
  (as-url "http://www.fanfiction.net/anime/Chobits/"))
40 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!! | |
41 | |
;; fanfiction.net category URL for Card Captor Sakura (anime section).
(def ccs
  (as-url "http://www.fanfiction.net/anime/Card_Captor_Sakura/"))
43 ;; Jagged Amber | |
44 ;; http://www.fanfiction.net/s/1348047/1/Jagged_Amber | |
45 ;; Reviews: 4767 | |
46 ;; Chapters: 25 | |
47 ;; Misty Dreams | |
48 ;; http://www.fanfiction.net/s/1250008/1/Misty_Dreams | |
49 ;; Reviews: 4460 | |
50 ;; Chapters: 27 | |
51 ;; Butterflies | |
52 ;; http://www.fanfiction.net/s/1667519/1/Butterflies | |
53 ;; Reviews: 3029 | |
54 ;; Chapters: 36 | |
55 ;; Mischievous Love | |
56 ;; http://www.fanfiction.net/s/1020198/1/Mischievous_Love | |
57 ;; Reviews: 3013 | |
58 ;; Chapters: 25 | |
59 ;; Black Wings | |
60 ;; http://www.fanfiction.net/s/2737015/1/Black_Wings | |
61 ;; Reviews: 2822 | |
62 ;; Chapters: 23 | |
63 ;; Cherry, My Love | |
64 ;; http://www.fanfiction.net/s/968234/1/Cherry_My_Love | |
65 ;; Reviews: 2612 | |
66 ;; Chapters: 26 | |
67 ;; Deeper | |
68 ;; http://www.fanfiction.net/s/4027405/1/Deeper | |
69 ;; Reviews: 2260 | |
70 ;; Chapters: 26 | |
71 ;; Through A Looking Glass | |
72 ;; http://www.fanfiction.net/s/2729427/1/Through_A_Looking_Glass | |
73 ;; Reviews: 2009 | |
74 ;; Chapters: 9 | |
75 ;; Butterflies: In Spring | |
76 ;; http://www.fanfiction.net/s/2284162/1/Butterflies_In_Spring | |
77 ;; Reviews: 1815 | |
78 ;; Chapters: 17 | |
79 ;; The New Trials of Card Captor Sakura and Friends | |
80 ;; http://www.fanfiction.net/s/197453/1/The_New_Trials_of_Card_Captor_Sakura_and_Friends | |
81 ;; Reviews: 1794 | |
82 ;; Chapters: 111 | |
83 ;; Ice Queen | |
84 ;; http://www.fanfiction.net/s/1927065/1/Ice_Queen | |
85 ;; Reviews: 1725 | |
86 ;; Chapters: 38 | |
87 ;; Crystal Tears | |
88 ;; http://www.fanfiction.net/s/751067/1/Crystal_Tears | |
89 ;; Reviews: 1623 | |
90 ;; Chapters: 26 | |
91 | |
92 | |
;; fanfiction.net category URL for the Bible (book section).
(def bible
  (as-url "http://www.fanfiction.net/book/Bible/"))
94 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!! | |
95 | |
;; fanfiction.net category URL for His Dark Materials (book section).
(def dark
  (as-url "http://www.fanfiction.net/book/His_Dark_Materials/"))
97 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!! | |
98 | |
;; fanfiction.net category URL for Junjo Romantica (anime section).
(def junjo
  (as-url "http://www.fanfiction.net/anime/Junjo_Romantica/"))
100 ;; The Talk | |
101 ;; http://www.fanfiction.net/s/4351277/1/The_Talk | |
102 ;; Reviews: 250 | |
103 ;; Chapters: 1 | |
104 | |
;; fanfiction.net category URL for Catcher in the Rye (book section).
(def rye
  (as-url "http://www.fanfiction.net/book/Catcher_in_the_Rye/"))
106 ;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!! | |
107 | |
108 | |
(defn tags-by-name
  "Return a seq of the elements under `node` whose tag name is `element`,
  searching recursively. Returns nil when nothing matches."
  [#^TagNode node #^String element]
  (let [recursive? true
        matches    (.getElementListByName node element recursive?)]
    (seq matches)))
112 | |
(defn-memo parse
  "Parse the web page at `url` with HtmlCleaner and return the resulting
  root node. Returns nil when `url` is nil. Results are memoized, so
  a page that parsed successfully is not fetched again."
  [#^URL url]
  ;; Test for nil before touching the URL: calling .getPath on nil throws,
  ;; which made the nil check unreachable when it came after the println.
  (when-not (nil? url)
    (println "parsing" (.getPath url))
    ;; .clean consumes the whole stream before returning, so the stream
    ;; can be closed as soon as the parse is done instead of being leaked.
    (with-open [in (input-stream url)]
      (.clean (HtmlCleaner.) in))))
119 | |
(defn attributes
  "Copy the attributes of `node` into a Clojure hash map
  (attribute name -> attribute value)."
  [#^TagNode node]
  (let [attrs (.getAttributes node)]
    (reduce conj {} attrs)))
124 | |
;; Site root that is prepended to the href values read from pages
;; (see next-fanfiction-url and story-url) to build full URLs.
(def fanfiction-base "http://www.fanfiction.net")
126 | |
(defn next-fanfiction-url
  "Return the URL of the page that follows `url`, or nil when there is none.

  The page is parsed, its first <a> element whose text contains \"Next\"
  is located, and that link's href is appended to fanfiction-base.
  Returns nil when no such link exists or when the link has no href."
  [#^URL url]
  (let [links      (tags-by-name (parse url) "a")
        ;; re-find rather than re-matches with \".*Next.*\": `.` does not
        ;; match line breaks, so link text spanning lines would be missed.
        next-link? (fn [link] (re-find #"Next" (str (.getText link))))
        next-node  (first (filter next-link? links))]
    (when next-node
      ;; Without an href, (str fanfiction-base nil) would silently send the
      ;; crawl to the site root; treat that as \"no next page\" instead.
      (when-let [href ((attributes next-node) "href")]
        (URL. (str fanfiction-base href))))))
139 | |
(defn get-stories
  "Return a lazy seq of the <div> elements on the page at `url` whose
  class attribute is exactly \"z-list\" (presumably one per story entry)."
  [#^URL url]
  (let [divs       (tags-by-name (parse url) "div")
        story-div? (fn [div] (= "z-list" (get (attributes div) "class")))]
    (filter story-div? divs)))
144 | |
145 | |
(defn story-url
  "Return, as a string, fanfiction-base followed by the href of the first
  <a> element found under the story `node`."
  [#^TagNode node]
  (let [anchor (first (tags-by-name node "a"))
        href   (get (attributes anchor) "href")]
    (str fanfiction-base href)))
150 | |
(defn story-desc
  "Return the text of the second <div> found under the story `node`,
  or the empty string when the node has fewer than two nested divs."
  [#^TagNode node]
  ;; Calling .getText on a missing div would throw a NullPointerException
  ;; and abort the whole listing; story-reviews and story-chapters already
  ;; fall back to defaults when the text lacks the fields they look for.
  (if-let [desc (second (tags-by-name node "div"))]
    (.getText desc)
    ""))
153 | |
(defn- story-count
  "Search the description of `node` for `pattern`, whose first capture
  group must be a run of digits (optionally comma-grouped), and return
  it as an integer. `default` is the digit string parsed when the
  pattern is not found."
  [#^TagNode node pattern default]
  (let [;; re-find instead of re-matches with surrounding `.*`: `.` does not
        ;; match line breaks, so a multi-line description never matched and
        ;; the default was silently used.
        [_ digits]   (re-find pattern (str (story-desc node)))
        #^String raw (or digits default)]
    ;; Drop thousands separators so \"1,058\" parses as 1058 rather than 1.
    (Integer/parseInt (.replace raw "," ""))))

(defn story-reviews
  "Return the number following \"Reviews: \" in the description of the
  story `node`, or 0 when the description has no such field."
  [#^TagNode node]
  (story-count node #"Reviews: (\d[\d,]*)" "0"))

(defn story-chapters
  "Return the number following \"Chapters: \" in the description of the
  story `node`, or 1 when the description has no such field."
  [#^TagNode node]
  (story-count node #"Chapters: (\d[\d,]*)" "1"))
165 | |
(defn story-title
  "Return, as a string, the text of the first <a> element found under
  the story `node`."
  [#^TagNode node]
  (-> node
      (tags-by-name "a")
      first
      .getText
      str))
168 | |
(defn all-stories
  "Collect the story nodes from `start-url` and the pages that follow it.

  Starting at `start-url`, gathers the result of get-stories for each page
  and moves on via next-fanfiction-url. Stops once `n` next-links have
  been followed (so at most n + 1 pages are read) or when a page has no
  next link. Returns the stories in page order as a vector."
  [n #^URL start-url]
  (loop [stories     []
         index       0
         current-url start-url]
    (let [;; Accumulate eagerly with into: wrapping concat around the
          ;; accumulator on every page nests lazy seqs one level per page,
          ;; which can overflow the stack when finally realized.
          stories* (into stories (get-stories current-url))
          ;; Named next-url so clojure.core/next is not shadowed.
          next-url (next-fanfiction-url current-url)]
      (if (and (not= index n) next-url)
        (recur stories* (inc index) next-url)
        stories*))))
180 | |
(defn best-stories
  "Print the `n` highest-ranked stories reachable from `start-url`.

  Stories are gathered with (all-stories search-limit start-url) and ranked
  by reviews divided by (chapters + 1), highest first. For each story the
  title, URL, review count and chapter count are printed. Returns nil."
  [n search-limit #^URL start-url]
  (let [;; Ranking key; plain story-reviews is the alternative key the
        ;; original left commented out.
        score  (fn [story]
                 (/ (story-reviews story)
                    (inc (story-chapters story))))
        ranked (reverse
                (sort-by score (all-stories search-limit start-url)))]
    (doseq [story (take n ranked)]
      (println (story-title story) "\n"
               (story-url story) "\n"
               " Reviews: " (story-reviews story) "\n"
               " Chapters:" (story-chapters story)))))
194 | |
195 | |
196 |