;; Namespace header: file/stream helpers from clojure.java.io, the
;; clojure.contrib.def macros, HtmlCleaner for HTML parsing, and the
;; java.io / java.net classes used below.
(ns fanfiction.stories
  (:use clojure.java.io
        clojure.contrib.def)
  (:import (org.htmlcleaner HtmlCleaner TagNode)
           (java.io File)
           (java.net URL)))

;; Category listing pages on fanfiction.net.  The commented entries under
;; each var have the same shape as what `best-stories` prints (title, url,
;; review count, chapter count) and look like saved results for that page.

(def mlp (URL. "http://www.fanfiction.net/cartoon/My_Little_Pony"))
;; It Takes a Village
;;   http://www.fanfiction.net/s/7490980/1/It_Takes_a_Village
;;   Reviews: 1058
;;   Chapters: 17
;; Progress
;;   http://www.fanfiction.net/s/6982210/1/Progress
;;   Reviews: 612
;;   Chapters: 27
;; On a Cross and Arrow
;;   http://www.fanfiction.net/s/7211729/1/On_a_Cross_and_Arrow
;;   Reviews: 324
;;   Chapters: 12
;; Growing Pains
;;   http://www.fanfiction.net/s/7252679/1/Growing_Pains
;;   Reviews: 223
;;   Chapters: 23
;; My Little Pony: FiM The Romancing Quest
;;   http://www.fanfiction.net/s/7171004/1/My_Little_Pony_FiM_The_Romancing_Quest
;;   Reviews: 174
;;   Chapters: 13
;; Don't Let the Sun Catch You Crying
;;   http://www.fanfiction.net/s/7032385/1/Dont_Let_the_Sun_Catch_You_Crying
;;   Reviews: 165
;;   Chapters: 6


(def chobits (URL. "http://www.fanfiction.net/anime/Chobits/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!

(def ccs (URL. "http://www.fanfiction.net/anime/Card_Captor_Sakura/"))
;; Jagged Amber
;;   http://www.fanfiction.net/s/1348047/1/Jagged_Amber
;;   Reviews: 4767
;;   Chapters: 25
;; Misty Dreams
;;   http://www.fanfiction.net/s/1250008/1/Misty_Dreams
;;   Reviews: 4460
;;   Chapters: 27
;; Butterflies
;;   http://www.fanfiction.net/s/1667519/1/Butterflies
;;   Reviews: 3029
;;   Chapters: 36
;; Mischievous Love
;;   http://www.fanfiction.net/s/1020198/1/Mischievous_Love
;;   Reviews: 3013
;;   Chapters: 25
;; Black Wings
;;   http://www.fanfiction.net/s/2737015/1/Black_Wings
;;   Reviews: 2822
;;   Chapters: 23
;; Cherry, My Love
;;   http://www.fanfiction.net/s/968234/1/Cherry_My_Love
;;   Reviews: 2612
;;   Chapters: 26
;; Deeper
;;   http://www.fanfiction.net/s/4027405/1/Deeper
;;   Reviews: 2260
;;   Chapters: 26
;; Through A Looking Glass
;;   http://www.fanfiction.net/s/2729427/1/Through_A_Looking_Glass
;;   Reviews: 2009
;;   Chapters: 9
;; Butterflies: In Spring
;;   http://www.fanfiction.net/s/2284162/1/Butterflies_In_Spring
;;   Reviews: 1815
;;   Chapters: 17
;; The New Trials of Card Captor Sakura and Friends
;;   http://www.fanfiction.net/s/197453/1/The_New_Trials_of_Card_Captor_Sakura_and_Friends
;;   Reviews: 1794
;;   Chapters: 111
;; Ice Queen
;;   http://www.fanfiction.net/s/1927065/1/Ice_Queen
;;   Reviews: 1725
;;   Chapters: 38
;; Crystal Tears
;;   http://www.fanfiction.net/s/751067/1/Crystal_Tears
;;   Reviews: 1623
;;   Chapters: 26


(def bible (URL. "http://www.fanfiction.net/book/Bible/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!

(def dark (URL. "http://www.fanfiction.net/book/His_Dark_Materials/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!
(def junjo (URL. "http://www.fanfiction.net/anime/Junjo_Romantica/"))
;; The Talk
;;   http://www.fanfiction.net/s/4351277/1/The_Talk
;;   Reviews: 250
;;   Chapters: 1

(def rye (URL. "http://www.fanfiction.net/book/Catcher_in_the_Rye/"))
;; TTTRRRRRRAAAAAAASSSSSSHHHHH!!!!!!!


(defn tags-by-name
  "Return a seq of every element named `element` found by a recursive
  search below `node`.  Returns nil when nothing matches or when `node`
  itself is nil (so a nil result from `parse` propagates quietly)."
  [#^TagNode node #^String element]
  (when node
    (seq (.getElementListByName node element true))))

(defn-memo parse
  "Fetch `url` and parse the page with HtmlCleaner, returning the root
  TagNode.  Returns nil for a nil `url`.  Results are memoized per URL,
  so each page is downloaded (and \"parsing <path>\" printed) only once."
  [#^URL url]
  ;; The nil check must come before anything dereferences `url`.
  ;; NOTE(review): the memoization cache is keyed on java.net.URL, whose
  ;; equals/hashCode may perform host name resolution.
  (when url
    (println "parsing" (.getPath url))
    ;; `clean` consumes the whole stream before returning, so the stream
    ;; can be closed as soon as the tree has been built.
    (with-open [in (input-stream url)]
      (.clean (HtmlCleaner.) in))))

(defn attributes
  "Return the attributes of `node` as a Clojure map of name -> value."
  [#^TagNode node]
  (into {} (.getAttributes node)))

(def fanfiction-base "http://www.fanfiction.net")

(defn next-fanfiction-url
  "Return the URL of the page that follows `url` in a listing, or nil when
  there is none.  The next page is the href of the first <a> whose text
  contains \"Next\", appended to `fanfiction-base`."
  [#^URL url]
  (let [links     (tags-by-name (parse url) "a")
        ;; (?s) lets `.` cross line terminators, so link text that
        ;; contains a newline is still recognised.
        next-node (first (filter #(re-matches #"(?s).*Next.*" (.getText %))
                                 links))
        href      (when next-node ((attributes next-node) "href"))]
    ;; An anchor without an href must not turn into a link to the site root.
    (when href
      (URL. (str fanfiction-base href)))))

(defn get-stories
  "Return the <div> elements of the page at `url` whose class attribute is
  exactly \"z-list\"; each such node is treated as one story entry by the
  story-* accessors."
  [#^URL url]
  (filter #(= "z-list" ((attributes %) "class"))
          (tags-by-name (parse url) "div")))


(defn story-url
  "Return the href of the first <a> under `node`, appended to
  `fanfiction-base`."
  [#^TagNode node]
  (str fanfiction-base
       ((attributes (first (tags-by-name node "a"))) "href")))

(defn story-desc
  "Return the text of the second <div> found under `node`, or nil when
  `node` has fewer than two <div> descendants."
  [#^TagNode node]
  (when-let [#^TagNode desc (second (tags-by-name node "div"))]
    (.getText desc)))

(defn- story-count
  "Parse the integer captured by group 1 of `pattern` in the description of
  `node`; use the string `default` when the pattern does not match or the
  description is missing."
  [#^TagNode node pattern default]
  (Integer/parseInt
   (get (re-matches pattern (str (story-desc node))) 1 default)))

(defn story-reviews
  "Number following \"Reviews: \" in the story description; 0 when absent."
  [#^TagNode node]
  ;; (?s): the description text may span several lines.
  (story-count node #"(?s).*Reviews: (\d+).*" "0"))

(defn story-chapters
  "Number following \"Chapters: \" in the story description; 1 when absent."
  [#^TagNode node]
  (story-count node #"(?s).*Chapters: (\d+).*" "1"))

(defn story-title
  "Return the text of the first <a> under `node` as a String."
  [#^TagNode node]
  (str (.getText (first (tags-by-name node "a")))))

(defn all-stories
  "Collect the story nodes from `start-url` and the pages reached by
  following \"Next\" links.  At most `n` links are followed (so up to
  n + 1 pages are read); crawling also stops when a page has no next
  link.  A negative `n` never matches the page index, so only a missing
  next link ends the crawl.  Returns a vector of story nodes in page order."
  [n #^URL start-url]
  (loop [stories     []
         index       0
         current-url start-url]
    ;; `into` keeps the accumulator a flat vector; stacking `concat`
    ;; calls here would nest lazy seqs one level per page.
    (let [stories* (into stories (get-stories current-url))
          next-url (next-fanfiction-url current-url)]
      (if (and (not= index n) next-url)
        (recur stories* (inc index) next-url)
        stories*))))

(defn best-stories
  "Print the `n` highest-scoring stories reachable from `start-url`, where
  the score is reviews / (chapters + 1).  `search-limit` is passed to
  `all-stories` as its page limit.  For each story prints the title, url,
  review count and chapter count.  Returns nil."
  [n search-limit #^URL start-url]
  (let [score  (fn [story]
                 (/ (story-reviews story) (inc (story-chapters story))))
        ;; Score each story once up front instead of re-parsing its
        ;; description on every comparison made by the sort.
        scored (map (fn [story] [(score story) story])
                    (all-stories search-limit start-url))
        ranked (map second (reverse (sort-by first scored)))]
    (doseq [story (take n ranked)]
      (println (story-title story) "\n"
               (story-url story) "\n"
               " Reviews: " (story-reviews story) "\n"
               " Chapters:" (story-chapters story)))))