;;; sb-asahi.el --- shimbun backend for asahi.com -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 2001, 2002, 2003, 2004, 2005 ;; Yuuichi Teranishi ;; Author: TSUCHIYA Masatoshi , ;; Yuuichi Teranishi , ;; Katsumi Yamaoka , ;; NOMIYA Masaru ;; Keywords: news ;; This file is a part of shimbun. ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with this program; if not, you can either send email to this ;; program's maintainer or write to: The Free Software Foundation, ;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. ;;; Commentary: ;; Original code was nnshimbun.el written by ;; TSUCHIYA Masatoshi . ;;; Code: (require 'shimbun) (require 'sb-text) (luna-define-class shimbun-asahi (shimbun-japanese-newspaper shimbun-text) ()) (defvar shimbun-asahi-top-level-domain "asahi.com" "Name of the top level domain for the Asahi shimbun.") (defvar shimbun-asahi-url (concat "http://www." shimbun-asahi-top-level-domain "/") "Name of the parent url.") (defun shimbun-asahi-make-regexp (name) "Return a list of a regexp and numbers for the kansai.NAME group. Every `.' in NAME will be replaced with `/'." (list (let ((s0 "[\t\n  ]*") (s1 "[\t\n ]+") (no-nl "[^\n]+")) (concat "" s0 ;; 6. subject "\\(" no-nl "\\)" s0 "")) 1 nil 2 6 3 4 5)) (defvar shimbun-asahi-group-table (let* ((s0 "[\t\n  ]*") (s1 "[\t\n ]+") (no-nl "[^\n]+") (default (list (concat "" s0 ;; 5. subject "\\(" no-nl "\\)" s0 "") 1 4 nil 5 nil 2 3)) (default2 (shimbun-asahi-make-regexp "%s")) (default3 (list (concat "" s0 ;; 6. subject "\\(" no-nl "\\)" s0 "") 1 nil 2 6 3 4 5)) (edu (shimbun-asahi-make-regexp "edu.news")) (health (shimbun-asahi-make-regexp "health.news"))) `(("book" "出版ニュース" "book/news/" ,@(shimbun-asahi-make-regexp "book.news")) ("business" "ビジネス" "%s/list.html" ,@default) ;; The url should be ended with "index.html". ("business.column" "経済気象台" "business/column/index.html" ,@default2) ("car" "愛車" "%s/news/" ,@(shimbun-asahi-make-regexp "car.news")) ("car.italycolumn" "イタリア発アモーレ!モトーレ!" "car/italycolumn/" ,@default2) ("car.motorsports" "モータースポーツ" "car/motorsports/" ,@default2) ("car.newcar" "新車情報" "car/newcar/" ,@default2) ("car.newcarbywebcg" "新車発表会" "car/newcarbywebcg/" ,@default2) ("culture" "文化・芸能" "%s/list.html" ,@default) ("culture.column" "もやしのひげ" "culture/list_moyashi.html" ,@(shimbun-asahi-make-regexp "culture.column.moyashi")) ("digital" "デジタル機器" "digital/av/" ,@(shimbun-asahi-make-regexp "digital.av")) ("digital.apc" "雑誌「ASAHIパソコン」ニュース" "digital/apc/" ,@default2) ("digital.bcnnews" "eビジネス情報 (提供:BCN)" "digital/bcnnews/" ,@default2) ("digital.column01" "デジタルコラム" "digital/column01/" ,@default2) ("digital.hotwired" "HotWired Japan" "digital/hotwired/" ,@default2) ("digital.internet" "ネット・ウイルス" "digital/internet/" ,@default2) ("digital.mobile" "携帯電話" "digital/mobile/" ,@default2) ("digital.nikkanko" "日刊工業新聞ニュース" "digital/nikkanko/" ,@default2) ("digital.pc" "パソコン" "digital/pc/" ,@default2) ("editorial" "社説" "paper/editorial.html" ,(concat "]+>\\)*" s0 "([01]?[0-9]/[0-3]?[0-9])") (cdr rest)))) ("health" "健康・生活" "%s/news/" ,@health) ("health.aged" "福祉・高齢" "health/news/aged.html" ,@health) ("health.alz" "認知症特集" "health/news/alz.html" ,@health) ("health.medical" "医療・病気" "health/news/medical.html" ,@health) ("housing" "住まい" "%s/news/" ,@(shimbun-asahi-make-regexp "housing.news")) ("housing.amano" "天野彰のいい家いい家族" "housing/amano/" ,@default2) ("housing.column" "住まいのお役立ちコラム" "housing/column/" ,@default2) ("housing.diary" "小さな家の生活日記" "housing/diary/" ,@default2) ("housing.world" "世界のウチ" "housing/world/" ,@default2) ("igo" "囲碁" "%s/news/" ,@(shimbun-asahi-make-regexp "igo.news")) ("international" "国際" "%s/list.html" ,@default) ("international.jinmin" "人民日報" "international/jinmin/index.html" ,@default2) ("job" "就職・転職" "%s/news/" ,@(shimbun-asahi-make-regexp "job.news")) ("job.special" "週刊朝日・AERAから" "job/special/" ,(concat (car default2) "\\(" s0 "<[^>]+>\\)*" s0 "(" s0 ;; 8. extra "\\(" no-nl "\\)" ":") ,@(cdr default2) nil 8) ("kansai" "関西" "%s/news/" ,@(shimbun-asahi-make-regexp "kansai.news")) ("kansai.horiekenichi" "堀江謙一の世界一周ひとりぼっち" "kansai/horiekenichi/" ,@default2) ("kansai.umaimon" "うまいもん" "kansai/umaimon/" ,@default2) ("kansai.fuukei" "風景を歩く" "kansai/fuukei/" ,@default2) ("kansai.yotsuba" "よつ葉びより" "kansai/yotsuba/" ,@default2) ("kansai.smile" "スマイルスタイル" "kansai/smile/" ,@default2) ("kansai.keiki" "け〜きの“ええ話”" "kansai/keiki/" ,@default2) ("kansai.okiniiri" "DJのお気に入り" "kansai/okiniiri/" ,@default2) ("kansai.syun" "旬の顔" "kansai/syun/" ,@default2) ("kansai.takara" "たから図鑑" "kansai/takara/" ,@default2) ("kansai.kansaiisan" "勝手に関西世界遺産" "kansai/kansaiisan/" ,@default2) ("kansai.depa" "デパ地下NEWS" "kansai/depa/" ,@default2) ("kansai.kaban" "かばんの中身" "kansai/kaban/" ,@default2) ("kansai.kyosho" "巨匠に学べ" "kansai/kyosho/" ,@default2) ("kansai.okan" "母さんの知恵袋" "kansai/okan/" ,@default2) ("kansai.densetsu" "ほんま?関西伝説" "kansai/densetsu/" ,@default2) ("kansai.onayami" "みうらじゅんのお悩み祭り" "kansai/onayami/" ,@default2) ("kansai.sanshi" "三枝の笑ウインドウ" "kansai/sanshi/" ,@default2) ("life" "暮らし" "%s/list.html" ,@default) ("life.column" "暮らしコラム" "life/column/" ,(concat "" s0 ;; 6. subject "\\(" no-nl "\\)" s0 "") 1 nil 2 6 3 4 5) ("life.food" "食と料理" "life/food/" ,(concat "" s0 ;; 6. subject "\\(" no-nl "\\)" s0 "") 1 nil 2 6 3 4 5) ("nankyoku" "南極プロジェクト" "%s/news/" ,@(shimbun-asahi-make-regexp "nankyoku.news")) ("nankyoku.borderless" "国境のない大陸から" "nankyoku/borderless/" ,@default2) ("nankyoku.people" "越冬隊の人びと" "nankyoku/people/" ,@default2) ("nankyoku.whitemail" "WhiteMail@南極" "nankyoku/whitemail/" ,@default2) ("national" "社会" "%s/list.html" ,@default) ("national.calamity" "災害・交通情報" "national/calamity.html" ,@default3) ("national.etc" "その他・話題" "national/etc.html" ,@default3) ("national.trial" "裁判" "national/trial.html" ,@default3) ("obituaries" "おくやみ" "obituaries" ,@default) ("politics" "政治" "%s/list.html" ,@default) ("rss" "RSS" "http://www3.asahi.com/rss/index.rdf" ,(concat "" ;; 1. subject "\\([^<]+\\)" "\n" ;; 2. url "\\(http://www\\.asahi\\.com/" ;; 3. extra keyword (en) "\\([^/]+\\)" "/update/" ;; 4 and 5. serial number "\\([0-9]+\\)/\\([0-9]+\\)" "\\.html\\?ref=rss\\)" "\n\n" ;; 6. extra keyword (ja) "\\([^<]+\\)" "\n20[0-9][0-9]-" ;; 7. month "\\([01][0-9]\\)" "-" ;; 8. day. "\\([0-3][0-9]\\)" "T" ;; 9. hour:min:sec "\\([012][0-9]:[0-5][0-9]:[0-5][0-9]\\)") 2 4 5 1 nil 7 8 9 3 nil 6) ("science" "サイエンス" "%s/list.html" ,@(shimbun-asahi-make-regexp "science.news")) ("shopping" "ショッピング" "%s/news/" ,@(shimbun-asahi-make-regexp "shopping.news")) ("shopping.kishi" "岸朝子の気になるお取り寄せ12カ月" "shopping/kishi/" ,@default2) ("shopping.ryouhin" "くらしの良品探訪" "shopping/ryouhin/" ,@default2) ("shougi" "将棋" "%s/news/" ,@(shimbun-asahi-make-regexp "shougi.news")) ("sports" "スポーツ" "%s/list.html" ,@default) ("sports.baseball" "野球" "sports/bb/" ,@(shimbun-asahi-make-regexp "sports.bb")) ("sports.column" "スポーツコラム" "sports/column/" ,@default2) ("sports.football" "サッカー" "sports/fb/" ,@(shimbun-asahi-make-regexp "sports.fb")) ("sports.spo" "一般スポーツ" "sports/spo/" ,@default2) ("tenjin" "天声人語" "paper/column.html" ,(concat "\ \\|") (defvar shimbun-asahi-content-end "\ \\|") (defvar shimbun-asahi-x-face-alist '(("default" . "X-Face: +Oh!C!EFfmR$+Zw{dwWW]1e_>S0rnNCA*CX|\ bIy3rr^gW5)Q]N{Mmn\n L]suPpL|gFjV{S|]a-:)\\FR\ 7GRf9uL:ue5_=;h{V%@()={uTd@l?eXBppF%`6W%;h`#]2q+f*81n$B\n h|t"))) (defvar shimbun-asahi-expiration-days 6) (luna-define-method initialize-instance :after ((shimbun shimbun-asahi) &rest init-args) (shimbun-set-server-name-internal shimbun "朝日新聞") (shimbun-set-from-address-internal shimbun "nobody@example.com") ;; To share class variables between `shimbun-asahi' and its ;; successor `shimbun-asahi-html'. (shimbun-set-x-face-alist-internal shimbun shimbun-asahi-x-face-alist) (shimbun-set-expiration-days-internal shimbun shimbun-asahi-expiration-days) shimbun) (luna-define-method shimbun-groups ((shimbun shimbun-asahi)) (mapcar 'car shimbun-asahi-group-table)) (luna-define-method shimbun-current-group-name ((shimbun shimbun-asahi)) (nth 1 (assoc (shimbun-current-group-internal shimbun) shimbun-asahi-group-table))) (luna-define-method shimbun-index-url ((shimbun shimbun-asahi)) (let* ((group (shimbun-current-group-internal shimbun)) (index (nth 2 (assoc group shimbun-asahi-group-table)))) (if (string-match "\\`http:" index) index (concat shimbun-asahi-url (format index group))))) (defun shimbun-asahi-get-headers (shimbun) "Return a list of headers." (let ((group (shimbun-current-group-internal shimbun)) (from (concat (shimbun-server-name shimbun) " (" (shimbun-current-group-name shimbun) ")")) (case-fold-search t) regexp jname numbers cyear cmonth rss-p paper-p en-category hour-min month year day serial num extra rgroup id headers backnumbers) (setq regexp (assoc group shimbun-asahi-group-table) jname (nth 1 regexp) numbers (nthcdr 4 regexp) regexp (format (nth 3 regexp) (regexp-quote (shimbun-subst-char-in-string ?. ?/ group))) cyear (decode-time) cmonth (nth 4 cyear) cyear (nth 5 cyear) rss-p (string-equal group "rss") paper-p (member group '("editorial" "tenjin"))) (catch 'stop ;; The loop for fetching all the articles in the whitemail group. (while t (while (re-search-forward regexp nil t) (cond ((string-equal group "english") (setq en-category (save-excursion (save-match-data (if (re-search-backward "\ ]+\\)?>[\t\n ]*\\([^&]+\\)[\t\n ]*&#[0-9]+" nil t) (downcase (match-string 2))))))) (t (setq hour-min (save-excursion (save-match-data (if (re-search-forward "\ ]+>[\t\n ]*(\\([01]?[0-9]/[0-3]?[0-9][\t\n ]+\\)? \\([012]?[0-9]:[0-5][0-9]\\))[\t\n ]*" nil t) (match-string 2))))))) (setq month (string-to-number (match-string (nth 5 numbers))) year (if (setq num (nth 4 numbers)) (string-to-number (match-string num)) (cond ((>= (- month cmonth) 2) (1- cyear)) ((and (= 1 month) (= 12 cmonth)) (1+ cyear)) (t cyear))) day (string-to-number (match-string (nth 6 numbers))) serial (cond (rss-p (format "%d%s.%s" year (match-string (nth 1 numbers)) (match-string (nth 2 numbers)))) (paper-p (format "%d%02d%02d" year month day)) ((and (setq num (nth 1 numbers)) (match-beginning num)) (format "%d%02d%02d.%s" year month day (match-string num))) (t (shimbun-subst-char-in-string ?/ ?. (downcase (match-string (nth 2 numbers)))))) extra (or (and (setq num (nth 8 numbers)) (match-beginning num) (match-string num)) (and (setq num (nth 9 numbers)) (match-beginning num) (match-string num))) rgroup (mapconcat 'identity (nreverse (save-match-data (split-string group "\\."))) ".") id (if (and extra (not (member group '("job.special")))) (concat "<" serial "%" extra "." rgroup "." shimbun-asahi-top-level-domain ">") (concat "<" serial "%" rgroup "." shimbun-asahi-top-level-domain ">"))) (unless (and (shimbun-search-id shimbun id) (if backnumbers (throw 'stop nil) ;; Don't stop it since there might be more new ;; articles even if the same links are repeated. t)) (push (shimbun-create-header ;; number 0 ;; subject (cond (rss-p (match-string (nth 3 numbers))) (en-category (concat "[" en-category "] " (match-string (nth 3 numbers)))) ((and (setq num (nth 8 numbers)) (match-beginning num)) (concat "[" (match-string num) "] " (match-string (nth 3 numbers)))) ((and (setq num (nth 9 numbers)) (match-beginning num)) (concat "[" (match-string num) "] " (match-string (nth 3 numbers)))) (paper-p (concat jname (format " (%d/%d)" month day))) (t (match-string (nth 3 numbers)))) ;; from (if (and rss-p (setq num (nth 10 numbers)) (setq num (match-string num))) (save-match-data (shimbun-replace-in-string from "(RSS" (concat "\\&:" num))) from) ;; date (shimbun-make-date-string year month day (cond ((and (setq num (nth 7 numbers)) (match-beginning num)) (match-string num)) (paper-p "07:00") (t hour-min))) ;; id id ;; references, chars, lines "" 0 0 ;; xref (shimbun-expand-url (match-string (nth 0 numbers)) (if paper-p (concat shimbun-asahi-url "paper/") shimbun-asahi-url))) headers))) (if (string-equal group "nankyoku.whitemail") (progn (cond ((eq backnumbers 'stop) (throw 'stop nil)) ((null backnumbers) (while (re-search-forward "" nil t) (unless (member (setq id (match-string 1)) backnumbers) (push id backnumbers))))) (if backnumbers (progn (shimbun-retrieve-url (prog1 (car backnumbers) (erase-buffer) (unless (setq backnumbers (cdr backnumbers)) (setq backnumbers 'stop))))) (throw 'stop nil))) (throw 'stop nil)))) (append (shimbun-sort-headers headers) (shimbun-asahi-get-headers-for-today group jname from)))) (luna-define-method shimbun-get-headers ((shimbun shimbun-asahi) &optional range) (shimbun-asahi-get-headers shimbun)) (defun shimbun-asahi-get-headers-for-today (group jname from) "Return a list of the header for today's article. It works for only the groups `editorial' and `tenjin'." (goto-char (point-min)) (let ((basename (cdr (assoc group '(("editorial" . "editorial") ("tenjin" . "column"))))) year month day url) (when (and basename (re-search-forward (concat ;; 1. year "\\(20[0-9][0-9]\\)" "年" ;; 2. month "\\([01]?[0-9]\\)" "月" ;; 3. day "\\([0-3]?[0-9]\\)" "日" "(.曜日)付") nil t)) (setq year (string-to-number (match-string 1)) month (string-to-number (match-string 2)) day (string-to-number (match-string 3)) url (format "paper/%s%d%02d%02d.html" basename year month day)) (list (shimbun-make-header ;; number 0 ;; subject (shimbun-mime-encode-string (concat jname (format " (%d/%d)" month day))) ;; from from ;; date (shimbun-make-date-string year month day "07:00") ;; id (format "<%d%02d%02d%%%s.%s>" year month day group shimbun-asahi-top-level-domain) ;; references, chars, lines "" 0 0 ;; xref (shimbun-expand-url url shimbun-asahi-url)))))) (defun shimbun-asahi-prepare-article (shimbun header) "Prepare an article. Extract the article core on some groups or adjust a date header if there is a correct information available. For the groups editorial and tenjin, it tries to fetch the article for that day if it failed." (let ((case-fold-search t) (group (shimbun-current-group-internal shimbun))) (cond ((string-equal group "editorial") (let ((regexp "\ ]+\\)?>[\t\n ]*") (retry 0) index) (while (<= retry 1) (if (re-search-forward regexp nil t) (progn (goto-char (match-beginning 0)) (insert "") (when index (insert "\ \n

(指定された url が まだ/すでに 無いので、\ トップページ から記事を取得しました)

\n")) (search-forward "" nil t) (while (re-search-forward regexp nil t)) (when (re-search-forward "[\n\t ]*

" nil t) (insert "\n")) (setq retry 255)) (erase-buffer) (if (zerop retry) (progn (shimbun-retrieve-url (setq index (shimbun-index-url shimbun))) (goto-char (point-min))) (insert "Couldn't retrieve the page.\n"))) (setq retry (1+ retry))))) ((string-equal group "tenjin") (let ((retry 0) index) (while (<= retry 1) (if (and (search-forward "【天声人語】" nil t) (re-search-forward "]+>[\t\n ]*" nil t)) (progn (insert "") (when index (insert "\ \n

(指定された url が まだ/すでに 無いので、\ トップページ から記事を取得しました)

\n")) (while (re-search-forward "[\t\n ]*]+>[\t\n ]*" nil t) (delete-region (match-beginning 0) (match-end 0))) (when (re-search-forward "[\t\n ]*" nil t) (goto-char (match-beginning 0)) (insert "\n")) (setq retry 255)) (erase-buffer) (if (zerop retry) (progn (shimbun-retrieve-url (setq index (shimbun-index-url shimbun))) (goto-char (point-min))) (insert "Couldn't retrieve the page.\n"))) (setq retry (1+ retry))))) (t (when (re-search-forward (eval-when-compile (let ((s0 "[\t\n ]*") (s1 "[\t\n ]+")) (concat "" s0 ;; 1. year "\\(20[0-9][0-9]\\)年" ;; 2. month "\\([01]?[0-9]\\)月" ;; 3. day "\\([0-3]?[0-9]\\)日" ;; 4. hour "\\([012]?[0-9]\\)時" ;; 5. minute "\\([0-5]?[0-9]\\)分" s0 "

"))) nil t) (shimbun-header-set-date header (shimbun-make-date-string (string-to-number (match-string 1)) (string-to-number (match-string 2)) (string-to-number (match-string 3)) (concat (match-string 4) ":" (match-string 5)) "+0900")))))) (goto-char (point-min))) (luna-define-method shimbun-make-contents :before ((shimbun shimbun-asahi) header) (shimbun-asahi-prepare-article shimbun header)) (provide 'sb-asahi) ;;; sb-asahi.el ends here