sitescooper_archive.site:
# This is a sitescooper site file. see # by Stefan Schwingeler, Version 0.1, 09.11.1999# Thanks Stefan!#URL: Name: Sitescooper Archive Levels: 2 ContentsStart: <font.*>date</font> ContentsEnd: </table> ContentsCachable: 0 StoryURL: http://groups.yahoo.com/group/sitescooper-archive/\d+\.html\? StoryStart: Subject: StoryEnd: alt="Previous" StoryCacheable: 1# rm center StoryPostProcess: { s/v?align=center//gim; }
sitescooper_changes.site:
URL: http://sitescooper.org/devel/LATEST_CHANGES.html Name: Sitescooper Latest ChangesDescription: the Sitescooper development change logLevels: 1StoryDiff: 1UseTableSmarts: 0TableRender: flatten
bsdtoday.site:
URL: Name: BSD TodayDescription: Your Daily Source for BSD News and InformationLevels: 2UseTableSmarts: 0TableRender: flattenContentsStart: <img src="/images/black.gif" width="1" height="550">ContentsEnd: <b>Resources</b><br>StoryURL: http://www.bsdtoday.com/\d+/.*\d+.htmlStoryStart: <img src="images/black.gif" width="1" height="550">StoryEnd: <b>Please share your comments.</b>
openbsd_journal.site:
URL: Name: OpenBSD JournalLevels: 2AuthorName: Barry Dexter A. GonzagaAuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.phStoryURL: .*action=article.*StoryToPrintableSub: s/(sid=\d+$)/\1\&mode=flat/ContentsStart: About :ContentsEnd: <b>Features</b>
oreillynet_bsd.site:
URL: Name: O'Reilly Net BSDLevels: 2ContentsStart: -- BSD Lede --ContentsEnd: -- digest --StoryURL: /pub/a/bsd/[[YYYY]]/\d+/\d+/\S+.html(|\?page=\d+)StoryStart: -- content here --StoryEnd: -- footer area --StoryFollowLinks: 1
businessweek.site:
URL: Name: BusinessWeek OnlineLevels: 3AuthorName: Barry Dexter A. GonzagaAuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.phStoryURL: .*\.(htm|html)StoryURL: /list/.*\.htmStoryURL: /.*/.*/.*/.*\.htmStorySkipURL: /ads/contents.htmImageURL: /common_images/.*\.gif
cnn_financial.site:
# CNN FinancialURL: # created from PODS file by David A. DesrosiersAuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: CNN FinancialLevels: 2ImageURL: .*\.gifImageScaleToMaxWidth: 150ContentsCachable: 0StoryURL: http://wireless.cnn.com/avantgo/CNNMONEY/en/stories/.*StoryCachable: 1
fuckedcompany.site:
# AuthorName: jm## I love this site, just for the author's pure schadenfreude!#URL: Name: Fucked CompanyDescription: the dot-com deadpoolStoryStart: <img src="images/recent_groove.gif" width=402 height=2></td>StoryEnd: <td> <a href="archives">View more headlines</a></td>
industry_week.site:
URL: Name: Industry WeekLevels: 2ContentsPrint: 1ImageURL: http://.*## This site was converted from an AvantGo .subs file by subs-to-site.pl.# See http://sitescooper.org/ for more information on sitescooper.
motley-fool.site:
URL: Name: The Motley FoolDescription: To Educate, Amuse, and EnrichContentsFormat: rssStoryURL: /.*\.htmStoryEnd: <A NAME="NUMBERS">StoryStart: <BODY # as dictated in Rights: Copyright 1996-2000 The Motley Fool. All rights reserved.MinPages: 2
the_economist.site:
URL: http://www.economist.com/index.html?nonNA=1Name: EconomistDescription: EconomistAuthorName: Goh Boon Nam# Version 1.2# Date updated : 30 Dec 2004# Changes made : Change of URL + Remove Subscription-only pages which cause problem to PluckerLevels: 2ContentsStart: <td colspan="7" width="447" valign="top">ContentsEnd: Only one answer is correctContentsUseTableSmarts: 0StoryToPrintableSub: s!displayStory.cfm!PrinterFriendly.cfm!StoryURL: http://www.economist.com/(.*?)/PrinterFriendly.cfm(.*?)#This image is the icon to indicate story not availableImageURL: ContentsHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; }StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/<div id="wholepage" style="visibility: hidden">(.*?)<\/noscript>//gis; }
lazarus_at_large.site:
# site_samples/business/lazarus_at_large.site## SF Chronicle Columnists : David Lazarus, "Lazarus at Large"# by Akkana PeckURL: http://sfgate.com/cgi-bin/search/columnists.cgi?waisdbname=/chronicle/&byline=David+LazarusName: Lazarus at LargeLevels: 2ContentsStart: <INPUT TYPE="submit" VALUE="View Archive">ContentsDiff: 1StoryURL: http://sfgate.com/cgi-bin/article.cgi.*StoryStart: <!-- end #additionalcontent -->StoryEnd: <!-- END STORY -->
darkhorizons.site:
# Author: MMiller /at/ media-general.comURL: Name: Dark Horizons Levels: 1 ContentsStart: UPDATE: ContentsEnd: HR WIDTH=40% StoryCacheable: 0 ContentsDiff: 0
ebert_1min.site:
# roger_ebert_1min.site# AuthorName: Alan Hoyle <alan@alanhoyle.com>## Reads the Roger Ebert One Minute Movie ReviewsURL: Name: Roger Ebert One-Minute ReviewsDescription: Roger Ebert's One-Minute Movie ReviewsLevels: 2Category: DailyContentsStart: Begin ContentContentsEnd: End ContentStoryURL: .*ebert_reviews/.*\.htmlStoryCacheable: 1StoryHeadline: <h2>(.*)</h2>StoryStart: Begin ReviewStoryEnd: End ContentRights: Copyright © Chicago Sun-Times Inc.
ebert_answer_man.site:
# ebert_answer_man.site# Roger Ebert's Movie Answer Man weekly Q&A columnURL: Name: Roger Ebert: Movie Answer ManDescription: Roger Ebert's Movie Answer Man weekly Q&A columnLevels: 2ContentsStart: <!-- Begin Content -->StoryURL: .*answ-man/.*\.htmlStoryCacheable: 1StoryHeadline: <h2>(.*)</h2>StoryStart: <!-- Begin Content --> StoryEnd: <!-- End Content -->
ebert_features.site:
# ebert_features.site# Roger Ebert's Movie Feature ArticlesURL: Name: Roger Ebert: Interviews-essays-festivalsDescription: Roger Ebert's movie feature articlesLevels: 2ContentsStart: <!-- Begin Content -->StoryURL: .*eb-feature/.*\.htmlStoryCacheable: 1StoryHeadline: <h2>(.*)</h2>StoryStart: <!-- Begin Content --> StoryEnd: <!-- End Content -->
ebert_great_movies.site:
# ebert_great_movies.site# Roger Ebert's "The Great Movies"URL: Name: Roger Ebert: The Great MoviesDescription: Roger Ebert's regular "The Great Movies" featureLevels: 2ContentsStart: <!-- Begin Content -->ContentsDiff: 1StoryURL: .*ebert/greatmovies/.*\.htmlStoryCacheable: 1StoryHeadline: <h[12]>(.*)</h[12]>StoryStart: <!-- Begin Content --> StoryEnd: <!-- End Content -->
filthy_critic.site:
URL: Name: The Filthy CriticLevels: 1StoryStart: <TD WIDTH="440" VALIGN="TOP">StoryEnd: </HTML>
imdb_studio_briefing.site:
# IMDB.com Movie/TV news# Author: Jan Lund Thomsen <kwed@kwed.org>URL: Name: IMDB Movie/TV newsLevels: 1AuthorName: Jan Lund ThomsenAuthorEmail: kwed@kwed.orgStoryStart: <!-studiodate -->StoryEnd: <A HREF="mailto:studiobrf@aol.com">Studio Briefing</A> Edited by <A HREF="http://members.aol.com/studiobrf/lewirwin/lewsbio.html">Lew Irwin</A>
roger_ebert.site:
# roger_ebert.site# AuthorName: Justin Henry <jhenry@fjicl.com># Modified: Alan Hoyle <alan /at/ alanhoyle.com>## Modified to read the Ebert review index page, and to deal# with a new SunTimes page format # Modified to exclude extraneous bottom of page stuff.URL: Name: Roger Ebert ReviewsDescription: Roger Ebert's Movie ReviewsLevels: 2Category: DailyContentsStart: <!-- Begin Content -->ContentsEnd: End ContentStoryURL: .*ebert1/.*\.htmlStoryCacheable: 1StoryHeadline: <h[12]>(.*)</h[12]>StoryStart: <!-- Begin Content -->StoryEnd: End Content
variety.site:
URL: Name: Variety.ComLevels: 2ContentsPrint: 1ImageURL: http://.*## This site was converted from an AvantGo .subs file by subs-to-site.pl.# See http://sitescooper.org/ for more information on sitescooper.
apartment_3g.site:
URL: Name: Apartment 3-GStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Apartment_3-G.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
baby_blues.site:
URL: Name: Baby BluesStoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Baby_Blues.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
barney_google_and_snuffy_smith.site:
URL: Name: Barney Google and Snuffy SmithStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Barney_Google.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
beetle_bailey.site:
URL: AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: Beetle BaileyStoryStart: <!--CMS NAME="image"-->StoryEnd: by <!--CMS NAME="author"ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Beetle_Bailey.*ImageScaleToMaxWidth: 500
better_half.site:
URL: Name: The Better HalfStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Better_Half.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
between_friends.site:
URL: Name: Between FriendsStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Between_Friends.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
blondie.site:
URL: Name: BlondieStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Blondie.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
boondocks.site:
URL: AuthorName: Ignatz Sol [iggy /at/ mechanolatry.com] Name: Boondocks StoryStart: <!--- comics view content ---> StoryEnd: <!--calendar--> StoryDiff: 1 ImageOnlySite: 1 ImageURL: http://images.ucomics.com/comics/bo/200\d/bo.* #ImageScaleToMaxWidth: 450 UseTableSmarts: 0
buckles.site:
URL: Name: BucklesStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Buckles.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
calvin_and_hobbes.site:
URL: AuthorName: Marko Bozikovic <redbyron /at/ fly.srk.fer.hr> modified by Gary Paulson Name: Calvin and Hobbes StoryStart: <!-- end comic nav --> # ?did not work? StoryStart: \gtimg src=" StoryEnd: <!--calendar--> ImageOnlySite: 1 ImageURL: .*/ch/\d\d\d\d/ch.*\.gif ImageScaleToMaxWidth: 550 StoryHTMLPreProcess: { s!<a href..http.//www.ucomics.com/shopping/buycomic.cfm.uc_fn=1.uc_full_date=\d+?.uc_daction.X.uc_comic=ch.>!!gsi; }
crock.site:
URL: Name: CrockStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Crock.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
curtis.site:
URL: Name: CurtisStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Curtis.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
dennis_the_menace.site:
URL: AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: Dennis the MenaceStoryStart: <!--CMS NAME="image"-->StoryEnd: by <!--CMS NAME="author"ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Dennis_The_Menace.*ImageScaleToMaxWidth: 500
dilbert.site:
URL: AuthorName: Kevin L. Dupree <kdupree /at/ flash.net> Name: Dilbert StoryStart: COMIC STRIP BEGIN StoryEnd: COMIC STRIP END StoryDiff: 1 ImageOnlySite: 1 ImageURL: http://www.dilbert.com/comics/dilbert/archive/images/.* ImageScaleToMaxWidth: 450 UseTableSmarts: 0 # add size info so sitescooper knows to make it into a # link for Plucker. StoryHTMLPreProcess: { s/ALT="Today.s Dilbert Comic"/ ALT="Today.s Dilbert Comic" WIDTH=600 HEIGHT=211 /gs; }
dinette_set.site:
URL: Name: The Dinette SetStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Dinette_Set.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
doonesbury.site:
URL: AuthorName: Ignatz Sol [iggy /at/ mechanolatry.com] Name: Doonesbury StoryStart: no next date StoryEnd: dose_feature_menu4_01.gif StoryDiff: 1 ImageOnlySite: 1 ImageURL: http://images.ucomics.com/comics/db/200\d/db.* #ImageScaleToMaxWidth: 500 UseTableSmarts: 0
edge_city.site:
URL: Name: Edge CityStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Edge_City.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
family_circus.site:
URL: Name: Family CircusStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/familyc/fct.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ rustymail.com
flash_gordon.site:
URL: Name: Flash GordonStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/fgordon/fg.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
funky_winkerbean.site:
URL: Name: Funky WinkerbeanStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Funky_Winkerbean.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
grin_and_bear_it.site:
URL: Name: Grin and Bear ItStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Grin_and_Bear_It.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
hagar_the_horrible.site:
URL: AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: Hagar the HorribleStoryStart: <!--CMS NAME="image"-->StoryEnd: by <!--CMS NAME="author"ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Hagar_The_Horrible.*ImageScaleToMaxWidth: 450
hazel.site:
URL: Name: HazelStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/hazel/hat.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
henry.site:
URL: Name: HenryStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/henry/het.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
hi_and_lois.site:
URL: Name: Hi and LoisStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Hi_and_Lois.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
judge_parker.site:
URL: Name: Judge ParkerStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Judge_Parker.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
katzenjammer_kids.site:
URL: Name: The Katzenjammer KidsStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/katzkids/kk.*\.jpgImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
lockhorns.site:
URL: Name: The LockhornsStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Lockhorns.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
mallard_fillmore.site:
URL: Name: Mallard FillmoreStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Mallard_Fillmore.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
mandrake_the_magician.site:
URL: Name: Mandrake the MagicianStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/mandrake/mmt.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
mark_trail.site:
URL: Name: Mark TrailStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Mark_Trail.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
marvin.site:
URL: Name: MarvinStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Marvin.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
mary_worth.site:
URL: Name: Mary WorthStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Mary_Worth.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
moose_and_molly.site:
URL: Name: Moose and MollyStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/moosemol/mot.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
mutts.site:
URL: Name: MuttsStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Mutts.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
norm.site:
URL: Name: The NormStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Norm.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
on_the_fastrack.site:
URL: Name: On The FastrackStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Fast_Track.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
phantom.site:
URL: Name: The PhantomStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Phantom.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
piranha_club.site:
URL: Name: The Piranha ClubStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Piranha.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
popeye.site:
URL: AuthorName: Marko Bozikovic <redbyron /at/ fly.srk.fer.hr>Name: PopeyeStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Popeye.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui Thean (revision)AuthorEmail: yoonfui /at/ bigfoot.com
prince_valiant.site:
URL: Name: Prince ValiantStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/pvaliant/val.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
redeye.site:
URL: Name: RedeyeStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Redeye.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
rex_morgan_md.site:
URL: Name: Rex Morgan M.D.StoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Rex_Morgan.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
rhymes_with_orange.site:
URL: Name: Rhymes With OrangeStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Rhymes_with_Orange.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
safe_havens.site:
URL: Name: Safe HavensStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Safe_Havens.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
sally_forth.site:
URL: Name: Sally ForthStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Sally_Forth.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
sam_and_silo.site:
URL: Name: Sam and SiloStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/sam_silo/sst.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
shermans_lagoon.site:
URL: Name: Sherman's LagoonStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Shermans_Lagoon.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
six_chix.site:
URL: Name: Six ChixStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/6Chix.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
slylock_fox.site:
URL: Name: Slylock FoxStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Slylock.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
spiderman.site:
URL: AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: The Amazing SpidermanStoryStart: <!--CMS NAME="image"-->StoryEnd: by <!--CMS NAME="author"ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Spiderman.*ImageScaleToMaxWidth: 500
steve_roper_and_mike_nomad.site:
URL: Name: Steve Roper and Mike NomadStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Steve_Roper.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
tedrall.site:
URL: AuthorName: Ignatz Sol [iggy /at/ mechanolatry.com] Name: Ted Rall StoryStart: no next date StoryEnd: Get Ted Rall by e-mail StoryDiff: 1 ImageOnlySite: 1 ImageURL: http://images.ucomics.com/comics/trall/200\d/tr.* #ImageScaleToMaxWidth: 450 UseTableSmarts: 0
theyll_do_it_every_time.site:
URL: Name: They'll Do It Every TimeStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/TDIE.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
thismodernworld.site:
URL: Name: This Modern World Description: This Modern World by Tom Tomorrow Levels: 1 StoryDiff: 1 # thx to Adrian Colley <aecolley AT spamcop net>
tiger.site:
URL: Name: TigerStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Tiger.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
trudy.site:
URL: Name: TrudyStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://www.kingfeatures.com/features/comics/trudy/trt.*\.gifImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
tumbleweeds.site:
URL: Name: TumbleweedsStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Tumbleweeds.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
user_friendly.site:
URL: AuthorName: Kevin L. Dupree <kdupree /at/ flash.net> Name: User Friendly StoryStart: <!--Start Current Strip--> StoryEnd: <!--End Strip--> StoryDiff: 1 ImageOnlySite: 1 ImageURL: /cartoons/archives/.*\.gif ImageScaleToMaxWidth: 550
zippy_the_pinhead.site:
URL: Name: Zippy The PinheadStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Zippy_the_Pinhead.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
zits.site:
URL: Name: ZitsStoryStart: <!--CMS NAME="image"-->StoryEnd: <!--/CMS-->StoryDiff: 1ImageOnlySite: 1ImageURL: http://est.rbma.com/content/Zits.*ImageScaleToMaxWidth: 500AuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
world_new_york.site:
URL: Name: World New YorkDescription: Links to, and extracts from, quality writing on the webLevels: 2ContentsStart: <!-- Weblog entries -->ContentsEnd: !-- Link to RSS Syndication page -->StoryURL: http://www.worldnewyork.net/comments.php\?id=\S+StoryStart: <div class="lgbody"><p>StoryEnd: <h4>COMMENTS</h4># site file author detailsAuthorName: Justin MasonAuthorEmail: jm@jmason.org# This site gets bonus points for linking to the palm version as the# "AvantGo/SiteScooper/Palm Version" in its early days ;)
oracularities.site:
URL: Name: The Internet OracleLevels: 2ContentsStart: <body>ContentsEnd: </body>StoryURL: http://www.cs.indiana.edu/hyplan/oracle/digests/.*\.htmlStoryStart: <body>StoryEnd: </body>StoryHTMLPreProcess: { s/<form/<ignore/g; s/<\/form>/<\/ignore>/g;}MinPages: 2
wingmail.site:
# From: artwells <artwells@artwells.com>URL: http://www.artwells.com/oracula/web/serve-wing.php?wingrequest=[[YYYY]][[MM]][[DD]]Name: Wingmail DailyLevels: 1
gamasutra_features.site:
URL: Name: GamaSutra Levels: 2 ContentsStart: .BeginEditable "content" ContentsEnd: -- .BeginLibraryItem "/Library/.*_footer.lbi" -- StoryStart: -- .BeginEditable "main.20content" StoryEnd: -- .BeginLibraryItem "/Library/.*_footer.lbi" -- # We only read linked stories for features, not for newswire items. # ah shaggit, let's get the newswires too. StoryURL: .*(features|newswire)/.*\.htm.* # Need to follow links into other story pages StoryFollowLinks: 1 StoryHeadline: <title>Gamasutra - \S+ - (.*?) \[.*?\]\s*</title>
gamedev_net.site:
URL: Name: GameDev.netDescription: Maximum Game Development!ContentsFormat: rssStoryURL: /info/news/fullstory.asp.*
happypenguin.site:
URL: Name: Linux Game TomeDescription: The latest Linux game newsLevels: 2ContentsStart: <form method="GET" action="http://happypenguin.org/news">ContentsEnd: <a href="happypenguin.org/news?start=10">StoryURL: http://.*happypenguin.org/show.*StoryStart: <tr bgcolor=#000080><td width="20" valign=top align=left><img src="http://happypenguin.org/images/tl.gif" width=20 height=20 alt=""></td>StoryEnd: </HTML>ContentsUseTableSmarts: 0StoryUseTableSmarts: 0TableRender: flatten
oldmanmurray.site:
URL: Name: Old Man MurrayDescription: Game news and reviews with a thoroughly nasty flavourTableRender: flattenLevels: 2ContentsStart: Make sure to check to the left for all the latest on OldManMurray.com</SMALL></TD>StoryURL: http://www.oldmanmurray.com/(features|shortreviews|longreviews|seanbaby)/.*html.*StoryURL: StoryFollowLinks: 1StoryStart: src="http://www.oldmanmurray.com/logoimages/ugologo\S+.gif"
bofh-2k+1.site:
URL: Name: 2001: A BOFH Odyssey Description: Bastard Operator From Hell: 2001 Edition AuthorName: Barry Dexter A. Gonzaga AuthorEmail: barryg /at/ kssp.upd.edu.ph Levels: 2 StoryURL: /content/archive/\d+\.html ContentsStart: <HR> ContentsEnd: <BR></DIV>.<DIV><IMG.SRC= StoryStart: <HR> StoryEnd: <BR></DIV>.<DIV><IMG.SRC= StoryHTMLPreProcess: { s/<DIV CLASS=.storyhead.>(.*?)<\/DIV>/<H2>$1<\/H2>/is; s/<DIV CLASS=.storybyline.>(.*?)<\/DIV>/<H3>$1<\/H3>/is; s/<DIV CLASS=.indexposted.>(.*?)<\/DIV>/<H3>$1<\/H3>/is; s/<DIV CLASS=.storybody.><b>(.*?)<\/b>/<H4>$1<\/H4>/is; s/<br>.<br>(.*?)<br>.<br>/<\/p><p>$1<\/p><p>/gs; } StoryPostProcess: { s/<b><b>//is; s/<i><i>//is; s/<\/H4>.<\/p>/<\/H4>/is; }
bofh-2k.site:
URL: Name: BOFH 2K: The Kit and caboodle Description: Bastard Operator From Hell: 2000 Edition AuthorName: Barry Dexter A. Gonzaga AuthorEmail: barryg /at/ kssp.upd.edu.ph Levels: 2 StoryURL: /content/\d+/\d+\.html ContentsStart: <HR> ContentsEnd: <BR></DIV>.<DIV><IMG.SRC= StoryStart: <HR> StoryEnd: <BR></DIV>.<DIV><IMG.SRC= StoryHTMLPreProcess: { s/<DIV CLASS=.storyhead.>(.*?)<\/DIV>/<H2>$1<\/H2>/is; s/<DIV CLASS=.storybyline.>(.*?)<\/DIV>/<H3>$1<\/H3>/is; s/<DIV CLASS=.indexposted.>(.*?)<\/DIV>/<H3>$1<\/H3>/is; s/<DIV CLASS=.storybody.><b>(.*?)<\/b>/<H4>$1<\/H4>/is; s/<br>.<br>(.*?)<br>.<br>/<\/p><p>$1<\/p><p>/gs; } StoryPostProcess: { s/<b><b>//is; s/<i><i>//is; s/<\/H4>.<\/p>/<\/H4>/is; }
bofh.site:
# Bastard Operator from HellURL: Name: BOFH Levels: 2 StoryURL: /content/\d+/\d+\.html StoryCacheable: 1 MinPages: 2 StoryUseTableSmarts: 0 ContentsUseTableSmarts: 0 ContentsStart: <IFRAME SRC=. ContentsEnd: <TD WIDTH="150" ALIGN="right" VALIGN="top"> StoryHTMLPreProcess: { s/<DIV CLASS=.story_head.>(.*?)<\/DIV>/<H2 CLASS='story_head'>$1<\/H2>/is; s/<br>.<br><B>Related (?:[sS]tory|[sS]tories|[lL]ink|[lL]inks)<\/B>.*\Z//s; s/<br>+/<br>/i; s/<br><p>(?:<br>)*/<p>/i; } MinPages: 2AuthorName: Robert Edmonds <stu@brainfood.com>
bofh_archive.site:
# Bastard Operator from Hell official archiveURL: AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: Bastard Operator from HellLevels: 2StoryURL: .*\.htmlStoryCachable: 1
dave_barry.site:
URL: Name: Dave BarryDescription: Dave Barry's column for the Miami HeraldAuthorName: (update) Alan Hoyle <alan /at/ alanhoyle.com>Levels: 2ContentsStart: <td class="smalltitle" nowrap="nowrap">LATEST COLUMNContentsEnd: rightrail#StoryURL: .*/dave_barry/.*\.htm #StoryURL: .*/gift_guide/.*\.htm StoryURL: .*\.htmStoryStart: begin body-contentStoryEnd: end body-contentStoryHeadline: <h1>(.*?)</h1>ContentsHTMLPreProcess: { s/(<td><hr size=\"1\" color=\"\#cccccc\" width=\"98\%\"><\/td>)//gm; s/(ADVERTISEMENT)//gm; s/^.*(Get in touch).*$//gm; s/^.*(davebarry).*$//gm; s/^.*(weird_news).*$//gm; s/^.*(vertdotline).*$//gm;}StoryHTMLPreProcess: { s/^.*(byline).*$//gm; s/^.*(Read more).*$//gm;}
jon_carroll.site:
# site_samples/humor/jon_carroll.site## San Francisco Chronicle : Columnists : Jon CarrollURL: Name: Jon CarrollLevels: 2AuthorName: Jan Lund ThomsenAuthorEmail: kwed@kwed.orgContentsStart: <!-- \*\*\*\*\* BEGIN COLUMN RESULTS HERE \*\*\*\*\* -->ContentsEnd: <!-- \*\*\*\*\* END COLUMN RESULTS HERE \*\*\*\*\* -->StoryURL: http://www.sfgate.com/cgi-bin/article.cgi.*StoryToPrintableSub: s/(.+)/$1\&type=printable/StoryStart: <hr size="1" align="left">#StoryStart: <!-- end #additionalcontent -->#StoryEnd: <!-- END STORY -->
pigdog.site:
# Pigdog JournalURL: Name: Pigdog JournalDescription: The Online Handbook of Bad People of the FutureContentsFormat: rssStoryURL: /.*.s?html?StoryStart: Feedback<br>StoryEnd: <td background="images/rightborder.gif">ContentsStart: <item>AuthorName: Robert Edmonds <stu@brainfood.com>
satirewire.site:
URL: Name: SatireWireDescription: New Satire for the New EconomyLevels: 2ContentsStart: <table border="0" cellpadding="4" cellspacing="2" align="right" width="125">ContentsEnd: <a href=".top">Back to Top</a>ContentsUseTableSmarts: 0StoryURL: http://www.satirewire.com/(../)?(briefs|features|news)/\S+htmlStoryStart: ======= BODY =======StoryEnd: Start of Recommend-It CodeStoryHeadline: <title>SatireWire \| (.*?)</title>URLProcess: { s,http://www.satirewire.com/../,http://www.satirewire.com/,;}
the_onion.site:
# the_onion.site# includes all stories on far left, but not the images.# includes Statshot and Infographic imagesURL: Name: The OnionLevels: 3IssueLinksStart: <FRAMESETIssueLinksEnd: <NOFRAMES>UseTableSmarts: 0ContentsStart: <!-- Side-News Content End Begin -->ContentsEnd: <!-- STATshot Link End -->ContentsURL: http://www.theonion.com/onion\d+/index\d*\.htmlContentsHTMLPreProcess: { s/<.-- News Archives Begin -->.*?<.-- News-In-Brief Begin -->//gs;}StoryStart: Header End --> StoryEnd: Footer Begin -->StoryURL: .*\.html# there's a link button in the left column, along with all the archivesStorySkipURL: http://www.theonion.com/(info/onion_link.html|archive/.*)# comment out these 2 lines to exclude the statshot and infographic imagesImageURL: .*statshot.*\.gifImageURL: .*infograph.*\.gifMinPages: 2
javaworld.site:
# Javaworld.site# Thanks to Glenn Proctor <glenn@docproc.com> and Lim Swee Tat <st_lim@3ui.com>URL: Name: JavaWorldLevels: 2Description: Java programming articles.# Mark contentsContentsStart: -- content cell --ContentsEnd: -- end content cell --StoryURL: /javaworld.*/j.*/j.*\.html# Set StoryToPrintableSub appropriately - since there's only one modification# this is fairly straightforward ...ImageURL: /javaworld.*/j.*/.*.gifStoryToPrintableSub: s,\.html,_p.html,# Only get the text between the commentsStoryStart: -- begin body text --StoryEnd: -- end body text --# Set the story headline for bookmarksStoryHeadline: <TITLE>(.*?) - JavaWorld.*</TITLE>
merlyns_columns.site:
URL: AddURL: Name: Randal Schwartz' columnsDescription: columns written by Randal Schwartz, perl hackerLevels: 2ContentsDiff: 1ContentsStart: <h2>Columns</h2>ContentsEnd: </html>TableRender: flatten# hmm, we don't have a decent .txt->.html renderer yet, so skip the# listings :(StoryURL: /merlyn/\S+/col\d+\.htmlStoryStart: <!-- INDEX END -->StoryEnd: </html>StoryHTMLPreProcess: { s/^\s+(=\d+=)/<br>$1/gm;}
php_net.site:
# php.net# Author: Hubidubi (hubidubi@freemail.hu)URL: Name: PHP.netStoryStart: <!--http://www.php.net/-->StoryEnd: Old News
use_perl.site:
# use_perl.site -- based on the slashdot site.URL: http://use.perl.org/index.pl?light=1&noboxes=1&noicons=1Name: use PerlDescription: All The Perl That's P7L to E5T and R4TLevels: 2MinPages: 2ContentsStart: <HTML>ContentsEnd: </HTML>ContentsDiff: 1StoryURL: http://use.perl.org/article.*StoryStart: <A href="submit.pl">Submit Story</A> \]</P>StoryEnd: <FORM METHOD="GET" ACTION="http://use.perl.org/search.pl"># strip out the "login" and "related links" tables, they're irrelevant offline!# added Feb 2 2000 jm #StoryHTMLPreProcess: { s,<H3>use Perl Login</H3>.*?<B>The Fine Print:</B>,</FONT></TD></TR></TABLE>,s;}URLProcess: { # fix the URL; trim out all comment settings and use our own. s{^(http://use.perl.org/article.pl\?sid=\d+/\d+/\d+/\d+).*} {$1\&light=1\&noboxes=1\&noicons=1\&mode=nested\&threshold=0}g; if (!m,^http://use.perl.org/index.pl.light=1\&noboxes=1\&noicons=1, && !/mode=nested\&threshold=0/) { undef $_; # has to include these two; block it if it does not }}# skip URLs that have been archivedStorySkipURL: http://use.perl.org/interviews/\d+/\d+/\d+/\d+.shtml
layouts.site:
# This is a site layouts file. Layouts are used to provide default information# on a given site's layout, providing StoryStart, StoryEnd, etc. parameters for# a given URL pattern.## One advantage of this is that (for example) if an article in Slashdot links to# a Wired News article, the latter page can be scooped as well, without the# slashdot.site file knowing about Wired News' page format.## This file should always be kept in the sites directory!# ---------------------------------------------------------------------------LayoutURL: http://www.wired.com/news/.* # layout for Wired News pages. Wired seems to change this every week!!, and # it's really messy right now. :( # StoryHeadline: <h1 class="lg">(.*?)</h1> StoryStart: (?i)<input type="submit" value="Go" class="blkbtn" /> StoryEnd: (?i)-- (END_OF_BODY|end content) -- ContentsStart: -- begin generic desk -- ContentsEnd: (?i)-- (end content|TRADES|FOOTER) -- StoryURL: http://www.wired.com/news/.+/[\d,]+\.html\S* StoryURL: http://r.wired.com/r/\d+/http://www.wired.com/news/.+/[\d,]+\.html\S* # I hate StoryServer. 
This is just ludicrous StoryToPrintableSub: s!(http://www.wired.com/news)/\S+/(\d+),\d+,([\d,]+\.html\S*)$!$1/print/$2,1294,$3!# ---------------------------------------------------------------------------LayoutURL: http://news.cnet.com/news/.* StoryStart: <font size="+2"> StoryEnd: <font face="Arial, Helvetica" size="-1"> StoryURL: http://news.cnet.com/news/\S+.html.* StoryToPrintableSub: s!(http://news.cnet.com/news/[-\d]+.html)?.*$!$1!# ---------------------------------------------------------------------------LayoutURL: http://news.bbc.co.uk/.* ContentsStart: <a name="startcontent"> ContentsEnd: SEARCH BBC NEWS StoryStart: <a name="startcontent"> StoryEnd: SEARCH BBC NEWS StoryURL: http://news.bbc.co.uk/.*low/.*stm.*# ---------------------------------------------------------------------------LayoutURL: http://quote.yahoo.com/q\?.* Levels: 1 StoryStart: Create New View ImageURL: http://chart.yahoo.com/c/.*\.gif StoryEnd: Quotes delayed 15 minutes for Nasdaq, 20 minutes otherwise. StoryHeadline: colspan=7><b>(.*?)</b> StoryCacheable: 0 UseTableSmarts: 0 StoryPostProcess: { s/\x8D//g; s/- More[^\n]*\n//g; s/\]//g; s/(Last Trade|Change|Prev Cls|Volume|Div Date|Day's Range|Bid|Ask|Open) *\n/\n$1 /g; s/(Avg Vol|Ex-Div|52-week Range|Earn\/Shr|P\/E|Mkt Cap|Div\/Shr|Yield) *\n/\n$1 /g; s/\n([ \t]*\n)+/\n\n/g; }# ---------------------------------------------------------------------------LayoutURL: http://www.wunderground.com/cgi-bin/findweather/getForecast\?.* StoryStart: Updated StoryEnd: <!-- blue credits bar --> UseTableSmarts: 0 StoryCacheable: 0 StoryPostProcess: { s/\n\n+/\n/g; s/Moon Phase//g; s/\x8D /\x8D/g; s/: /Updated: /; s/Add.*?cast\)//gs; s/\s+(Forecast as)/\n\x8D$1/g; s/\n(State Extended)/\n\x8D$1/g; s/\n(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Today|Tonight|Tomorrow)/\n\n\x8D$1/gs; s/\nTemperature *Probability of Precipitation\n//gs; s/Place/\n\n\x8DSummary\n/gs; }# 
---------------------------------------------------------------------------# New York Times# Site file for Sitescooper (http://jmason.org/software/sitescooper/)# Written by: Kennis Koldewyn <kennis.koldewyn@wcom.com># bits from: Andy Rabagliati <andyr@wizzy.com># Last updated: 2000-09-05LayoutURL: http://.*.nytimes.com/.* ContentsStart: </NYT_HEADER ContentsEnd: <NYT_FOOTER StoryStart: </NYT_HEADLINE StoryEnd: </NYT_TEXT # this is a default StoryURL, the sites can override it StoryURL: http://.*.nytimes.com/\d\d\d\d/\d\d/\d\d/.* # Contents pre-processing: ContentsHTMLPreProcess: { # Remove bogus absolute links: s/http:\/\/www.nytimes.com//gis; } # Story pre-processing: StoryHTMLPreProcess: { # Remove lists of online links, inline tables, inline images, etc.: s/<NYT_INLINEBLURB.*?<\/?NYT_INLINEBLURB>//gs; s/<NYT_INLINEIMAGE.*?<\/?NYT_INLINEIMAGE>//gs; s/<NYT_INLINETABLE.*?<\/?NYT_INLINETABLE>//gs; s/<NYT_LINKS_ONSITE.*?<\/?NYT_LINKS_ONSITE>//gs; s/<NYT_LINKS_OFFSITE.*?<\/?NYT_LINKS_OFFSITE>//gs; # Remove other NYT-specific tags: s/<\/?NYT_.*?>//gm; # Remove break after headlines: s/<\/H(\d)>\s*<BR>/<\/H$1>/gim; }# ---------------------------------------------------------------------------# Accuweather defaults, by Marko Bozikovic <marko.bozikovic /at/ envox.hr>LayoutURL: http://www.accuweather.com/adcbin/intlocal_index.* ImageURL: http://vortex.accuweather.com/iwxpage/adc/icons/.* StoryStart: <!-- 5 DAY HTML START --> StoryEnd: <!-- 5 DAY HTML END --> StoryCachable: 0 UseTableSmarts: 0 TableRender: keep AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
zen_stories.site:
# Zen storiesURL: AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: Zen storiesLevels: 2ContentsEnd: Do you have a good story to contributeStoryURL: .*\.htmlStorySkipURL: StorySkipURL: StoryStart: <(HEAD|head)>StoryEnd: <(h|H)5>People's reactionsStoryCachable: 1
advogato.site:
URL: Name: AdvogatoDescription: the free software developer's advocateLevels: 2ContentsStart: <html>ContentsEnd: </html>ContentsDiff: 1StoryURL: http://www.advogato.org/article/.*StoryStart: <html>StoryEnd: </html>
advogato_diaries.site:
URL: Name: Advogato DiariesDescription: the free software developer's advocateLevels: 2# specify quite a large range for the contents page, we want to pick up the "recent# log entries" link without having to AddURL it separately!#ContentsStart: <h1>Recent .*?</h1>ContentsEnd: </html>ContentsDiff: 1StoryURL: http://www.advogato.org/article/.*StoryStart: <html>StoryEnd: </html>
alan_cox_diary.site:
URL: Name: Alan Cox DiaryDescription: the daily diary of Alan Cox, kernel hacker extraordinaireStoryStart: <!-- mark -->StoryEnd: </HTML>StoryDiff: 1
debian_weekly_news.site:
URL: Name: Debian Weekly News StoryStart: <H1> StoryEnd: To receive this newsletter weekly in your mailbox StoryURL: Levels: 1
desktoplinux.site:
# converted to use Palm format site, URL thanks to# http://members.bellatlantic.net/~blumax/plink.html !#URL: Name: DesktopLinux Levels: 2# ContentsStart: <html># ContentsEnd: </html> ContentsDiff: 1# StoryStart: Tell your friends# StoryEnd: </html> StoryURL: http://www.desktoplinux.com/.*.html
footnotes.site:
# Footnotes# Author: Hubidubi (hubidubi@freemail.hu)URL: http://www.gnomedesktop.org/modules.php?op=modload&name=AvantGo&file=indexName: Gnome FootNotesLevels: 2# If you don't want the logo, comment this line out!ImageURL: http://www.gnomedesktop.org/.*\.pngStoryPostProcess: { s/Date//gm; s/Article//gm;}
freshmeat.site:
# AuthorName: Carsten Clasohm# Last modified 2000/2/20## (moved by jm to replace the original freshmeat.site)URL: Name: Freshmeat StoryStart: <TABLE CELLSPACING="0" CELLPADDING="3" StoryEnd: \[more articles/news\] StoryDiff: 1
gwn.site:
URL: Name: Gentoo Weekly NewsletterLevels: 2AuthorName: Barry Dexter A. GonzagaAuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.phStoryURL: /news/en/gwn/.*-newsletter\.xmlImageURL: /images/gwn/.*ImageURL: /images/.*
kc_kde.site:
URL: Name: KC - KDE Levels: 1 TableRender: list StoryStart: <li><strong>Threads Covered<\/strong> StoryEnd: </HTML> StoryIncludeStartPattern: 1 AuthorName: Adrian Burgess <adrian /at/ corrosive.freeserve.co.uk> StoryHTMLPreProcess: { s/<td><b>.*<\/b>\.<\/td><td>.*<\/td><td>\(.* posts\)<\/td>//gmi; }
kde-news.site:
# KDE Announcement and news (with comments)# revised by jm@jmason.org to just go straight to dot.kde.org, it's pretty much# taken over as far as I can see.# Author: Jarl Friis <jarl@diku.dk>URL: Name: KDE newsLevels: 2AuthorName: Jarl FriisAuthorEmail: jarl@diku.dkContentsIncludeStartPattern: 0ContentsIncludeEndPattern: 0ContentsStart: <!-- begin contents -->ContentsEnd: <!--end contents-->StoryURL: http://dot.kde.org/\d+/.*StoryStart: <!--mark demarcation2-->StoryEnd: <FORM METHOD="POST" ACTION="http://dot.kde.org/search">ContentsDiff: 1
kernel_cousin_debian.site:
URL: Name: Debian Kernel Cousin Levels: 1 StoryStart: Table Of Contents StoryEnd: </html> # AuthorName: "michael d. ivey" <ivey@gweezlebur.com> # new URL courtesy of: "David A. Desrosiers" <hacker@gnu-designs.com>
kernel_traffic.site:
URL: Name: Kernel Traffic Levels: 1 StoryStart: Table Of Contents StoryEnd: </html># fixed by Derek Glidden <dglidden /at/ illusionary.com>
kerneltrap.site:
URL: Name: kerneltrap.comDescription: Your source for all the news that is the Linux kernelAuthorName: David DesrosiersStoryURL: http://kerneltrap.com/print.php.*StoryEnd: This article comes from kerneltrap.com
linux_gazette.site:
# About time I did this one! Linux Gazette, Jan 25 2000 <jm /at/ jmason.org>.#URL: Name: Linux GazetteDescription: Linux Gazette... making Linux just a little more fun! (monthly)Levels: 3IssueLinksStart: BEGIN issuesIssueLinksEnd: END issuesContentsURL: /issue\d+/lg_\S+\.htmlContentsStart: BEGIN tocContentsEnd: END tocStoryURL: /issue\d+/\S+\.htmlStoryStart: END navbarStoryEnd: END copyright
linux_magazine.site:
URL: Name: Linux MagazineLevels: 2ContentsStart: <TD CLASS="FEATURES" BGCOLOR="CCFFCC" ALIGN="CENTER">ContentsEnd: <TD CLASS="NEWS" BGCOLOR="CCFFCC" ALIGN="CENTER">StoryURL: http://www.linux-mag.com/\d+-\d+/\S+.html.*StoryURL: http://www.linux-mag.com/cgi-bin/printer.pl.issue=\d+-\d+.article=.*StoryToPrintableSub: s,/(\d+-\d+)/(\S+)_\d+\.html,/cgi-bin/printer.pl\?issue=$1\&article=$2,StoryStart: <BODY StoryEnd: </html>
linuxdevices.site:
URL: Name: LinuxDevices.comDescription: the embedded Linux portalLevels: 2ImageURL: /images/readmore.gifContentsStart: Best match</option>ContentsEnd: More *Links *...</a>StoryURL: http://www.linuxdevices.com/(news|links|events|articles)/.*\.htmlStoryStart: Best match</option>StoryEnd: <b>Latest headlines:</b>TableRender: flatten
slashdot.site:
# Slashdot.site -- now including comments scored 3 or higher.# TODO: strip out the so-called "funny" comments ;)## Kornelis Sietsma <korny /at/ sietsma.com>: comments support# jm: fixed again to use light mode throughout# bms: minor changes to pick up ask.slashdot.org it.slashdot.orgURL: http://slashdot.org/index.pl?light=1&noboxes=1&noicons=1Name: SlashDotLevels: 2ContentsStart: <A href=/hof.shtml>hof</A>ContentsEnd: <P><P>\[ <FONT size=2><B>StoryURL: http://slashdot.org/article.*StoryURL: (http://.*.slashdot.org/article.*|http://slashdot.org/article.*)StoryStart: <A href=hof.shtml>hof</A>StoryEnd: <P>\[ <FONT size=2><B># strip out the "login" and "related links" tables, they're irrelevant offline!# added Feb 2 2000 jm #StoryHTMLPreProcess: { s,<H3>Slashdot Login</H3>.*?<B>The Fine Print:</B>,</FONT></TD></TR></TABLE>,s;}# Because slashdot has so many links allowing views of stories with different# comment levels, formats, etc., we need a way to fix or block them here.# Unfortunately it's a bit tricky so we need to use perl code. We could just# ignore the comments, but I guess that's missing the point of slashdot ;)# added May 18 2000 jm#URLProcess: { # fix the URL; trim out all comment settings and use our own. s{^(http://slashdot.org/article.pl\?sid=\d+/\d+/\d+/\d+).*} {$1\&light=1\&noboxes=1\&noicons=1\&mode=nested\&threshold=3}g; s{^(http://.*.slashdot.org/article.pl\?sid=\d+/\d+/\d+/\d+).*} {$1\&light=1\&noboxes=1\&noicons=1\&mode=nested\&threshold=3}g; if (!m,^http://slashdot.org/index.pl.light=1\&noboxes=1\&noicons=1, && !/mode=nested\&threshold=3/) { undef $_; # has to include these two; block it if it does not }}# skip URLs that have been archivedStorySkipURL: http://slashdot.org/interviews/\d+/\d+/\d+/\d+.shtmlStoryHeadline: <TITLE>Slashdot \| (.*?)</TITLE>
a_word_a_day.site:
URL: Name: A.Word.A.Day Levels: 1 StoryStart: RealAudio\s*</A> StoryEnd: </BODY>
drinkboy.site:
# The Drinkboy ChannelURL: AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: The Drinkboy ChannelLevels: 3ImageURL: http://www.drinkboy.com/offline/images/.*gifIssueLinksStart: <html>IssueLinksEnd: </html>IssueCacheable: 0ContentsURL: http://www.drinkboy.com/offline/.*htmlContentsStart: <html>ContentsEnd: </html>ContentsCachable: 0StoryURL: http://www.drinkboy.com/offline/Recipes/.*htmlStoryStart: <html>StoryEnd: </html>StoryCachable: 0
world_wide_words.site:
URL: Name: World Wide Words Description: Investigating international English from a British viewpoint Levels: 2 ContentsStart: Q and A ContentsEnd: JOIN THE MAILING LIST ContentsUseTableSmarts: 0 # urgh, titles are images. Use their alt tags UseAltTagForURL: /img/.*\.gif # get all articles -- but not the indexes (which are huge) StoryURL: /(qa|topicalwords|inbrief|articles|weirdwords|turnsofphrase|reviews)/.*\.htm.* StorySkipURL: /(qa|topicalwords|inbrief|articles|weirdwords|turnsofphrase|reviews)/index\.htm StoryStart: BODY TEXT AREA StoryEnd: World Wide Words is copyright StoryUseTableSmarts: 0 TableRender: flatten
wired_news_business.site:
URL: Name: Wired News Business Levels: 2 StoryURL: .*//www.wired.com/news/(business|print)/[\d,]+\.html.*
wired_news_culture.site:
URL: Name: Wired News Culture Levels: 2 StoryURL: .*//www.wired.com/news/(culture|print)/[\d,]+\.html.*
wired_news_politics.site:
URL: Name: Wired News Politics Levels: 2 StoryURL: .*//www.wired.com/news/(politics|print)/[\d,]+\.html.*
wired_news_tech.site:
URL: Name: Wired News Technology Levels: 2 StoryURL: .*//www.wired.com/news/(technology|print)/[\d,]+\.html.* ContentsDiff: 1
USNews.site:
URL: Name: USNews-TueDescription: USNewsAuthorName: Goh Boon Nam# US News and World Report# Version 1.2# Date updated : 5 Apr 2005# Changes for 1.1 : Change of page format# Changes for 1.2 : Change of StoryURLLevels: 2ContentsStart: <!-- DATE -->ContentsEnd: <!-- Begin Ad1 -->StoryURL: http://www.usnews.com/usnews/(.*?)/articles/.*StoryStart: <div class="articleSectionTitle">StoryEnd: <!-- E-MAIL FORM -->StoryFollowLinks: 1StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/ //gim; s/<br clear="all">//gim; s/<p>\\n(.*?)//gim; }
atlantic.site:
# The Atlantic# Originally by Akkana PeckURL: Name: The AtlanticLevels: 2UseTableSmarts: 0ContentsStart: <font class="rubric">StoryURL: http://www.theatlantic.com/issues/[\d]+/[\d]+/.*\.htmStoryURL: http://www.theatlantic.com/unbound/.*\.htmStoryStart: <!--ARTICLE CONTENT BEGINS-->
cnn_mobile.site:
# CNN MobileURL: # created from PODS file by David A. DesrosiersAuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: CNN MobileLevels: 2ImageURL: .*\.gifImageScaleToMaxWidth: 150ContentsCachable: 0StoryURL: http://wireless.cnn.com/avantgo/cnn.*StoryCachable: 1
newsweek.site:
URL: http://www.msnbc.com/news/nw-front_front.asp?0dm=s---kName: NewsweekDescription: NewsweekAuthorName: Goh Boon Nam# Version 1.2# Date updated : 24 Nov 2003# Changes made : StoreyEnd changed to work better for all articlesLevels: 2ContentsStart: ---Insert_Tertiary_Stories---ContentsEnd: nwk_hp_header_webex.gifStoryURL: http://www.msnbc.com/news/\d+\.aspStoryStart: bantop_(.*?).gifStoryEnd: (<b>MSNBC READER(.*?)S TOP 10<\/b>|Newsweek, Inc)StoryFollowLinks: 1ContentsHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/<img src="http:(.*?)nwk_hp_header_inted.gif">/International Editions/gim; }StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/MPA NEW TEMPLATE CODE STARTS HERE(.*?)END MPA NEW TEMPLATE CODE HERE//gis; s/<font style="font-family:arial;font-size:13px(.*?) /<P>/gis; s/ /<P>/gis; }
newsweek_intl.site:
URL: http://www.msnbc.com/news/nw-int_front.asp?Name: NewsweekIntl-TueDescription: Newsweek InternationalAuthorName: Goh Boon Nam# Version 1.6# Date updated : 27 Jun 2005# Updated by : Goh Boon Nam# Changes made : Removal of Page Header that appears in some pages# : Workaround for relative URL not recognised by Sitescooper nextpage function# : New StoryEnd # : New ContentsEnd to remove hanging <b tag which causes all stories to be in boldLevels: 2ContentsStart: FROM THIS WEEK'S ISSUEContentsEnd: <b color="......."> FROM THE PREVIOUS ISSUEStoryURL: http://www.msnbc.msn.com/id/.*StoryStart: (class="headlineStory"|class="deckStory")StoryEnd: (©(.*?)Newsweek, Inc|Print this)# urgh, first article title is an image. Use its alt tag#UseAltTagForURL: http://(.*?).jpgContentsHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/Â//gim; s/—/--/gim; s/•/<BR>/gim; s/ //gim; }StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/Â//gim; s/—/--/gim; s/•/<BR>/gim; s/ //gim; s/advertisement<br>//gim; s/<font class="textSmallBold"(.*?)<\/table><\/td><\/tr><\/table>//gis; s/href="\/id\//href="http:\/\/www.msnbc.msn.com\/id\//gim; }# s/href="\/id\//href="http:\/\/www.msnbc.msn.com\/id\//gim; # above caters to Sitescooper follownext function cannot work with relative URL; needs absolute URL
usa_today.site:
# usa_today.site# grabs the handheld version of the USA Today's paper# Henry Justin <jhenry@fjicl.com>URL: Name: USA TodayLevels: 3ImageURL: http://www.usatoday.com/palm/.*ContentsCacheable: 0StoryCacheable: 0
yahoo_business.site:
# Yahoo- Business# Original by: <spacehog@bsdjournal.com> Patrick Clochesy# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>URL: http://pg1.yahoo.com/raw?dp=news&f=/us/news/580/index.xmlName: Yahoo! BusinessLevels: 3
yahoo_entertainment.site:
# Yahoo- Entertainment# Original by: <spacehog@bsdjournal.com> Patrick Clochesy# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>URL: http://pg1.yahoo.com/raw?dp=news&f=/us/news/529/index.xmlName: Yahoo! EntertainmentLevels: 3
yahoo_politics.site:
# Yahoo- Politics# Original by: <spacehog@bsdjournal.com> Patrick Clochesy# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>URL: http://pg1.yahoo.com/raw?dp=news&f=/us/news/584/index.xmlName: Yahoo! PoliticsLevels: 3
yahoo_tech.site:
# Yahoo- Tech# Original by: <spacehog@bsdjournal.com> Patrick Clochesy# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>URL: http://pg1.yahoo.com/raw?dp=news&f=/us/news/581/index.xmlName: Yahoo! TechLevels: 3
yahoo_top_stories.site:
# Yahoo- Top Stories# Original by: <spacehog@bsdjournal.com> Patrick Clochesy# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>URL: http://wap.oa.yahoo.com/raw?dp=news&f=/us/news/578/index.xmlName: Yahoo! Top StoriesLevels: 3
blather.site:
# Fortean news from blather.netURL: Name: Blather Levels: 1 StoryStart: <BODY StoryEnd: feedback and comments
davenet.site:
# Dave Winer's DaveNetURL: Name: DaveNet Levels: 1 # this is inconvenient; there's no easy way to find start of text anymore. StoryStart: Part of the .*? website, syndicated on .*?</b> StoryEnd: <br>Last update: StoryDiff: 1
i_cringely.site:
URL: Name: I, Cringely Levels: 2 ContentsStart: div id="fpulp" ContentsEnd: div id="ftitshop" StoryURL: http://www.pbs.org/cgi-registry/cringely/.* StoryCacheable: 0
nro.site:
# National Review Online#URL: Name: National Review Online Levels: 2ImageURL: /images/dropcaps/\D\.gifStoryURL: /\S+/\D+\d+\.aspStoryURL: /\S+/script/printpage.asp?ref=\D+\d+\.aspStoryToPrintableSub: s,^(http://.*\.com)(\D+\d+\.asp)$,$1/script/printpage.asp?ref=$2,# #http://www.nationalreview.com/script/printpage.asp?ref=/nr_comment/nr_comment050802.asp
pulpit.site:
URL: http://cgi.pbs.org/cgi-registry/cringely/thisweek.pl?pulpit Name: The Pulpit Levels: 1 StoryStart: <!--========================== Content between these lines ==========================--> StoryEnd: <!--%%ENDCOLUMN%%--> # only scoop on Fridays# EvaluatePerl: {# my(@time);# my($day);# @time = localtime();# $day = $time[6]; # day of week (wday) is index 6 of localtime's list# $skip_site = ($day =~ /[^5]/); #skip_site=true if day is not Friday# }
roving_reporter.site:
URL: Name: the roving_reporter Description: t. byfield's regular column on the TBTF site Levels: 1 StoryStart: <table border="0" cellpadding="9" cellspacing="0" width="90%" bgcolor="white"> StoryEnd: <b>The r_r began as a semi-collaborative nym StoryDiff: 1
salon.site:
# Salon.site -- with contributions from Henry Justin <jhenry /at/ fjicl.com>,# Eric <ethomas /at/ deltanet.com> and Justin <jm /at/ jmason.org>URL: Name: Salon Magazine Levels: 2 ContentsStart: -- *begin feature story *-- ContentsEnd: >Illustration by ContentsDiff: 1 StoryURL: http://www.salon.com/(books|politics|news|people|tech|mwt|health|ent|media|travel|letters|sex|business)/.*/.* # skip the middle pages StorySkipURL: http://www.salon.com/(books|politics|news|people|tech|mwt|health|ent|media|travel|comics|letters|sex|business)/print.html # and the newswire stories StorySkipURL: http://www.salon.com/wire/.*# StoryToPrintableSub: s,^(http://www.salon.com/.*)/(?:index.html|$),\1/print.html, StoryStart: <p>To print this page, select "Print" from the File menu of your browser</p> StoryEnd: Sound Off StoryHeadline: <title>Salon.com \S+ \| (.*?)</title> MinPages: 2
suck.site:
URL: Name: Suck.comLevels: 1
slate.site:
URL: http://www.slate.com/?id=85223&date.x=10&date.y=10Name: SlateLevels: 2ContentsDiff: 1ContentsStart: <form name="form3">ContentsEnd: </form>ContentsHTMLPreProcess: s,<input.*?>,,giStoryURL: http://(www\.)?slate.(msn\.)?com/id/.+StoryURL: http://(www\.)?slate.(msn\.)?com/toolbar.+StoryToPrintableSub: s,id/(\d+)/.*,toolbar.aspx?action=print&id=$1,
alanmiller.site:
URL: Name: Alan Miller Levels: 2 StoryURL: http://www.alan-miller.org/.* ContentsDiff: 1
palm_boulevard.site:
URL: Name: Palm BoulevardDescription: The Complete Independent Palm ResourceLevels: 3ImageURL: http://www.palmblvd.com/channels/avantgo/.*ContentsURL: http://www.palmblvd.com/channels/avantgo/[^/]+\.html?StoryURL: http://www.palmblvd.com/channels/avantgo/\S+/.*IssueCacheable: 0ContentsCacheable: 0StoryCacheable: 0
palmpilotsoftware.site:
# URL thanks to http://members.bellatlantic.net/~blumax/plink.html !#URL: Name: PalmPilot SoftwareDescription: www.palmpilotsoftware.comLevels: 1ImageURL: http://www.zdnet.com/swlib/avantgo/images/.*
palmpower.site:
# Yay! Finally tracked down the PalmPower "for palmpilot delivery" site. These# are MUCH easier to read than the full javascript, multipage stuff.#URL: Name: PalmPower Description: Tips and techniques exclusively for Palm computer users. Levels: 3 IssueLinksStart: <HTML> IssueLinksEnd: </HTML> IssuePrint: 1 ContentsURL: http://.*.(pair\.com|palmpower\.com)/.*\.html ContentsStart: <HTML> ContentsEnd: </HTML> ContentsPrint: 1 StoryURL: http://.*.(pair\.com|palmpower\.com)/.*\.html StoryStart: <HTML> StoryEnd: </HTML>
palmstation.site:
URL: Name: PalmStation.ComDescription: News, Views, Reviews... To Feed The NeedContentsFormat: rssStoryURL: /view_article\.asp.*StoryStart: <td bgcolor=#550010 colspan=2>StoryEnd: Comment on this# (This is a sitescooper site file. see # It was generated from the site's RSS by rss-to-site.pl 1.0.)
visorcentral_discussion.site:
URL: http://discussion.visorcentral.com/vcforum/search.php?action=getdailyName: VisorCentral DiscussionLevels: 2ContentsURL: /vcforum/search.php.*StoryURL: http://discussion.visorcentral.com/vcforum/showthread.php\?s=[0-9a-z]+\&threadid=\d+StoryUseTableSmarts: 0StoryStart: <!-- end marcus header -->StoryEnd: <!-- /time zone and post buttons -->#remove the line below if you only want new threads, and not old threads with new repliesStoryCacheable: 0#maximum size of the output fileSizeLimit: 5000Description: the site to visit for Handspring Visor ownersAuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
visorcentral_mobile.site:
URL: Name: VisorCentral MobileLevels: 2ContentsURL: /mobile/.*ContentsDiff: 0StoryURL: /mobile/news\.php\?limit=\dStoryURL: /mobile/review\.php\?limit=\d+StoryStart: <a name='thetop'></a>StoryEnd: <div align='left'>#the line below fetches the full text every time, even if you have already seen the same storyStoryCacheable: 0Description: VisorCentral Mobile EditionAuthorName: Yoon Fui TheanAuthorEmail: yoonfui /at/ bigfoot.com
la_lettre_edition_mobile.site:
URL: Name: La Lettre de l'InternetLevels: 2ContentsPrint: 1ImageURL: http://.*## This site was converted from an AvantGo .subs file by subs-to-site.pl.# See http://sitescooper.org/ for more information on sitescooper.
motley_fool.site:
URL: Name: The Motley Fool - NewsLevels: 1ImageURL: http://.*## This site was converted from an AvantGo .subs file by subs-to-site.pl.# See http://sitescooper.org/ for more information on sitescooper.
the_guardian_palmsized.site:
# The UK Guardian, Palmsized# Site file for Sitescooper (http://jmason.org/software/sitescooper/)# Last updated: 13 Sep 2001 stewart@ref.collins.co.uk# s/guardianunlimited/guardian/;URL: Name: UK GuardianLevels: 3# ensure that pages with "avantgostory" are treated as "stories"# by sitescooper. This ensures that we don't wind up with 'no# new stories, ignoring'.ContentsURL: http://www.guardian.co.uk/(avantgostories|avantgo/).*\.htmlStoryURL: http://www.guardian.co.uk/avantgostory/.*\.htmlImageURL: http://www.pixunlimited.co.uk/.*ContentsSkipURL: http://www.guardian.co.uk/avantgo/advertpage/.*
the_onion_pda.site:
# the_onion_pda.site# * 2000-04-19, Andrew Chadwick: corrected depth, removed shell# metachars from Name, added advert and avantgo markup removal.URL: Name: The OnionDescription: The Onion (PDA Edition).Levels: 2ImageURL: .*ContentsHTMLPreProcess: { s!<br/?>\s*<br/?>!<p>!gsi; s!<A\s+HREF=.http://adbot.theonion.com[^>]+>.*?</A>!!gsix;}StoryHTMLPreProcess: { s!<br/?>\s*<br/?>!<p>!gsi; s!(<A\s+HREF=.?) pods://avantgo/back/? ([^>]*>.*?</A>) !$1http://mobile.theonion.com/$2!xsig; s!<A\s+HREF=.http://adbot.theonion.com[^>]+>.*?</A>!!gsix;}
the_register_rss.site:
URL: Name: The Register RSSLevels: 2ContentsFormat: rssStoryURL: .*StoryToPrintableSub: s,^(http://go.theregister.com/feed/)(.*),http://www.theregister.co.uk/\2print.html,
inq7-mobile.site:
URL: Name: INQ7 mobileDescription: The Philippine Daily Inquirer and GMA Network News Web site for Mobile Phones and PDA'sLevels: 3AuthorName: Barry Dexter A. GonzagaAuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.phContentsURL: /mpda/html_output/.*StoryURL: /mpda/html_output/.*StoryURL: http://money\.inq7\.net/topstories/printable_topstories\.php.*ImageURL: /mpda/html_output/.*\.gifImageURL: http://www\.inq7money\.net/images/header/.*\.gif
movietickets.site:
# movietickets.com showtimes site file# change the URL: to include your zip codeURL: http://www.movietickets.com/house_list.asp?house_id=0&lng=0&movie_id=0&SearchZip=27560&SearchCity=&SearchState=&SearchSort=0&ShowDate=0&SearchRadius=15&image1.x=0&image1.y=0Name: Movie ShowtimesLevels: 2ContentsStart: Select a theater to display showtimes.ContentsEnd: <a href="house_search.asp">StoryStart: <td class="TheaterName">StoryEnd: Look on another date?ImageURL:
my_yahoo.site:
## You will need to set the MY_YAHOO_NAME and MY_YAHOO_PASSWORD environment# variables before using this site.#URL: http://login.yahoo.com/config/login?.src=my&.tries=1&.done=http://my.yahoo.com/&login=${MY_YAHOO_NAME}&passwd=${MY_YAHOO_PASSWORD}RequireEnvVariable: MY_YAHOO_NAMEName: My YahooDescription: My Yahoo!Levels: 2StoryURL: http://.*.yahoo.com/.*StorySkipURL: .*external.*StorySkipURL: http://(dir|edit).yahoo.com/.*StorySkipURL: http://.*search.*.yahoo.com/.*StoryFollowLinks: 0 # don't follow "More..." linksContentsUseTableSmarts: 0StoryUseTableSmarts: 0TableRender: flatten
sydney_morning_herald.site:
AuthorName: Yvonne Smith <yvonne@thewatch.net>URL: Name: Sydney Morning HeraldDescription: The Sydney Morning HeraldSizeLimit: 500Levels: 2StoryURL: http://www.smh.com.au/news/.*StoryStart: <H1>StoryEnd: </BOD>StoryHeadline: <H1> (.*)</H1>
yourmovies_canberra.site:
### For Canberra, Australia residents. Could be adjusted for other Australian citiesURL: http://www.yourmovies.com.au/handheld/session_times.cfm?&venue_id=21604,224849,93546,239722,224927,253443,90268&sort=movieName: YourMovies CanberraLevels: 2
bostonglobe.site:
URL: Name: Boston_GlobeDescription: Boston Globe City & RegionAuthorName: Bruce ZohnAuthorEmail: coffeecat@bigfoot.comLevels: 2ContentsURL: /news/globe/city_region/.*StoryURL: /news/local/.*ImageURL: http://cache.boston.com/bonzai-fba/.*ContentsStart: <div class="mainContent">ContentsEnd: <div class="footerLinks">StoryStart: <div class="story">StoryEnd: <div class="toolsMain">StoryFollowLinks: 4StorySkipURL: /news/local/.*\?mode=PFStorySkipURL: /news/local/.*\?pg=full
la_times_frontpage.site:
# The Los Angeles Times Front PageURL: Name: LA Times Front Page Levels: 2 StoryURL: /news/front/.*\.html ContentsStart: <!-- TIMESTAMP --> ContentsEnd: <!-- END RIGHT MAIN --> StoryStart: Print this story StoryEnd: <!--STORY ENDS-->
bayarea_com_news.site:
URL: Name: BayArea.com NewsDescription: local news for the SF Bay Area from BayArea.comAuthorName: Bill JanssenAuthorEmail: bill@janssen.orgLevels: 2StoryURL: http://www.bayarea.com/.*ContentsCachable: 0ContentsStart: <!--Date-->ContentsEnd: <CENTER>StoryStart: <SNML_HEADLINES>StoryEnd: </SNML_BODY>ContentsHTMLPreProcess: { s,(<I><B>Last updated</B>),<H1>SF Bay Area News</H1><br><i>(from http://www.bayarea.com/news/)</i><br>$1,s;}
sf_chronicle_food.site:
URL: http://www.sfgate.com/cgi-bin/article-list.cgi?key=FD&directory=FoodName: SF Chronicle FoodDescription: San Francisco Chronicle Food section (published Wednesdays)AuthorName: Bill JanssenAuthorEmail: bill@janssen.orgLevels: 2StoryURL: http://www.sfgate.com/cgi-bin/article.cgi.*ImageURL: .*/templates/brands/chronicle/images/chronicle\.gifContentsCachable: 0ContentsStart: <!--END HEADLINE MODULE-->ContentsEnd: <!-- \*\*\*\*\* END OF BIG TABLE \*\*\*\*\* -->StoryStart: <!-- BEGIN HEADLINE NESTED TABLE -->StoryEnd: <!-- \*\*\*\*\* END OF BIG TABLE \*\*\*\*\* -->ContentsHTMLPreProcess: { s,^,<H1>SF Chronicle Food Section</H1>,s;}StoryPostProcess: { s/<TABLE WIDTH=[0-9]*/<TABLE/gm; s/<TD([^>]*)WIDTH=[0-9]*/<TD$1/gm; s/<TD([^>]*)NOWRAP/<TD$1/gm;}
sfgate_com_news.site:
URL: Name: SFGate.com NewsDescription: SF Bay Area news stories from sfgate.com (Chronicle/Examiner/KRON)AuthorName: Bill JanssenAuthorEmail: bill@janssen.orgLevels: 2StoryURL: http://www.sfgate.com/cgi-bin/article.cgi.*StoryURL: http://www.sfgate.com/news/baycitynews/.*ContentsCachable: 0ContentsStart: <!-- \*\*\*\*\*\*\*\* BEGIN STEW \*\*\*\*\*\*\*\*\*\*\* -->ContentsEnd: <!-- END STORIES SCRIPT \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* --> StoryStart: <!-- BEGIN HEADLINE NESTED TABLE -->StoryEnd: <!-- \*\*\*\*\* END OF BIG TABLE \*\*\*\*\* -->ContentsHTMLPreProcess: { s/^/<H1>News from SFGate\.com<\/H1><br><i>(from SF Chronicle, SF Examiner, KRON, AP, SF Gate sources)<\/i><br>/gs;}StoryPostProcess: { s/<TABLE WIDTH=[0-9]*/<TABLE/gm; s/<TD([^>]*)WIDTH=[0-9]*/<TD$1/gm; s/<TD([^>]*)NOWRAP/<TD$1/gm;}
chicago_tribune_business.site:
# Site file for Sitescooper (http://)# Written by: David Czerwinski <david_czerwinski@yahoo.com># 12-23-00URL: Name: Trib Business Description: Chicago Tribune Business Section Levels: 2 StoryURL: /business/printedition/article/.*\.html StoryEnd: <LI> <A HREF=.*>E-mail this story to a friend</A>
chicago_tribune_front_page.site:
# Site file for Sitescooper (http://)# Written by: David Czerwinski <david_czerwinski@yahoo.com># 12-23-00URL: Name: Trib Front Page Description: Chicago Tribune Front Page Section Levels: 2 StoryURL: /news/printedition/article/.*\.html StoryEnd: <LI> <A HREF=.*>E-mail this story to a friend</A>
chicago_tribune_sports.site:
# Site file for Sitescooper (http://)# Written by: David Czerwinski <david_czerwinski@yahoo.com># 12-23-00URL: Name: Trib Sports Description: Chicago Tribune Sports Section Levels: 2 StoryURL: /sports/printedition/article/.*\.html StoryEnd: <LI> <A HREF=.*>E-mail this story to a friend</A>
Vecernji.site:
# Vecernji List# - comment out ContentsSkipURL: lines for links you don't want scoopedURL: AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: Vecernji ListLevels: 3ImageURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Media/(RTV-HRT1|RTV-HRT2|RTV-HRT3|RTV-OTV|RTV-NOVA|RTV-NET|RTV-EURO|RTV-DSF|)\.(gif|JPG)IssueLinksStart: <td width="1\d\d" align="left" valign="top">IssueLinksEnd: <a href="Pages/PROGNOZA.html">IssueUseTableSmarts: 0IssueCacheable: 0ContentsURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/.*\.html# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/HRV-NAJ.html# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/SVI-NAJ.html# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/ZAG-NAJ.html# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/TIS-NAJ.html# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/PLUS-NAJ.htmlContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/KUL-NAJ.html# they seem to change sport's page URL every now and then...ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/SPO-NAJ.htmlContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/SPORT-NAJAVA.html# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRN-NAJ.htmlContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/HORO-NAJ.htmlContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/ODV-NAJ.htmlContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/TRECA-NAJ.htmlContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/MOZ-NAJ.htmlContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/STIL-NAJ.htmlContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/DJE-NAJ.html# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/RTV-PROGRAM.html# ContentsSkipURL: 
http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/ZAN-NAJ.htmlContentsStart: <td width="4\d\d" valign="top" bgcolor="ContentsEnd: </html>ContentsUseTableSmarts: 0ContentsCachable: 0StoryURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/.*\.htmlStorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/formular.*\.htmlStorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/Kronologija.htmlStorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/Kronologija-slika.htmlStorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRKVA.htmlStorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/ZEMLJE-POTPIS.htmlStorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRNA-BROJEVI.htmlStorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRNA-ZELJEZNICE.htmlStorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRNA-SAMOUBOJSTVA.htmlStoryStart: <td width="4\d\d" valign="top" bgcolor="StoryEnd: <ul>StoryUseTableSmarts: 0StoryCachable: 0TableRender: keep
accuweather_zagreb.site:
# Accuweather - Zagreb# you'll have to go to the Accuweather site, find your city and# copy its URL to the line belowURL: http://www.accuweather.com/adcbin/intlocal_index?wxcity2=ZAGREB&wxcountry=EU;RT AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr> Name: Accuweather - Zagreb Levels: 1 # other details are picked up automatically from ../lib/layouts.site
berlingsketidende.site:
# Berlingske Tidende# Author: Jarl Friis <jarl@diku.dk>URL: Name: BerlingskeLevels: 2AuthorName: Jarl FriisAuthorEmail: jarl@diku.dkImageURL: 1ContentsIncludeStartPattern: 1ContentsIncludeEndPattern: 0ContentsUseTableSmarts: 0ContentsStart: <!-- Template c_forside_hovedhistorie begin -->ContentsEnd: <!-- Template c_forside_hovedhistorie end -->ImageURL: http://www.berlingske.dk/grafik/redaktion/.*########Use this if you want simple story versions:#####StoryToPrintableSub: s,(http://www.berlingske.dk/artikel:aid)(=[0-9]*),http://www.berlingske.dk/popup:print\2,i;#StoryURL: http://www.berlingske.dk/popup:print=[0-9]*StoryStart: <hr size=1 width=100%>StoryEnd: <!-- WebMeasure Starts -->########Use this if you want image/table versions:####StoryStart: <!-- Template c_artikel begin -->StoryEnd: <!-- Template c_artikel end -->StoryIncludeStartPattern: 0StoryIncludeEndPattern: 0TableRender: keep#StoryLifetime: 0ContentsCacheable: 1StoryCacheable: 1ContentsDiff: 1StoryDiff: 1
computerworld.dk.site:
# Computerworld.dk online# Author: Jarl Friis <jarl@diku.dk>URL: Name: Computerworld DKLevels: 2AuthorName: Jarl FriisAuthorEmail: jarl@diku.dkContentsIncludeStartPattern: 1ContentsIncludeEndPattern: 0ContentsUseTableSmarts: 0ContentsStart: <table width="490" border="0" cellspacing="0" cellpadding="0" VALIGN=TOP>#for a few news items:ContentsEnd: <TABLE CELLSPACING=0 CELLPADDING=0 WIDTH=510 >#for categorised news:#ContentsEnd: <!-- INDHOLD -- BREDDE = 490 PIXELS -->StoryToPrintableSub: s,(Vis_artikel)(.asp.ArticleID=[0-9]*),\1_til_udskrift\2,i;StoryURL: http://www.computerworld.dk/[vV]is_artikel_til_udskrift.asp.ArticleID=[0-9]*ImageURL: http://www.computerworld.dk/[Ii]mages/[^/]*[.].{3}StoryIncludeStartPattern: 0StoryIncludeEndPattern: 0StoryStart: <img src="Images/SiteImages/stort_computerworldlogo.gif" width=442 height=86 alt="" border="0">#This does not even exist, but it avoids standard sitescooper-"layouts"StoryEnd: </HTML>TableRender: keep#StoryLifetime: 0ContentsCacheable: 1StoryCacheable: 1ContentsDiff: 1StoryDiff: 1
dmi-vejret.site:
# Vejrudsigt for DMI# Author: Jarl Friis <jarl@diku.dk>#Change this to your local weather:URL: Name: DMIs vejrudsigtLevels: 1AuthorName: Jarl FriisAuthorEmail: jarl@diku.dk#Danish 7-day forecast:AddURL: StoryIncludeStartPattern: 0ContentsIncludeEndPattern: 0StoryStart: <img src="gifs/dmi-logo2.gif" width=40 height=88>StoryEnd: </BODY>#StoryUseTableSmarts: 0#TableRender: keep#StoryLifetime: 0ContentsCacheable: 0StoryCacheable: 0ContentsDiff: 0StoryDiff: 0
geekculture.site:
# # Author: Jan Lund Thomsen <kwed@kwed.org>URL: Name: GeekCulture.dkLevels: 2AuthorName: Jan Lund ThomsenAuthorEmail: kwed@kwed.orgContentsStart: section=11ContentsEnd: Ældre artiklerStoryURL: http://www.geekculture.dk/arkiv.php3\?reviewid=.*StoryStart: alt="Tilbage til Hovedsiden"StoryEnd: <form method=post action="sendtilven.php3?reviewid=877"><font size="-1">
ingeniøren.site:
# Ingeniøren# Author: Jarl Friis <jarl@diku.dk>URL: Name: IngeniørenLevels: 2AuthorName: Jarl FriisAuthorEmail: jarl@diku.dkContentsIncludeStartPattern: 1ContentsIncludeEndPattern: 1ContentsStart: <!-- Indholde Start -->#This will NOT include the ShortNews:ContentsEnd: <TR><TD COLSPAN="2"><IMG SRC="/ress/ramme/d.gif" WIDTH="2" HEIGHT="3" ALT=""></TD></TR></TABLE>#This will include ShortNews#ContentsEnd: </TD></TR></TABLE> <BR>#ContentsEnd: </HTML>#seems not to work ... ShortNews has another layout.StoryStart: <!-- .BeginEditable "trumpet" -->StoryEnd: <!-- .BeginEditable "hojre_spalte_nede_bund" -->ImageURL: http://www.ing.dk/arkiv/.*StorySkipURL: mailto:.*StoryUseTableSmarts: 0TableRender: flattenContentsCacheable: 1StoryCacheable: 1ContentsDiff: 1StoryDiff: 1Active: 1
politiken_daily_summary.site:
# site_samples/regional_denmark/politiken_daily_summary.site## Daily news summary from Danish newspaper 'Politiken'.AuthorName: Jan Lund ThomsenAuthorEmail: kwed@kwed.orgURL: http://politiken.dk/VisArtikel.iasp?TemplateID=2377Name: Politiken: summaryLevels: 2ImageURL: ImageScaleToMaxWidth: 150StoryHTMLPreProcess: { s/<BR><BR>/<p>/g; s/<\/CENTER><BR>/<\/CENTER><P>/g;}
sslug-kalender.site:
# Skåne Sjællands Linux Brugergruppe kalender# Author: Jarl Friis <jarl@diku.dk>URL: http://www.sslug.dk/adict/mgroup.php?organizer=SSLUGName: SSLUG kalenderLevels: 1AuthorName: Jarl FriisAuthorEmail: jarl@diku.dkContentsIncludeStartPattern: 0ContentsIncludeEndPattern: 0StoryStart: <!-- End of LUG table -->StoryEnd: <table width="100%" cellspacing="0" cellpadding="0" border="0">Active: 1
LeMonde1_INT_FRA_STE_REG.site:
# Le Monde interactif - Tous les nouveaux articles des séquences# International France Société Régions #___________________________________________# Vous pouvez supprimer de la liste ci-après les URL que vous ne lisez pas.# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 10.11.99 par Jacques TurbéURL: http://www.lemonde.fr/sequence/0,2319,2037,00.html # International AddURL: http://www.lemonde.fr/sequence/0,2319,2030,00.html # France AddURL: http://www.lemonde.fr/sequence/0,2319,2079,00.html # France Société AddURL: http://www.lemonde.fr/sequence/0,2319,2075,00.html # Régions Name: Le Monde International France Société RégionsLevels: 2ContentsStart: <td width=510 valign="top"> ContentsEnd: </body>ContentsCacheable: 0StoryStart: <td width=400>StoryEnd: article_impressionStoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde2_HORIZONS.site:
# Le Monde interactif - Tous les nouveaux articles des séquences# Horizons : Débats,Enquêtes, Editoriaux#___________________________________________# Vous pouvez supprimer de la liste ci-après les URL que vous ne lisez pas.# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 10.11.99 par Jacques TurbéURL: http://www.lemonde.fr/sequence/0,2319,2044,00.html # Horizons AddURL: http://www.lemonde.fr/sequence/0,2319,2070,00.html # Horizons Débats AddURL: http://www.lemonde.fr/sequence/0,2319,2065,00.html # Horizons Enquêtes AddURL: http://www.lemonde.fr/sequence/0,2319,2064,00.html # Horizons Editoriaux Name: Le Monde HORIZONSLevels: 2ContentsStart: <td width=510 valign="top"> ContentsEnd: <td width=125 valign="top">ContentsCacheable: 0StoryStart: <td width=400>StoryEnd: article_impressionStoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde3_ENT_COM_PLA_ECO.site:
# Le Monde interactif - Tous les nouveaux articles des séquences# Entreprise Communication Placements Le Monde de l'économie#___________________________________________# Vous pouvez supprimer de la liste ci-après les URL que vous ne lisez pas.# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 10.11.99 par Jacques TurbéURL: http://www.lemonde.fr/sequence/0,2319,2058,00.html # Entreprises AddURL: http://www.lemonde.fr/sequence/0,2319,2061,00.html # Entreprises Communication AddURL: http://www.lemonde.fr/sequence/0,2319,2074,00.html # Placements AddURL: http://www.lemonde.fr/sequence/0,2319,2073,00.html # Le Monde de l'économie Name: Le Monde Entreprise Communication Placements EconomieLevels: 2ContentsStart: <td width=510 valign="top"> ContentsEnd: <td width=125 valign="top">ContentsCacheable: 0StoryStart: <td width=400>StoryEnd: article_impressionStoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde4_AUJ_SCI_SPO_CULT.site:
# Le Monde interactif - Tous les nouveaux articles des séquences# Aujourd'hui Sciences Sports Culture Théâtre, danse Festivals Photographie Peinture, arts plastiques Cinéma#___________________________________________# Vous pouvez supprimer de la liste ci-après les URL que vous ne lisez pas.# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 10.11.99 par Jacques TurbéURL: http://www.lemonde.fr/sequence/0,2319,2040,00.html # Aujourd'hui AddURL: http://www.lemonde.fr/sequence/0,2319,2077,00.html # Sciences AddURL: http://www.lemonde.fr/sequence/0,2319,2045,00.html # Sports AddURL: http://www.lemonde.fr/sequence/0,2319,2033,00.html # Culture AddURL: http://www.lemonde.fr/sequence/0,2319,2083,00.html # Théâtre, danse AddURL: http://www.lemonde.fr/sequence/0,2319,2192,00.html # Festivals AddURL: http://www.lemonde.fr/sequence/0,2319,2219,00.html # Photographie AddURL: http://www.lemonde.fr/sequence/0,2319,2220,00.html # Peinture, arts plastiques AddURL: http://www.lemonde.fr/service_cinema/0,2331,109-QUO,00.html # Cinéma Name: Le Monde Aujourd'hui Sciences Sports CultureLevels: 2ContentsStart: <td width=510 valign="top"> ContentsEnd: <td width=125 valign="top">ContentsCacheable: 0StoryStart: <td width=400>StoryEnd: article_impressionStoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde5_LIVRES.site:
# Le Monde interactif - Tous les nouveaux articles des séquences# Le Monde des livres : Littératures, Feuilleton, Essais, Actualités, Chronique, Poches, Jeunesse#___________________________________________# Vous pouvez supprimer de la liste ci-après les URL que vous ne lisez pas.# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 10.11.99 par Jacques TurbéURL: http://www.lemonde.fr/service_livres/0,2333,116-QUO,00.html # Le Monde des livres AddURL: http://www.lemonde.fr/service/0,2321,160-QUO,00.html # Littératures AddURL: http://www.lemonde.fr/service/0,2321,156-QUO,00.html # FeuilletonAddURL: http://www.lemonde.fr/service/0,2321,138-QUO,00.html # EssaisAddURL: http://www.lemonde.fr/service/0,2321,159-QUO,00.html # ActualitésAddURL: http://www.lemonde.fr/service/0,2321,155-QUO,00.html # ChroniqueAddURL: http://www.lemonde.fr/service/0,2321,117-QUO,00.html # PochesAddURL: http://www.lemonde.fr/service/0,2321,158-QUO,00.html # Jeunesse Name: Le Monde des LivresLevels: 2ContentsStart: <td width=510 valign="top"> ContentsEnd: <td width=125 valign="top">ContentsCacheable: 0StoryStart: <td width=400>StoryEnd: article_impressionStoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde6_Interactif.site:
# Le Monde interactif - Tous les nouveaux articles des séquences# LMI : Actus Futurs Branché Techno Business Services Conso Enquêtes#___________________________________________# Vous pouvez supprimer de la liste ci-après les URL que vous ne lisez pas.# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 10.11.99 par Jacques TurbéURL: http://www.lemonde.fr/aietek/0,2327,2039,00.html # LMI Actus AddURL: http://www.lemonde.fr/aietek/0,2327,2043,00.html # LMI Futurs AddURL: http://www.lemonde.fr/aietek/0,2327,2059,00.html # LMI Branché AddURL: http://www.lemonde.fr/aietek/0,2327,2081,00.html # LMI TechnoAddURL: http://www.lemonde.fr/aietek/0,2327,2060,00.html # LMI Business AddURL: http://www.lemonde.fr/aietek/0,2327,2078,00.html # LMI ServicesAddURL: http://www.lemonde.fr/aietek/0,2327,2062,00.html # LMI Conso AddURL: http://www.lemonde.fr/aietek/0,2327,2066,00.html # LMI Enquêtes Name: Le Monde InteractifLevels: 2ContentsStart: <td width=510 valign="top"> ContentsEnd: </body>ContentsCacheable: 0StoryStart: <td width=400>StoryEnd: article_impressionStoryURL: http://www.lemonde.fr/article/.*\.html
LeMonde7_UNE.site:
# Le Monde interactif - Tous les nouveaux articles des séquences# La Une : Accueil Pierre Georges Liens#___________________________________________# Vous pouvez supprimer de la liste ci-après les URL que vous ne lisez pas.# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 10.11.99 par Jacques TurbéURL: http://www.lemonde.fr/sequence/0,2319,2031,00.html # AccueilAddURL: http://www.lemonde.fr/sequence/0,2319,2199,00.html # Pierre Georges AddURL: http://www.lemonde.fr/article/0,2320,19245,00.html # journaux en ligne AddURL: http://www.lemonde.fr/article/0,2320,19277,00.html # signets technologiques Name: Le Monde Accueil Pierre Georges LiensLevels: 2ContentsStart: <td width=510 valign="top"> ContentsEnd: <td width=125 valign="top">ContentsCacheable: 0StoryStart: <td width=400>StoryEnd: article_impressionStoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde_AutoMoto.site:
# Le Monde interactif - Tous les nouveaux articles des séquences# Auto Moto#___________________________________________# Vous pouvez supprimer de la liste ci-après les URL que vous ne lisez pas.# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 10.11.99 par Jacques TurbéURL: http://www.lemonde.fr/sequence/0,2319,2162,00.html #Auto MotoName: Le Monde - AutoMotoLevels: 2ContentsStart: <td width=510 valign="top">ContentsEnd: <td width=115 valign="top">ContentsCacheable: 0StoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
journaldunet.site:
#Le Journal du Net - Articles du jour# Paramètres d'extraction de site pour Sitescooper# (http://jmason.org/software/sitescooper/)# par: Jacques Turbé # Créé le : 1999-10-20# Mise à jour: 2000-02-01 Philippe Renard Intégration des interviewsURL: Name: Le_Journal_du_Net Levels: 2 ContentsStart: <!-- Fin du menu dynamique ContentsEnd: Pour tout probl ContentsCachable: 0 StoryURL: http://www.journaldunet.com/\d+/.+\.shtml StoryURL: http://www.journaldunet.com/it_.+\.shtml StoryStart: orps du site StoryEnd: Pour tout probl StoryCacheable: 1
journaldunet_dossiers.site:
#Le Journal du Net - Les NET 20# Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/)# par: Jacques Turbé # Mise à jour: 1999-10-19URL: Name: Net 20 Levels: 2 ContentsStart: <!-- Début Corps du site --> ContentsEnd: Pour tout probl ContentsCachable: 0 StoryURL: http://www.journaldunet.com/dossiers/net20/20.*\.shtml StoryStart: <!-- Début Corps du site --> StoryEnd: Pour tout probl StoryCacheable: 1
la_tribune.site:
# La Tribune Edition electronique# Site file for Sitescooper (http://jmason.org/software/sitescooper/)# by: "P.Y. Letournel" <e-py.letournel /at/ wanadoo.fr># Take care : The first run creates a prc file of around 750KURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=2#2 # EntreprisesAddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=3#3 # EuropeAddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=4#4 # FinanceAddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=5#5 # FranceAddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=6#6 # InternationalAddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=7#7 # MarchesAddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=8#8 # Multimedia Name: LaTribune Levels: 2 StoryURL: http://www.latribune.fr/Tribune/Articles.nsf/ArticlesWeb/.* ContentsStart: Condenser la liste# ContentsEnd: StoryStart: Accueil</A> > StoryEnd: EN SAVOIR PLUS# ContentsCacheable: 0
le_monde_full.site:
# Le Monde - Edition électronique# Classé par séquences et sous-séquences# (sans les dossiers)## Paramètres d'extraction de site pour Sitescooper# (http://jmason.org/software/sitescooper/)# Mis à jour sur # le 15-02-2000 par Jacques TurbéURL: http://www.lemonde.fr/sequence/0,2319,2031,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2037,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2079,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2030,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2075,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2058,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2061,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2204,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2044,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2070,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2065,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2064,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2077,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2045,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2033,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2083,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2192,00.htmlAddURL: http://www.lemonde.fr/sequence/0,2319,2199,00.htmlName: le_monde_edition_electroniqueLevels: 2ContentsStart: <td width=510 valign="top">ContentsEnd: <td width=125 valign="top">ContentsCachable: 0StoryStart: td valign="top" width="500"StoryEnd: >Droits de <StoryURL: http://www.lemonde.fr/article/.*\.html
# Sitescooper site "Multimédium": two-level scoop of the daily news page.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy and the only StoryURL is
# commented out - restore both from upstream before use.
multimedium.site: |
  # Multimédium - news of the day
  # Site extraction parameters for Sitescooper
  # (http://jmason.org/software/sitescooper/)
  # by: Philippe Renard, hosted by (host missing in this copy)
  # Updated: 2000-02-01
  URL:
  Name: Multimédium
  Levels: 2
  ContentsStart: <!---Actualit
  ContentsEnd: <!-- fin de la table nouvelles -->
  ContentsCachable: 1
  # StoryURL: http://www.journaldunet.com/\d+/.+\.shtml
  StoryStart: BUT CONTENU -->
  StoryEnd: <!-- FIN CONTENU -->
  StoryCacheable: 1
# Sitescooper site "Le Nouvel Observateur": two-level scoop of nouvelobs.com.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL and all four AddURL directives have no value in this
# copy - restore the start URLs from upstream before use.
nouvelobs.site: |
  # Le Nouvel Observateur
  # Site script for Sitescooper (http://jmason.org/software/sitescooper/)
  # 08.11.99 by Jacques Turbé
  # Updates at (location missing in this copy)
  URL:
  AddURL:
  AddURL:
  AddURL:
  AddURL:
  Name: Le Nouvel Observateur
  Levels: 2
  ContentsStart: <TD width="31%" valign="top">
  ContentsEnd: Mode</A>
  ContentsCacheable: 0
  StoryURL: http://www.nouvelobs.com/.*\.html
  StoryStart: <DATE>SEMAINE
  StoryEnd: </TEXTE>
  StoryCachable: 0
# Sitescooper site "Sueddeutsche": three-level scoop (issue -> section
# contents -> article) of the Süddeutsche Zeitung.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: Sueddeutsche
  Description: Süddeutsche Zeitung - wichtigste Ressorts
  Levels: 3
  SizeLimit: 800
  IssueLinksStart: <!--zeitungskasten-->
  IssueLinksEnd: (<!--/zeitungskasten-->|<a href="vertrieb.sueddeutsche.de/)
  ContentsURL: /aktuell/sz/ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Issue page: only collapse runs of <br>/<p> into one.
  IssueHTMLPreProcess: {
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Bayern": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_bayern.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Bayern
  Description: Bayernteil der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Berlin": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_berlin.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Berlin
  Description: Berlin-Seite der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Bildung & Beruf": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_beruf.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Bildung & Beruf
  Description: Beilage Bildung & Beruf der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Seite Drei": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_drei.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Seite Drei
  Description: Seite 3 (Leitartikel) der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Feuilleton": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_feuilleton.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Feuilleton
  Description: Feuilleton der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Hochschulseite": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_hochschule.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Hochschulseite
  Description: Hochschulseite der Süddeutschen Zeitung - dienstags
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Immobilienseite": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_immobilien.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Immobilienseite
  Description: Immobilienseite der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Münchner Kultur": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_kultur.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Münchner Kultur
  Description: Ressort Münchner Kultur der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Literatur": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_literatur.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Literatur
  Description: Literaturbeilage der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Medien": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_medien.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Medien
  Description: Ressort Medien der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Meinungsseite": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_meinung.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Meinungsseite
  Description: Meinungsseite (Seite 4) der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ München": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_muenchen.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ München
  Description: München-Teil der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Panorama": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_panorama.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Panorama
  Description: Panorama der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Politik": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_politik.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Politik
  Description: Ressort Politik der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Reise & Erholung": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_reise.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Reise & Erholung
  Description: Beilage Reise & Erholung der Süddeutschen Zeitung - mittwochs
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
# Sitescooper site "SZ Sonderseiten": two-level scoop of one Süddeutsche Zeitung section.
# Payload is a literal block scalar, one directive per line.
# NOTE(review): URL has no value in this copy - restore the start URL before use.
de_sz_sonder.site: |
  # This is a sitescooper site file. see http://sitescooper.org/
  # by Peter Marschall, Version 0.5, 6.6.2001
  URL:
  Name: SZ Sonderseiten
  Description: Sonderseiten der Süddeutschen Zeitung
  Levels: 2
  ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php
  ContentsStart: <!--insertion:inhalt-->
  ContentsEnd: <!--insert_ende:inhalt-->
  StoryURL: /aktuell/sz/artikel\d{1,6}.php
  StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel-->
  StoryStart: <!--insert_ende:dachtitel-->
  StoryEnd: <!--insert_ende:inhalt-->
  StoryLifetime: 2
  # Story pages: drop trans.gif spacer images, rewrite the Headline span to
  # H2 and the LaufAuszeichnung span to STRONG, wrap the text between the
  # inhalt marker and the first <P in EM (an optional leading "Von " is
  # dropped), then collapse runs of <br>/<p> into one.
  StoryHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s;
    s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
  # Contents pages: drop trans.gif spacer images, rewrite the
  # LaufAuszeichnung/Headline/Lauftext spans to EM / STRONG+FONT / plain
  # text, replace each Infoschrift span with "</a><P>", then collapse runs
  # of <br>/<p> into one.
  ContentsHTMLPreProcess: {
    s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs;
    s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs;
    s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs;
    s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs;
    s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs;
    s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs;
    s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg;
  }
de_sz_sonderbeilage.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001URL: Name: SZ Sonderbeilage Description: Sonderbeilage der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_sport.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001URL: Name: SZ Sport Description: Sportteil der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_streiflicht.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001URL: Name: SZ Streiflicht Description: Titelseiten-Glosse der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_verkehr.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001URL: Name: SZ Auto & Verkehr Description: Beilage Auto & Verkehr der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_wirtschaft.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001URL: Name: SZ Wirtschaft Description: Ressort Wirtschaft der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_wissen.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001URL: Name: SZ Wissenschaft Description: Wissenschaftsbeilage der Süddeutschen Zeitung - dienstags Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_wochenende.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001URL: Name: SZ am Wochenende Description: Wochenendbeilage der Süddeutschen Zeitung - samstags Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_cert.site:
# de_cert.site# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 12.2.03URL: Description: German CERT Infos AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Name: CERT RUS Levels: 2 StoryURL: http://cert.uni-stuttgart.de/ticker/article.php\?mid=\d+ StoryStart: <FONT SIZE="+2"> StoryEnd: Copyright © 2003 RUS-CERT, Universität Stuttgart # remove CENTER StoryPostProcess: { s/center//gi;}
de_computerwoche.site:
# This is a sitescooper site file. see # by Stefan Schwingeler, Version 0.2, 27.10.1999URL: http://www.computerwoche.de/info-point/top-news/main.cfm?o=1 Name: Computerwoche Levels: 2 ContentsStart: <TD WIDTH="100%" VALIGN="TOP"> ContentsEnd: Copyright.*Computerwoche Verlag GmbH ContentsCachable: 0 ContentsDiff: 0 StoryURL: http://www.computerwoche.de/info-point/top-news/details.cfm\?id=\d+ StoryStart: </TABLE><br clear=all> StoryEnd: <a href=\"drucken.cfm StoryCachable: 0 StoryPostProcess: { s/<b> *Drucken<\/font><\/b>//gm; }
de_cyberkino.site:
# de_cyberkino.site# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 7.5.03URL: Description: German Cinema Infos AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de ContentsDiff: 1 ImageURL: http://www.cyberkino.de/entertainment/kino/.*\.jpg Name: Cyberkino Levels: 2
de_der_pocketstandard.site:
URL: Name: Der PocketStandardLevels: 3ContentsPrint: 1IssuePrint: 1ImageURL: http://.*## This site was converted from an AvantGo .subs file by subs-to-site.pl.# See http://sitescooper.org/ for more information on sitescooper.
de_fool.site:
# de_fool.site# This is a sitescooper site file. see http://sitescooper.org/ # by Rodrigo A. Batista, Version 0.1, 13.12.2000URL: Name: MotleyFool DE Description: Weekly News from the German 'The Motley Fool' (financial site) Levels: 2 ContentsStart: <BODYTEXT> ContentsEnd: <\/BODYTEXT> ContentsCachable: 0 ContentsDiff: 1
de_gazette.site:
# de_gazette.site# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, 6.2.03URL: Name: Die Gazette Description: German politics magazine AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 ImageURL: .*.jpg StoryURL: [A-Za-z]\S+\.html ContentsStart: bordercolor="#CCCCCC" ContentsEnd: /Archiv/Newsletter.html StoryToPrintableSub: { s,([A-Z].+)(\.html),\1-print\2, s,[A-Z],[a-z],} StoryPostProcess: { s/<center>//gi;}
de_gnn.site:
# This is a sitescooper site file. see # by Stefan Schwingeler, Version 0.2, 27.10.1999URL: Name: GNN Levels: 2 StoryURL: http://www.gnn.de/99\d\d/\d+-..\.html ContentsStart: <!-- Special --> ContentsEnd: </HTML> StoryStart: <FONT SIZE=-1> StoryEnd: Meldung als eMail verschicken
de_heise.site:
# This is a sitescooper site file. see # by Stefan Schwingeler, Version 0.2, 27.10.1999# Modified to correct story titles and to delete links at the end of each story# by Peter Marschall, Version 0.4, 6.6.2001URL: Name: Heise Newsticker Levels: 2 MinPages: 2 ContentsStart: <\/HEISEADVERT> ContentsEnd: <\/HTML> ContentsCachable: 0 ContentsDiff: 1 StoryURL: http://www.heise.de/pda/newsticker/m\d+\.html StoryStart: <HTML> StoryEnd: </HTML> StoryCacheable: 1 StoryHeadline: <HEISETEXT>\n<B>(.*?)<\/B> StoryHTMLPreProcess: { s/(?:\s*\/\s+)?<P><HR SIZE=1 NOSHADE><B>URL dieses Artikels:<\/B>.*?<P>Copyright 200\d by Verlag Heinz Heise//igs; s/<HEISETEXT>.<B>(.*?)<\/B>/<HEISETEXT><H2>$1<\/H2>/s; }
de_heise_mobil.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.3, 23.5.03URL: Name: Heise Mobil Description: German Heise Mobil-news Levels: 2 # Ticker ignorieren: ContentsStart: Themen ContentsEnd: <!-- MITTE+RECHTS --> ContentsCachable: 0 ContentsDiff: 1 StoryURL: http://heise.de/mobil/.*/ StoryStart: <HEISETEXT> StoryEnd: </HEISETEXT> StoryCacheable: 1 ImageURL: http://heise.de/mobil/artikel/.*/aufmacher\.jpg # remove small font commands StoryPostProcess: { s/<font size=1>//gi;}
de_heise_tp.site:
# This is a sitescooper site file. see # by Stefan Schwingeler and Carsten Clasohm, Version 0.3, Thu Aug 03 19:07:32 2000# minor correction to "StoryToPrintableSub"URL: Name: Heise Telepolis Levels: 2 ContentsStart: </CONTENTBANNER> ContentsEnd: alt="top of page" ContentsDiff: 1 StoryURL: http://www.heise.de/(tp/.*/\d+/\d.html|bin/tp/issue/dl-artikel.*) StoryCacheable: 1 StoryLifetime: 2 StoryToPrintableSub: s,/tp/.*/(\d+)/\d\.html,/bin/tp/issue/dl-artikel.cgi?artikelnr=\1&rub_ordner=inhalt&mode=html,StoryPostProcess: { s/<font size="\+1">([^<]+)<\/font>/<b>$1<\/b>/sgi; s/<font size="\+2" ?>([^<]+)<\/font><br>/<h2>$1<\/h2>/sgi;}
de_onlinekosten.site:
# This is a sitescooper site file. see # by Larsen Wulff, http://www.multimediaconnection.de/palmpilot, Date: 28.05.2000# onlinekosten.deURL: Name: Onlinekosten.de Description: Informationen und Preise rund ums Netz Levels: 2 ContentsStart: <!-- MAIN_CONTENT_TABLE START --> ContentsEnd: <!-- MAIN_CONTENT_TABLE ENDE --> ContentsDiff: 1 StoryURL: http://www.onlinekosten.de/news/tt.\d+.html StoryURL: http://www.onlinekosten.de/news/artikel.* StoryDiff: 1
de_pdassi_news.site:
# de_pdassi_news.site# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 4.3.03URL: Name: pdassi News Description: German Palm site AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 ImageURL: http://pdassi.de/images/.* StoryToPrintableSub: s/SID=[a-z0-9]+/SID=1/ StoryPostProcess: { s/<small>//gi;}
de_pdassi_software.site:
# de_pdassi_software.site# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 4.3.03URL: AddURL: AddURL: Name: pdassi Software Description: German Palm site AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 ImageURL: http://.*/util/screenshot.php\?pid=\d+.* StoryToPrintableSub: s/SID=[a-z0-9]+/SID=1/ StoryPostProcess: { s/align="center"//gi; s/<small>//gi;}
de_spiegel.site:
# de_Spiegel.site# This is a sitescooper site file. see # by Stefan Schwingeler, Version 0.6, 6.2.03# History:# "fixed" by Larsen Wulff, Larsen@multimediaconnection.de# rewritten with new PDA-link (no pics) by Stefan SchwingelerURL: http://www.spiegel.de/dertag/pda/avantgo/0,1958,r20=1@r21=1@r23=1@r10=1@r22=1@r24=1@r19=1@r139=1@r140=1,00.html Name: Der Spiegel Description: German news magazine AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 StoryURL: http://www.spiegel.de/dertag/pda/avantgo/artikel/.*\.html
de_stern.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, 10.12.2002URL: #AddURL: http://www.stern.de/pda/?pda=1&rubrik=politik#AddURL: http://www.stern.de/pda/?pda=1&rubrik=wirtschaft#AddURL: http://www.stern.de/pda/?pda=1&rubrik=sport#AddURL: http://www.stern.de/pda/?pda=1&rubrik=kultur#AddURL: http://www.stern.de/pda/?pda=1&rubrik=computer#AddURL: http://www.stern.de/pda/?pda=1&rubrik=campus#AddURL: http://www.stern.de/pda/?pda=1&rubrik=wissenschaft#AddURL: http://www.stern.de/pda/?pda=1&rubrik=lifestyle Name: Stern Levels: 2 ImageURL: .*\.jpg# ContentsURL: http://www.stern.de/pda/\?pda=1\&rubrik=.* ContentsStart: <strong>Lifestyle</strong> ContentsEnd: <!-- FOOTER START --> StoryURL: http://www.stern.de/.*/index.html\?id=\d+\&pda=1 StoryStart: Beginn des Artikels StoryEnd: <!-- FOOTER START --># StoryHeadline: <div id="artikelKopf1">(.*?)</div> StoryHeadline: <h1>(.*?)</h1>
de_tagesschau.site:
# de_tagesschau.site# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, 6.2.03URL: Name: Tagesschau Mobil Description: German news show AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 3 ImageURL: .*/image/.*\.jpg SizeLimit: 1000 Level2Cachable: 0 Level3Cachable: 0 Level4Cachable: 0 ContentsCachable: 0
de_tecchannel.site:
# History: # 1/11/2002: Initial version.URL: Name: tecChannelDescription: IT NewsLevels: 2StoryURL: /mobile/pda/[0-9]+/ContentsStart: - News</font></b><br><br>StoryEnd: Zurück zur ÜbersichtContentsHTMLPreProcess: { s/<hr[^>]*>//gmi; s/<font size="1">- //gmi; s/<\/?font[^>]*>//gmi; s/<\/a><br>/<\/a><br><br>/gmi; s/<b> - /<b>/gmi; s/<br>\s*<br>\s*<br>/<br><br>\n/gmi;}StoryHTMLPreProcess: { s/<\/?font[^>]*>//gmi; s/<b><a[^>]*>$//gmi; s/(<br>\s*){3,}/<br><br>/gmi;} AuthorName: Michael SchubartAuthorEmail: michael@schubart.net
de_teltarif.site:
# This is a sitescooper site file. see # by Stefan Schwingeler, Version 0.2, Sun Jul 16 11:32:59 2000URL: Name: Teltarif Levels: 2 ContentsStart: <!-- Add Ad End --> StoryURL: http://www.teltarif.de/arch/\d\d\d\d/kw\d+/s\d+\.html StoryStart: <!-- Add Ad End --> StoryEnd: Ihre Meinungen und Erfahrungen
de_tvspielfilm.site:
# This is a sitescooper site file. see # by Stefan Schwingeler and Carsten Clasohm, Version 1.0, 17.2.2000# modified by Stefan Schwingeler 25.04.01 11:26: UseTableSmarts: 0URL: # # # # # AddURL: Name: TV-Spielfilm Levels: 2 ContentsDiff: 0 StoryCachable: 0 StoryURL: http://www.tomorrow-newmedia.de/mobile/avantgo/tvs/gen/.* ImageURL: .+\.gif StoryUseTableSmarts: 0
de_welt.site:
# This is a sitescooper site file. see # by Stefan Schwingeler, Version 0.1, 24.11.1999URL: Name: Die Welt Levels: 2 ContentsStart: Tagesinhalts-Übersicht ContentsEnd: ALLE SEITEN ENDE StoryStart: <meta name="robots" StoryEnd: <P>\ <\/p> StoryURL: http://www.welt.de/daten/\d\d\d\d/\d\d/\d\d/.*\.htx StoryPostProcess: { s/align=center//gmi; s/<center>//gmi; }
de_yahoo.site:
# This is a sitescooper site file. see # by Stefan Schwingeler, Version 0.2, 27.10.1999URL: AddURL: AddURL: AddURL: AddURL: AddURL: AddURL: AddURL: Name: Yahoo News DE Levels: 2 ContentsStart: <table cellspacing=0 cellpadding=4> ContentsEnd: <h2>Frühere Meldungen<\/h2> ContentsCachable: 0 StoryURL: http:\/\/de.news.yahoo.com\/99\d+\/\d+\/.*\.html StoryStart: <h2> StoryEnd: <br clear=all> StoryCacheable: 1
mobile2day.site:
# mobile2day.site# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 6.5.03URL: http://www.mobile2day.de/pdanews_all_palm.html?n_multi=0&nf_id=0&nt_id=3&f_date_m=0&f_date_y=0&t_date_m=0&t_date_y=0&s_text=&isLimit=1 Name: mobile2day Description: German PDA-News AuthorName: Stefan /at/ Schwingeler.de ContentsDiff: 1 Levels: 2 StoryPostProcess: { s/<CENTER>//gi; s/size=\"1\"//gi;}
palmfaq_de.site:
URL: Name: PalmFAQ.deLevels: 2ContentsDiff: 1StoryCacheable: 1
pda_debitel_net.site:
URL: Name: debitel.net Mobile PortalLevels: 4ImageURL: .*[gif|jpg]SizeLimit: 1000
windows2000faq.site:
# windows2000faq.site# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, 6.2.03# this was: ntfaq.siteURL: http://www.windows2000faq.com/Articles/Index.cfm?Action=New Name: Windows2000 FAQ AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 ContentsStart: New FAQs in the last 30 days ContentsEnd: <--   ContentsDiff: 1 StoryURL: http://www.windows2000faq.com/Articles/Index.cfm\?ArticleID=\d+ StoryStart: CLASS="title" StoryEnd: <b>Related Articles</b> StoryCachable: 1 StoryPostProcess: { s/<CENTER>//gi;}
zdnet_news.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, Date: 030214URL: Name: ZDNet NewsLevels: 2
de_zeit.site:
# This is a sitescooper site file. See # by Carsten Clasohm, version 1.0, 19.11.1999# Modified for new style and extended to a 3 level site# by Peter Marschall, version 1.1, 6.6.2001URL: Name: Zeit Description: Wochenzeitung Die Zeit Levels: 3 SizeLimit: 800 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 IssueUseTableSmarts: 0 IssueLinksStart: <!-- Sekundaernavigation Anfang -+ --> IssueLinksEnd: IN DER ZEIT ContentsURL: http://www.zeit.de/(?:politik|wirtschaft|kultur|wissen|media|reisen|leben)/ ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Politik/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Wirtschaft/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Kultur/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Wissen/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Media/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Reisen/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Leben/\d{5,6}_.*?\.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; } IssueHTMLPreProcess: { 
s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_zeit_alternate.site:
# This is a sitescooper site file# by Andreas Mittler, version 1.1, 27.03.2001URL: AddURL: AddURL: AddURL: AddURL: AddURL: AddURL: AddURL: Name: Zeit Levels: 2 StoryURL: http://www.zeit.de/\d+/\d+/.+/\d+_.+\.html StoryStart: <br><br> StoryCacheable: 1
de_zeit_kultur.site:
# This is a sitescooper site file. See # by Peter Marschall, version 1.1, 6.6.2001URL: Name: Zeit Kultur Description: Wochenzeitung Die Zeit - Ressort Kultur Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Kultur/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_leben.site:
# This is a sitescooper site file. See # by Peter Marschall, version 1.1, 6.6.2001URL: Name: Zeit Leben Description: Wochenzeitung Die Zeit - Ressort Leben Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Leben/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_politik.site:
# This is a sitescooper site file. See # by Peter Marschall, version 1.1, 6.6.2001URL: Name: Zeit Politik Description: Wochenzeitung Die Zeit - Ressort Politik Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Politik/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_reisen.site:
# This is a sitescooper site file. See # by Peter Marschall, version 1.1, 6.6.2001URL: Name: Zeit Reisen Description: Wochenzeitung Die Zeit - Ressort Reisen Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Reisen/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_wirtschaft.site:
# This is a sitescooper site file. See # by Peter Marschall, version 1.1, 6.6.2001URL: Name: Zeit Wirtschaft Description: Wochenzeitung Die Zeit - Ressort Wirtschaft Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Wirtschaft/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_wissen.site:
# This is a sitescooper site file. See # by Peter Marschall, version 1.1, 6.6.2001URL: Name: Zeit Wissen Description: Wochenzeitung Die Zeit - Ressort Wissen Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Wissen/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
freebsd_hu.site:
# FreeBSD.hu# Hubidubi (hubidubi@freemail.hu)URL: Name: FreeBSD.huLevels: 1StoryDiff: 1ContentsDiff: 1# If you don't want the logo, comment this line out!ImageURL: http://www.freebsd.hu/images/.*\.jpg
hup_hu.site:
# HUP.hu# Author: Hubidubi (hubidubi@freemail.hu)URL: http://www.hup.hu/modules.php?name=PDAName: HUPLevels: 2# If you don't want the logo, comment this line out!ImageURL: http://www.hup.hu/images/.*\.pngStoryPostProcess: { s/Dátum//gm; s/Cím//gm;}
linux_hu.site:
# Linux.hu# Author: Hubidubi (hubidubi@freemail.hu)URL: Name: Linux.huStoryEnd: Ha te is olvastálStoryPostProcess: { s/" WIDTH=1 HEIGHT=1>//gm; s/Ha te is olvastál.*k!//gm;}
linuxforum_hu.site:
# Linuxforum.hu# Author: Hubidubi (hubidubi@freemail.hu)URL: http://www.linuxforum.hu/modules.php?name=AvantGoName: LinuxforumLevels: 2# If you don't want the logo, comment this line out!ImageURL: http://www.linuxforum.hu/images/.*\.gifStoryPostProcess: { s/Date//gm; s/Cím//gm;}
linuxonline_hu.site:
# LinuxOnline.hu# Author: Hubidubi (hubidubi@freemail.hu)URL: http://www.linuxonline.hu/modules.php?name=AvantGoName: LinuxOnlineLevels: 2# If you don't want the logo, comment this line out!ImageURL: http://www.linuxonline.hu/images/.*\.gifStoryPostProcess: { s/Date//gm; s/Cím//gm;}
metro_hu.site:
# Metro.hu# Author: Hubidubi (hubidubi@freemail.hu)URL: Name: MetroLevels: 3
pdamania_hu.site:
# PDAMania# Author: Hubidubi (hubidubi@freemail.hu)URL: Name: PDAMania.huLevels: 3 StoryStart: <b>::
terminal_hu.site:
# terminal.hu is a Hungarian internet news site.# Author: dlux (dlux@kapu.hu)# Distributed as part of the sitescooper package URL: Name: Terminal.hu Levels: 2 StoryStart: End Ad StoryEnd: olvasó doboz StoryURL: http://www.terminal.hu/cikk\.php3\?id=\d+ StoryHeadline: <TITLE>.*?([^:]*?)</TITLE>
accuweather_dublin.site:
# Accuweather Dublin# you'll have to go to Accuweather site, find your city and# copy url to the line belowURL: http://www.accuweather.com/adcbin/intlocal_index?wxcity2=DUBLIN&wxcountry=EU;IE AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr> Name: Accuweather Dublin Levels: 1 # other details are picked up automatically from ../lib/layouts.site
evilgerald.site:
URL: Name: The Evil GeraldLevels: 2ContentsUseTableSmarts: 0StoryURL: http://www.evilgerald.com/Issues/Issue\d+/\S+
hackwatch.site:
URL: http://www.iol.ie/~kooltek/welcome.htmlName: Hack Watch NewsDescription: hacks, internet news, telecoms, digital TV, crypto and more, by John McCormacLevels: 1StoryStart: Last Updated At:[^<]+StoryEnd: -- Begin Current News Headlines --StoryDiff: 1
irish_aertel_listings.site:
URL: Name: Aertel TV Listings StoryStart: <font face="Verdana, Arial, Helvetica, sans-serif" size="3" color=".FFFFFF">TV TODAY</font></td> StoryEnd: <-- Previous Page</a> UseTableSmarts: 0 AddURL: AddURL: AddURL: AddURL: AddURL: StoryPostProcess: { s/^\s+//gm; s/^(\d)/\n$1/gm; s/^(Please reload |MAIN INDEX |MAIN NEWS INDEX ).*$//gm; s/^(TV Extra |ENTERTAINMENT INDEX |Last Updated: ).*$//gm; s/^(NEWS HEADLINES ).*$//gm; s/\n\s*\n+/\n\n/gs; s/<hr align="left" size="1" width="350" [^>]+>//gs; }
linux_ie.site:
URL: Name: Linux.ieDescription: Home of the Irish Linux User Group# Thanks to Ken Guest, ILUG webmaster guy, for adding some sitescooper-friendly# comments to the page. Hopefully this'll be HTML-spring-clean proof for quite# a while!Levels: 2ContentsStart: <b>Linux.ie Homepage</b><BR><BR>ContentsEnd: <a href="indigo.ie/"><img src="images/indigopower.gif" border="0"></a>ContentsPrint: 1TableRender: flattenStoryURL: /(reviews|tutorials|articles)/.*\.htmlStoryStart: <TABLE BORDER=0 CELLSPACING=0 CELLPADDING=0 WIDTH="100%">StoryEnd: </HTML>
rte_news_online.site:
URL: Name: RTE News Online Levels: 2 Description: News from RTE, Ireland's national broadcaster ContentsStart: DO NOT REMOVE COMMENTS BELOW THIS LINE ContentsEnd: © \d+ RTÉ News & RTÉ Online StoryURL: http://www.rte.ie/news/[[YYYY]]/\d+/\S+.html StorySkipURL: .*/(morningireland|1news|newsatone|6news|nationwide|9news|qanda).* StoryStart: (THE SIDEBAR CONTENT ENDS|--TEXT TABLE--) StoryEnd: (--AUDIO . VIDEO GOES HERE--|© 1999 RTÉ News) # allow the >More... image to work ImageURL: # remove all table entries for this site. TableRender: flatten
volta_netgains.site:
URL: http://www.voltapublishing.com/cgi-bin/news/index.cgi?f=keyword&keywords=Ireland_shortsName: Volta NetGainsDescription: high-quality daily news round-up of Internet issues and digital mediaLevels: 1StoryStart: <P><B>Search our news archives.</B></P>StoryEnd: </HTML>StoryDiff: 1
jerusalem_post.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost%2FP%2FFrontPage%2FFrontPage&cid=1002116796299Name: JPostDescription: The Jerusalem PostLevels: 3SizeLimit: 2000AuthorName: David Resnick# You've got to import your cookies for this to workRequireCookie: www.jpost.com Aly########## Issue ##########IssueUseTableSmarts: 0IssueLinksStart: SECTIONSIssueLinksEnd: SectionIndex&cid=1006953080001IssueCachable: 0############# Contents #############ContentsUseTableSmarts: 0ContentsCachable: 0ContentsStart: CLASS="lead">ContentsEnd: VALIGN="TOP"><!--OMKT--># Add sections here, but make sure to increase the IssueLinksEnd above# Today's Paper contentsContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1006688055060.*# editorial contentsContentsURL: .*pagename=JPost/P/Opinion/SectionIndex&cid=1006953079865.*# columns contentsContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1006953079897.*# latest news contentsContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1008596981749.*# Arts & Leisure contentsContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1006953079917.*# Features contentsContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1006953079845.*########## Story ##########StoryURL: .*pagename=JPost/JPArticle.*StoryUseTableSmarts: 0StoryCachable: 0# StoryStart: <span CLASS="topstory"># something wrong with the story start check, used to work with the above formatStoryStart: "topstory"StoryEnd: Printer FriendlyStoryHeadline: topstory(.*)span
haaretz.site:
URL: Name: HaaretzDescription: Haaretz.comLevels: 3SizeLimit: 5000AuthorName: David Resnick########## Issue ##########IssueUseTableSmarts: 0IssueLinksStart: class="t13BNew"IssueLinksEnd: subContrassID=6IssueCachable: 0############# Contents #############ContentsUseTableSmarts: 0ContentsCachable: 0ContentsStart: class="t18BBordo"ContentsEnd: Top.gif# News contentsContentsURL: .*subContrassID=1.*# BusinessContentsURL: .*subContrassID=2.*# editorial contentsContentsURL: .*subContrassID=4.*# Features contentsContentsURL: .*subContrassID=5.*########## Story ##########StoryURL: .*hasen/spages.*StoryUseTableSmarts: 0StoryCachable: 0StoryStart: class="t18B"StoryEnd: HTTP-EQUIV="PRAGMA"
jpost-columns.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Section/IndexPhoto&cid=1006953079897Name: JPost-columnsDescription: The Jerusalem PostLevels: 2SizeLimit: 150AuthorName: David Resnick# You've got to import your cookies for this to workRequireCookie: www.jpost.com Aly############# Contents #############ContentsStart: <B>Columns</B>ContentsEnd: IST</P>ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis;}########## Story ##########StoryURL: .*pagename=JPost/JPArticle.*StoryStart: <TD VALIGN="TOP" CLASS="byline">StoryEnd: <TD CLASS="sect">StoryFollowLinks: 1
jpost-international.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Section/IndexPhoto&cid=1078113566627Name: JPost-internationalDescription: The Jerusalem PostLevels: 2SizeLimit: 150AuthorName: David Resnick# You've got to import your cookies for this to workRequireCookie: www.jpost.com Aly############# Contents #############ContentsStart: <B>International</B>ContentsEnd: IST</P>ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis;}########## Story ##########StoryURL: .*pagename=JPost/JPArticle.*StoryStart: <TD VALIGN="TOP" CLASS="byline">StoryEnd: <TD CLASS="sect">StoryFollowLinks: 1StoryHTMLPreProcess: { s!<SPAN CLASS="byline"></SPAN>.*<SPAN CLASS="lead">!!gis; s!<P CLASS="bottomline" ALIGN="CENTER">Advertisement<BR>!!gis;}
jpost-israel.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Section/IndexPhoto&cid=1078027574097Name: JPost-IsraelDescription: The Jerusalem PostLevels: 2SizeLimit: 150AuthorName: David Resnick# You've got to import your cookies for this to workRequireCookie: www.jpost.com Aly############# Contents #############ContentsStart: <B>Israel</B>ContentsEnd: IST</P>ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis;}########## Story ##########StoryURL: .*pagename=JPost/JPArticle.*StoryStart: <TD VALIGN="TOP" CLASS="byline">StoryEnd: <TD CLASS="sect">StoryFollowLinks: 1StoryHTMLPreProcess: { s!<SPAN CLASS="byline"></SPAN>.*<SPAN CLASS="lead">!!gis; s!<P CLASS="bottomline" ALIGN="CENTER">Advertisement<BR>!!gis;}
jpost-me.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Section/IndexPhoto&cid=1101615860782Name: JPost-MEDescription: The Jerusalem PostLevels: 2SizeLimit: 150AuthorName: David Resnick# You've got to import your cookies for this to workRequireCookie: www.jpost.com Aly############# Contents #############ContentsStart: <B>Middle East</B>ContentsEnd: IST</P>ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis;}########## Story ##########StoryURL: .*pagename=JPost/JPArticle.*StoryStart: <TD VALIGN="TOP" CLASS="byline">StoryEnd: <TD CLASS="sect">StoryFollowLinks: 1StoryHTMLPreProcess: { s!<SPAN CLASS="byline"></SPAN>.*<SPAN CLASS="lead">!!gis; s!<P CLASS="bottomline" ALIGN="CENTER">Advertisement<BR>!!gis;}
jpost-opinion.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Opinion/SectionIndex&cid=1006953079865Name: JPost-opinionDescription: The Jerusalem PostLevels: 2SizeLimit: 150AuthorName: David Resnick# You've got to import your cookies for this to workRequireCookie: www.jpost.com Aly############# Contents #############ContentsStart: <B>Opinion</B>ContentsEnd: EDITOR'S PICKSContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis;}########## Story ##########StoryURL: .*pagename=JPost/JPArticle.*StoryStart: <TD VALIGN="TOP" CLASS="byline">StoryEnd: <TD CLASS="sect">StoryFollowLinks: 1StoryHTMLPreProcess: { s!<SPAN CLASS="byline"></SPAN>.*<SPAN CLASS="lead">!!gis; s!<P CLASS="bottomline" ALIGN="CENTER">Advertisement<BR>!!gis;}
jp_japan_times_business.site:
# History: # 12/30/2001: Initial version.URL: Name: Japan Times BusinessDescription: English language news of Japan, "Business" section.Levels: 2StoryURL: /cgi-bin/getarticle.pl.*ContentsStart: </CENTER><BR>ContentsEnd: <! bottombarstart>StoryStart: <! staticstart>StoryEnd: <! bottombarstart>ContentsHTMLPreProcess: { s/<\/font><\/a><br>\n\n/<\/a><br><br>/gmi; s/<\/?font[^>]*>//gmi;} StoryHTMLPreProcess: { s/<\/?font[^>]*>//gmi;} AuthorName: Michael SchubartAuthorEmail: michael@schubart.net
jp_japan_times_news.site:
# History: # 12/30/2001: Initial version.URL: Name: Japan Times NewsDescription: English language news of Japan, "News" section.Levels: 2StoryURL: /cgi-bin/getarticle.pl.*ContentsStart: </CENTER><BR>ContentsEnd: <! bottombarstart>StoryStart: <! staticstart>StoryEnd: <! bottombarstart>ContentsHTMLPreProcess: { s/<\/font><\/a><br>\n\n/<\/a><br><br>/gmi; s/<\/?font[^>]*>//gmi;} StoryHTMLPreProcess: { s/<\/?font[^>]*>//gmi;} AuthorName: Michael SchubartAuthorEmail: michael@schubart.net
jp_daily_yomiuri_english.site:
# History: # 12/31/2001: Initial version.URL: Name: Daily Yomiuri EnglishDescription: English language news of JapanLevels: 2StoryURL: /newse/.*ContentsStart: Latest news as of:ContentsEnd: <!-- Language Labo -->ContentsHTMLPreProcess: { s/<img[^>]*alt="([^"]*)"[^>]*>/<b>$1<\/b>/gmi; s/ target = "main"//gmi; s/<\/?ul>//gmi; s/<li>/<br>/gmi; s/<\/?font[^>]*>//gmi;}StoryHTMLPreProcess: { s/<\/?font[^>]*>//gmi;} AuthorName: Michael SchubartAuthorEmail: michael@schubart.net
ny_post.site:
URL: Name: New York PostLevels: 3
christchurch_press.site:
# Christchurch Press Site converted to doc format using sitescooper# #URL: http://www.stuff.co.nz/inl/index/0,1008,0a1561,FF.html Name: Christchurch Press Levels: 2 ContentsStart: All the material on this page has the protection of international copyright. All rights reserved ContentsEnd: TOP OF PAGE StoryStart: All the material on this page has the protection of international copyright. All rights reserved StoryEnd: TOP OF PAGE StoryURL: http://www.stuff.co.nz/inl/index/.*\.html
gist_tv.site:
# gist_tv.site# For gist.com TV Listings## To customize these listings (the "uid" in the URL below is for my settings), set up a # normal account at gist.com, set up the "Handheld Gist" feature on the main page,# do any setup necessary for the handheld version (the service seems a little shaky right now, # it may take some work).## When you get to the step where you are supposed to click "submit" to install the # AvantGo channel, just view the source of the page you're looking at instead and look # for the uid number in the source near the submit button. Replace it below, and you're all # set. Put the URL in your browser to make sure you've got it set up right.## Levels: 3 because that's what the gist AvantGo .subs file indicated. Not sure why.# AuthorName: Justin Henry <jhenry@fjicl.com>URL: http://avantgo.gist.com/tv/avantgo/index.jsp?uid=541624Name: GIST TV ListingsLevels: 3ImageURL: .*
whyytv12.site:
URL: Name: WHYY Philadelphia TV12/91FMLevels: 3ContentsPrint: 1IssuePrint: 1ImageURL: http://.*## This site was converted from an AvantGo .subs file by subs-to-site.pl.# See http://sitescooper.org/ for more information on sitescooper.
ctc-movies-metro.site:
URL: Name: ClickTheCity.com - Metro Manila Movie GuideLevels: 3AuthorName: Barry Dexter A. GonzagaAuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.phContentsURL: /movies/movie.asp.movid=.*StoryURL: /movies/theater.asp.theid=.*IssueLinksStart: Movie Guide HomeIssueLinksEnd: active.macromedia.comContentsStart: Movie Guide HomeContentsEnd: active.macromedia.comStoryStart: Movie Guide HomeStoryEnd: active.macromedia.com
inq7.site:
URL: Name: INQ7 ExpressDescription: The Philippine Daily Inquirer and GMA Network News Web siteLevels: 2AuthorName: Barry Dexter A. GonzagaAuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.phStoryURL: /express/html_output/.*StoryURL: http://money\.inq7\.net/topstories/printable_topstories\.php.*ImageURL: /express/html_output/.*\.gifImageURL: http://www\.inq7money\.net/images/header/.*\.gifContentsStart: class="mainContent">ContentsEnd: <!-- Creative for 120x600 format -->StoryStart: class="mainContent">StoryEnd: <!-- Creative for 120x600 format -->
seattle_p_i.site:
# Site file contributed by Jason Simpson <jason /at/ xio.com>#URL: Name: Seattle P-I Levels: 2# ContentsStart: </center># ContentsEnd: </body> StoryURL: \S+\.shtml# StoryStart: </center># StoryEnd: </body>
elmundo_culture.site:
# Author: Sergi Pusó <sergi /at/ iagora.net>URL: Name: El Mundo CulturaDescription: Culture news from spanish newspaper El MundoLevels: 2StoryURL: http://www.elmundo.es/diario/cultura/.*.htmlStoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_economy.site:
# Author: Sergi Pusó <sergi /at/ iagora.net>URL: Name: El Mundo EconomiaDescription: Economy news from spanish newspaper El MundoLevels: 2StoryURL: http://www.elmundo.es/diario/economia/.*.htmlStoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_europe.site:
# Author: Sergi Pusó <sergi /at/ iagora.net>URL: Name: El Mundo EuropaDescription: Europe news from spanish newspaper El MundoLevels: 2StoryURL: http://www.elmundo.es/diario/europa/.*.htmlStoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_international.site:
# Author: Sergi Pusó <sergi /at/ iagora.net>URL: Name: El Mundo InternacionalDescription: International news from spanish newspaper El MundoLevels: 2StoryURL: http://www.elmundo.es/diario/internacional/.*.htmlStoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_national.site:
# Author: Sergi Pusó <sergi /at/ iagora.net>URL: Name: El Mundo NacionalDescription: Spain news from spanish newspaper El MundoLevels: 2StoryURL: http://www.elmundo.es/diario/espana/.*.htmlStoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_society.site:
# Author: Sergi Pusó <sergi /at/ iagora.net>URL: Name: El Mundo SociedadDescription: Society news from spanish newspaper El MundoLevels: 2StoryURL: http://www.elmundo.es/diario/sociedad/.*.htmlStoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_sports.site:
# Author: Sergi Pusó <sergi /at/ iagora.net>URL: Name: El Mundo DeportesDescription: Sports news from spanish newspaper El MundoLevels: 2StoryURL: http://www.elmundo.es/diario/deportes/.*.htmlStoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
le_temps.site:
# script sitescooper pour le site du Temps, quotidien suisse# édité à Genève# écrit par Vincent Oberson (reverso@club-internet.fr)# avec les précieux conseils de Jacques Turbé et Pierre-Yves LetournelURL: http://www.letemps.ch/template/default.asp?page=sommaireName: Le TempsLevels: 2StoryURL: http://www.letemps.ch/template/.*ContentsStart: >: <ContentsEnd: <a name="débat">StoryStart: document.frmPrint.submit()StoryEnd: La Une
globe_and_mail_columnists.site:
# The Globe and Mail is a general interest newspaper# based in Toronto, Canada.## This script scoops the Stories presented in# the paper's National Columnists section.URL: Name: G&M Columnists Levels: 2 ContentsStart: <!-- /fragments/nav/HubNav_National.html ends --> ContentsEnd: Complete Index of Today's Print Headlines</b></font></a> # StoryURL: http://www\.globeandmail\.com/servlet/\S*&hub=national StoryURL: http://www\.globeandmail\.com/servlet\S*hub=national\S* StoryStart: <!-- Full Story Header --> StoryEnd: <!-- Full Story Footer --># This story processor slows things down a lot, but# it removes the annoying text "PRINT EDITION" that# appears above every story.StoryPostProcess: { s{^<.*><b>PRINT EDITION</b><.*>$}{}mg;}
globe_and_mail_national.site:
# The Globe and Mail is a general interest newspaper# based in Toronto, Canada.## This script scoops the National news stories# presented on the paper's homepage.URL: Name: G&M National Levels: 2 ContentsStart: <!-- /fragments/nav/HubNav_National.html ends --> ContentsEnd: <b>Additional National Stories</b> # Use the following if you want to include the "Additional National # Stories" at the bottom of the page: # # ContentsEnd: <!-- /fragments/completeheadlineindex.html begins --> # StoryURL: http://www\.globeandmail\.com/servlet/\S*&hub=national StoryURL: http://www\.globeandmail\.com/servlet\S*hub=national\S* StoryStart: <!-- Full Story Header --> StoryEnd: <!-- Full Story Footer --># This story processor slows things down a lot, but# it removes the annoying text "PRINT EDITION" that# appears above every story.StoryPostProcess: { s{^<.*><b>PRINT EDITION</b><.*>$}{}mg;}
globe_and_mail_thearts.site:
# The Globe and Mail is a general interest newspaper# based in Toronto, Canada.## This script scoops the stories presented in# the paper's "The Arts" section.URL: Name: G&M The Arts Levels: 2 ContentsStart: <!-- /fragments/nav/HubNav_TheArts.html ends --> ContentsEnd: <!-- /fragments/completeheadlineindex.html begins --> # StoryURL: http://www\.globeandmail\.com/servlet/\S*&hub=national StoryURL: http://www\.globeandmail\.com/servlet\S*hub=thearts\S* StoryStart: <!-- Full Story Header --> StoryEnd: <!-- Full Story Footer --># This story processor slows things down a lot, but# it removes the annoying text "PRINT EDITION" that# appears above every story.StoryPostProcess: { s{^<.*><b>PRINT EDITION</b><.*>$}{}mg;}
globe_and_mail_toronto.site:
# The Globe and Mail is a general interest newspaper# based in Toronto, Canada.## This script scoops the Stories presented in# the paper's National Toronto section.URL: Name: G&M Toronto Levels: 2 ContentsStart: <!-- /fragments/nav/HubNav_National.html ends --> ContentsEnd: Complete Index of Today's Print Headlines</b></font></a> # StoryURL: http://www\.globeandmail\.com/servlet/\S*&hub=national StoryURL: http://www\.globeandmail\.com/servlet\S*hub=national\S* StoryStart: <!-- Full Story Header --> StoryEnd: <!-- Full Story Footer --># This story processor slows things down a lot, but# it removes the annoying text "PRINT EDITION" that# appears above every story.StoryPostProcess: { s{^<.*><b>PRINT EDITION</b><.*>$}{}mg;}
bbc_news_front.site:
URL: Name: BBC Front Page Levels: 2
bbc_news_health.site:
URL: Name: BBC News Health Levels: 2
bbc_news_sci_tech.site:
URL: Name: BBC News Sci-Tech Levels: 2 ContentsDiff: 1
bbc_news_world.site:
URL: Name: BBC World News Levels: 2
the_guardian.site:
# The UK Guardian# Site file for Sitescooper (http://jmason.org/software/sitescooper/)# Written by: Jason Yanowitz <yanowitz /at/ poboxes.com># Last updated: Oct 2 2001 jm URL: Name: UK GuardianLevels: 2StoryURL: http://www.guardian.co.uk/.*/story/.*StoryStart: <.-- Navbar: no scribbling --><.-- Vignette StoryServer 4 [^>]+ -->StoryEnd: riangle_up.gifStoryHeadline: Guardian Unlimited \| The Guardian \| (.*)
gabriels_mobile_channel.site:
# Gabriel's Mobile Channel# Weekly Roman Catholic lectionary readings, daily meditations, and# religious news# Site file by Joe Pfeiffer, pfeiffer /at/ cs.nmsu.edu#URL: http://www.erienet.net/~stjoseph/ppcchannel/gabehomepp.html Name: Gabriels Channel Levels: 3 IssueUseTableSmarts: 0 IssueLinksStart: (<BODY>|<body>) IssueLinksEnd: (</BODY>|</body>) ContentsStart: (<BODY>|<body>) ContentsEnd: (</BODY>|</body>)# ContentsPrint: 1 StoryDiff: 1
scifiwire.site:
# Author: MMiller /at/ media-general.com (thanks!)URL: Name: SciFi Wire Levels: 2 StoryDiff: 1
archaeology_org.site:
URL: Name: Archaeology Org NewsLevels: 2ContentsStart: <!--Begin main table-->ContentsEnd: <!-- End news-->StoryURL: http://.*
explorezone.site:
# URL thanks to http://members.bellatlantic.net/~blumax/plink.htmlURL: Name: ExploreZoneLevels: 2StoryURL: http://explorezone.space.com/go/.*ImageURL: http://explorezone.space.com/go/.*StoryDiff: 1StoryCacheable: 1
grahamhancock.site:
URL: Name: HancockLevels: 2ContentsStart: <div class=newsItem id=newsItem>ContentsEnd: <a href="news/index.php?archive=1" target="_top" title="NewsDesk Archive" class="text">News desk archive...</a>StoryURL: http://.*
new_scientist.site:
URL: Name: New Scientist Levels: 2 AddURL: StoryURL: http://www.newscientist.com/\S+/\S+_\d+\.htm.* StoryURL: http://www.newscientist.com/\S+/\S+\.jsp\?id=\S+ ContentsStart: (magazine contents|Index Table) ContentsEnd: More than 1500 science, technology and academic vacancies StoryStart: <b class="(?:newsarthead|letterhead|heading|bktitle|intvwhead)"> StoryEnd: For exclusive insights into the most important developments in StoryHeadline: <b class="(?:newsarthead|letterhead|heading|bktitle|intvwhead)">(.*?)</b>
new_scientist_news.site:
# New Scientist News in RSS formatURL: http://www.newscientist.com/feed.ns;jsessionid=HOGGBFOGNOAA?index=online-newsName: New Scientist NewsContentsFormat: rssContentsDiff: 1StoryToPrintableSub: s,(id=.+),$1\&print=true,StoryToPrintableSub: s,\&feedId=.*,\&print=true,StoryStart: <div id="printbody">StoryEnd: <div class="artlinks">
science_daily.site:
URL: AuthorName: Derek Glidden <dglidden /at/ illusionary.com> Name: Science Daily Headlines Levels: 2 StoryURL: http://www.sciencedaily.com/releases/.* StoryStart: NEXT StoryEnd: RELATED
smithsonian.site:
URL: Name: Smithsonian Description: Smithsonian Magazine Levels: 2 StoryURL: http://www.smithsonianmag.si.edu/smithsonian/.*
spaceref.site:
URL: AuthorName: Derek Glidden <dglidden /at/ illusionary.com> Name: SpaceRef.com Levels: 3 ContentsURL: http://www.spaceref.com/avantgo/(srnews|pressr|events).html ContentsPrint: 1 StoryURL: http://www.spaceref.com/avantgo/viewnews.html?.* StoryURL: http://www.spaceref.com/avantgo/viewpr.html?.* StoryURL: http://www.spaceref.com/avantgo/calendar.html?.*
crypto_gram.site:
URL: Name: Crypto-Gram Levels: 2 ContentsStart: <!--* begin page content --*> ContentsEnd: <!--* end page content --*> StoryURL: /crypto-gram-[[YY]]([[MM]]|[[MM-1]]|[[MM-2]]|[[MM-3]])\.html StoryStart: <!--* begin page content --*> StoryEnd: <!--* end page content --*># fixed by Derek Glidden <dglidden /at/ illusionary.com># and Adrian Colley aecolley /at/ spamcopdotnet
cryptome.site:
URL: Name: Cryptome Levels: 2 ContentsStart: <HTML> ContentsEnd: </HTML> StoryURL: http://cryptome.org/\S+\.html? ContentsDiff: 1
GSR_Appearance_Mods.site:
#GS Resources Appearance Mods Forum#You will appreciate this site more if you#are a diehard fan of Classic Suzuki Motorcycles URL: http://www.thegsresources.com/_forum/viewforum.php?f=8Name: GSR Appearance ModsDescription: Suzuki GS Resources Appearance ModsAuthorName: Delmer Wells -- delmer at delmer dot comLevels: 2ContentsStart: nowrap="nowrap"> Last Post </th>ContentsEnd: <td class="catBottom"#The following strips out view count and last poster's nameContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; }StoryStart: Posted:StoryEnd: class= <span class="topictitle"><a
GSR_Bike.site:
#GS Resources Bike of the Month#You will appreciate this site more if you#are a diehard fan of Classic Suzuki MotorcyclesURL: Name: GSR Bike of The MonthDescription: Suzuki GS PageStoryDiff: 1ImageOnlySite: 1ImageURL: http://www.thegsresources.com/images/monthly_photo/.*jpgImageScaleToMaxWidth: 500AuthorName: Delmer Wells Levels: 2
GSR_General_Disc.site:
#GS Resources General Discussion Forum#You will appreciate this site more if you#are a diehard fan of Classic Suzuki MotorcyclesURL: http://www.thegsresources.com/_forum/viewforum.php?f=3Name: GSR General DiscussionDescription: Suzuki GS PageAuthorName: Delmer Wells Levels: 2# ContentsUseTableSmarts: 1# TableRender: list#ContentsFormat: RSSContentsStart: Announcement:ContentsEnd: <td class="catBottom"#gensmall"></span></td>ContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; }# name= "jumpbox"# StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+StoryStart: Posted:StoryEnd: class= <span class="topictitle"><a# StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
GSR_Owners.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=4Name: GSR OwnersDescription: Suzuki GS PageAuthorName: Delmer Wells AuthorEmail: delmer at delmer dot comLevels: 2# ContentsUseTableSmarts: 1# TableRender: list#ContentsFormat: RSSContentsStart: nowrap="nowrap"> Last Post </th>ContentsEnd: <td class="catBottom"#gensmall"></span></td>#The following strips out view count and last poster's nameContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; }# name= "jumpbox"# StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+StoryStart: Posted:StoryEnd: class= <span class="topictitle"><a# StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
GSR_Performance_Mods.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=9Name: GSR Performance ModsDescription: Suzuki GS Performance ModsAuthorName: Delmer Wells -- delmer at delmer dot comLevels: 2ContentsStart: nowrap="nowrap"> Last Post </th>ContentsEnd: <td class="catBottom"#The following strips out view count and last poster's nameContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; }StoryStart: Posted:StoryEnd: class= <span class="topictitle"><a
GSR_Stories.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=5Name: GSR Stories ForumDescription: Suzuki GS PageAuthorName: Delmer Wells Levels: 2# ContentsUseTableSmarts: 1# TableRender: list#ContentsFormat: RSSContentsStart: nowrap="nowrap"> Last Post </th>ContentsEnd: <td class="catBottom"#gensmall"></span></td>#The following strips out view count and last poster's nameContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; }# name= "jumpbox"# StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+StoryStart: Posted:StoryEnd: class= <span class="topictitle"><a# StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
GSR_Technical.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=7Name: GSR Technical ForumDescription: Suzuki GS PageAuthorName: Delmer Wells Levels: 2# ContentsUseTableSmarts: 1# TableRender: list#ContentsFormat: RSSContentsStart: nowrap="nowrap"> Last Post </th>ContentsEnd: <td class="catBottom"#gensmall"></span></td>#The following strips out view count and last poster's nameContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; }# name= "jumpbox"# StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+StoryStart: Posted:StoryEnd: class= <span class="topictitle"><a# StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
GSR_Tips-n-Tricks.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=11Name: GSR Tips & TricksDescription: Suzuki GS PageAuthorName: Delmer Wells Levels: 2# ContentsUseTableSmarts: 1# TableRender: list#ContentsFormat: RSSContentsStart: nowrap="nowrap"> Last Post </th>ContentsEnd: <td class="catBottom"#gensmall"></span></td>#The following strips out view count and last poster's nameContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; }# name= "jumpbox"# StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+StoryStart: Posted:StoryEnd: class= <span class="topictitle"><a# StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
cnn_sports.site:
# CNN SportsURL: # created from PODS file by David A. DesrosiersAuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>Name: CNN SportsLevels: 2ImageURL: .*\.gifImageScaleToMaxWidth: 150ContentsCachable: 0StoryURL: http://wireless.cnn.com/avantgo/CNNSI/.*StoryCachable: 1
mobilebikes.site:
URL: Name: MobileBikesLevels: 4AuthorName: Barry Dexter A. GonzagaAuthorEMail: barryg /at/ kssp.upd.edu.phStoryURL: /mobile/.*\.htmImageURL: /images/.*\.gif
yahoo_sport_news.site:
# Yahoo- Top stories page site grabber# Written by: Saunders, Richard <risaunde@usa.capgemini.com>URL: Name: Yahoo! Sports News# There are two levels: the news index followed by each story.Levels: 2# There are lots of links off the page, but not all are stories.StoryURL: http://dailynews.yahoo.com/h/nm/\d+/sp/.*\.html# Yahoo provides great comments to stop sitescooper from breaking!StoryStart: !-- TextStart --StoryEnd: !-- TextEnd --
anandtech.site:
URL: Name: AnandTechLevels: 1StoryStart: <a name="TopNews">StoryEnd: Copyright © \d+-\d+ AnandTech, Inc. All rightsUseTableSmarts: 0
ars_technica.site:
URL: Name: Ars Technica StoryStart: <STRONG><SMALL>From the News Desk</SMALL></STRONG> StoryEnd: </html> StoryDiff: 1
computer_world.site:
URL: Name: ComputerWorldDescription: ComputerWorldAuthorName: Goh Boon Nam# ComputerWorld - US# Note - PDB output filesize typically around 80KB# Version 1.0# Date updated : 22 Aug 2003# Changes for 1.0 : 1st time creationLevels: 2ContentsStart: Center cell primary contentContentsEnd: -- secondary --StoryToPrintableSub: s!http://www.computerworld.com/.*/story/0,(.*?),!http://www.computerworld.com/printthis/2003/0,4814,!StoryURL: http://www.computerworld.com/printthis/.*StoryStart: -- Begin top component of story --StoryEnd: -- End body content --StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/&nbsp;//gim; }
firstmonday.site:
URL: Name: First Monday Description: a peer-reviewed journal on the internet Levels: 2 TableRender: flatten StoryURL: http://firstmonday.org/issues/issue\S+/\S+/(index.html|) StoryURL: http://firstmonday.org/issues/current_issue/\S+/(index.html|) ImageURL: .*/img/.*\.gif ImageScaleToMaxWidth: 150 AuthorName: Dwight D. McKay and Justin Mason
infoworld.site:
URL: Name: InfoWorld to GoLevels: 3ContentsPrint: 1IssuePrint: 1ImageURL: http://.*ContentsURL: http://www.infoworld.com/togo/.*StoryURL: http://www.infoworld.com/togo/.*## This site was converted from an AvantGo .subs file by subs-to-site.pl.# See http://sitescooper.org/ for more information on sitescooper.
joelonsoftware.site:
URL: Name: Joel on Software Levels: 2# ContentsStart: NEW:# ContentsEnd: TAKE ACTION StoryURL: http://www.joelonsoftware.com/articles/.* ContentsDiff: 1# by Akkana Peck <akkana /at/ shallowsky.com>
newsforge.site:
URL: Name: NewsForge Levels: 2 ContentsStart: <div class="article_box"> ContentsDiff: 1 StoryStart: <div class="article_box"> StoryURL: http://.*\.newsforge\.com/.* ContentsHTMLPreProcess: { s/<B>//gm; }
oreillynet_features.site:
URL: Name: O'ReillyNet FeaturesDescription: Features from across the O'Reilly NetworkLevels: 2ContentsStart: -- weekly package --ContentsEnd: -- top five --# ContentsDiff: 1StoryURL: http://www.oreillynet.com/pub/a/\S+/\d+/([[MM]]|[[MM-1]])/\d+/\S+.htmlStoryURL: http://www.oreillynet.com/pub/a/\S+/\d+/([[MM]]|[[MM-1]])/\d+/\S+.html.page=\d+StoryStart: -- content here --StoryEnd: -- sponsor column --StoryFollowLinks: 1
os_opinion.site:
URL: Name: OS Opinion Levels: 2 ContentsStart: <TABLE width="100%" border="0" cellspacing="5" ContentsEnd: <TD width="3%" valign="top" height="3587" bgcolor="#FFFFFF" align="center"> StoryURL: /Opinions/.*\.html StoryStart: .BeginEditable .Content%20image%20area. StoryEnd: .EndEditable StoryFollowLinks: 1 StoryHeadline: <.-- .BeginEditable .doctitle. -->(.*?)<.-- .EndEditable -->
pcmag_images.site:
URL: Name: PCMagazine-BiWedDescription: PCMagazineAuthorName: Goh Boon Nam# PC Magazine with Images# Note - PDB output filesize typically more than 500KB# Version 1.4# Date updated : 8 Jun 2005# Changes for 1.5 : Changes to cater to new ContentsEnd & StoryEnd# Also to cater to ill-formed img tags in PCMag html# and to take in slide show photosLevels: 2ContentsStart: BEGIN MAIN TABLEContentsEnd: <!-- include file="display_homepage_line_break.asp" -->StoryURL: http://www.pcmag.com/article2/.*StoryURL: http://www.pcmag.com/slideshow/.*ImageURL: http://common.ziffdavisinternet.com/util_get_image/\d+.*ImageURL: http://www.pcmag.com/images/(.*?)dot.gifStoryStart: BEGIN MAIN TABLEStoryEnd: (OpenSaveArticleWindow|<div class="slideshow_caption">)StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/<div id="oc_header">(.*?)<\/div>//gis; s/<div id="online_classified">(.*?)<\/div>//gis; s/<div class="article_price_container">(.*?)<\/div>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="Article_Header_Table">(.*?)<\/table>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="PG_Header_Table">(.*?)<\/table>//gis; s/<br\/>//gim; s/<div id="Premium_Partners_Container">(.*?)<\/div>//gis; s/>All Shots<\/a>/>next ><\/a>/gim; s/jpg"\/>/jpg">/gim; s/gif"\/>/gif">/gim; }# Notes# -----# s/>All Shots<\/a>/>next ><\/a>/gim; -> trick sitescooper to follow link to All Shots page# s/jpg"\/>/jpg">/gim; -> clean up malformed img tag - eg. in All Shots page# s/gif"\/>/gif">/gim; -> clean up malformed img tag - eg. in All Shots page
risks.site:
URL: Name: comp.risks Levels: 2 StoryURL: http://catless.ncl.ac.uk/go/risks/\d+/\d+/\d+ MinPages: 2 StoryHeadline: <b>(.+?)</b>
slashdot_top.site:
## Slashdot, top level only, no stories.#URL: http://slashdot.org/index.pl?simpledesign=1&lowbandwidth=1Name: SlashDot TopDescription: News for Nerds, Stuff that MattersLevels: 1# In a single-level scoop, sitescooper uses StoryStart/StoryEnd# even for the first page.StoryStart: <div class="article">StoryEnd: <div class="btmnav">
slyck.site:
URL: Name: SlyckLevels: 2ContentsStart: <img src=pics/slyck_news.gif>ContentsEnd: <img src=pics/new_rel.gif usemap=#nrmap border=0>StoryURL: http://.*
techdirt.site:
URL: http://www.techdirt.com/search.pl?query=&topic=&section=&author=Name: TechDirtStoryURL: /(articles|fotr)/\d+/\S+htmlStoryStart: <TABLE width=99% cellpadding=0 cellspacing=0 border=0StoryEnd: This site was built onContentsStart: <SELECT name=author>ContentsEnd: This site was built onContentsDiff: 1Levels: 2MinPages: 2
the_register.site:
# Modified to include less unnecessary text, bold titles, ...# by Peter Marschall, Version 1.1, 3.11.2000URL: Name: The Register Levels: 2# ContentsStart: <div id=\"Index\"> ContentsStart: <h2>Headlines</h2> ContentsDiff: 1 ContentsUseTableSmarts: 0 StoryURL: http://www.theregister.co.uk/.*# StoryHeadline: <DIV CLASS="storyhead">(.*?)</DIV> StoryStart: <div class=\"IconsTop\"> StoryCacheable: 1 # This probably isn't relevant any more: #StoryHTMLPreProcess: { # s/<DIV CLASS=.storyhead.>(.*?)<\/DIV>/<H2 CLASS='storyhead'>$1<\/H2>/is; # s/<br>.<br><B>Related (?:[sS]tory|[sS]tories|[lL]ink|[lL]inks)<\/B>.*\Z//s; # s/<br>+/<br>/i; # s/<br><p>(?:<br>)*/<p>/i; #} #MinPages: 2
wiredmag.site:
URL: # Also check out: Name: Wired Description: Wired Magazine Levels: 2 ContentsStart: <div id="mainStories"> StoryURL: http://www.wired.com/.* StoryStart: <div class="storyTxt">
xmlhack.site:
URL: http://xmlhack.com/dlist.php?date=[[MM]]-[[YYYY]]Name: XMLHackDescription: Developer news from the XML communityLevels: 2ContentsStart: <BODYContentsEnd: <TD VALIGN="TOP" WIDTH=241 ALIGN="RIGHT">ContentsDiff: 1StoryURL: /read.php.*StoryStart: <BODYStoryEnd: <TD VALIGN="TOP" WIDTH=241 ALIGN="RIGHT">MinPages: 2
zzz.site:
# Sitescooper site file for ZZZ Online# Written by Alastair Rankine <arankine@avaya.com>#URL: Name: ZZZ OnlineDescription: Technology newsLevels: 2ContentsDiff: 1StoryURL: http://zzz.com.ru/\d+.htmlImageURL: http://zzz.com.ru/pic\d+.jpgImageURL: http://zzz.com.ru/\d+num\d+.jpgImageScaleToMaxWidth: 156ContentsStart: <!-- main column // -->ContentsEnd: <!-- end of main column // -->StoryStart: <!-- main column // -->StoryEnd: <!-- end of main column // -->
paulgraham.site:
URL: Name: Paul Graham Levels: 2 StoryURL: http://paulgraham.com/.*.html ContentsDiff: 1
pcmag_firstlooks.site:
URL: http://www.pcmag.com/category2/0,1738,21,00.aspName: PCMag-1stLooksDescription: PCMagazine First LooksAuthorName: Goh Boon Nam# PC Magazine's First Looks Section with Images# Version 1.0# Date updated : 14 Jun 2005Levels: 2ContentsStart: BEGIN MAIN TABLEContentsEnd: <!-- include file="display_homepage_line_break.asp" -->StoryURL: http://www.pcmag.com/article2/.*StoryURL: http://www.pcmag.com/slideshow/.*StoryStart: BEGIN MAIN TABLEStoryEnd: (OpenSaveArticleWindow|<div class="slideshow_caption">)ImageURL: http://common.ziffdavisinternet.com/util_get_image/\d+.*ImageURL: http://www.pcmag.com/images/(.*?)dot.gifContentsHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/<div id="oc_header">(.*?)<\/div>//gis; s/<div id="online_classified">(.*?)<\/div>//gis; s/<div class="article_price_container">(.*?)<\/div>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="Article_Header_Table">(.*?)<\/table>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="PG_Header_Table">(.*?)<\/table>//gis; s/<br\/>//gim; s/<div id="Premium_Partners_Container">(.*?)<\/div>//gis; s/>All Shots<\/a>/>next ><\/a>/gim; s/jpg"\/>/jpg">/gim; s/gif"\/>/gif">/gim; }StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/<div id="oc_header">(.*?)<\/div>//gis; s/<div id="online_classified">(.*?)<\/div>//gis; s/<div class="article_price_container">(.*?)<\/div>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="Article_Header_Table">(.*?)<\/table>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="PG_Header_Table">(.*?)<\/table>//gis; s/<br\/>//gim; s/<div id="Premium_Partners_Container">(.*?)<\/div>//gis; s/>All Shots<\/a>/>next ><\/a>/gim; s/jpg"\/>/jpg">/gim; s/gif"\/>/gif">/gim; }# Notes# -----# s/>All Shots<\/a>/>next ><\/a>/gim; -> trick sitescooper to follow link to All Shots page# 
# s/jpg"\/>/jpg">/gim; -> clean up malformed img tag - eg. in All Shots page# s/gif"\/>/gif">/gim; -> clean up malformed img tag - eg. in All Shots page
tvguide.site:
URL: Name: TVGENLevels: 2ContentsPrint: 1StoryURL: http://.*ImageURL: http://.*## This site was converted from an AvantGo .subs file by subs-to-site.pl.# See http://sitescooper.org/ for more information on sitescooper.
freshmeat_articles.site:
URL: Name: Freshmeat ArticlesDescription: editorials and articles from Freshmeat, UNIX software listings siteLevels: 2AuthorName: jmContentsStart: -- Content --ContentsEnd: -- End of content --StoryURL: http://freshmeat.net/articles/view/\d+/StoryStart: -- Content --StoryEnd: -- End of content --
rootprompt.site:
URL: Name: RootPrompt.org Description: Nothing but Unix Levels: 2 ContentsFormat: rss StoryStart: <table width=100% VALIGN=top cellpading=0 cellspacing=5 border=0><tr width=100% NOSAVE> StoryEnd: <a href=submit.php3>Suggest an article or news story</a> StoryURL: http://rootprompt.org/article.php3\?article=\d+
samba_traffic.site:
# thanks to Lim Swee Tat <st_lim@3ui.com>URL: Name: Kernel Traffic - Samba Levels: 1 StoryStart: Table Of Contents StoryEnd: </html>
wine_traffic.site:
URL: Name: KC - Wine Levels: 1 StoryStart: Table Of Contents StoryEnd: </HTML>
iwin.site:
URL: Name: IWIN WeatherLevels: 3IssueURL: http://iwin2.nws.noaa.gov/iwin/.*IssuePrint: 1ContentsURL: http://iwin2.nws.noaa.gov/iwin/.*ContentsPrint: 1
nrcc_northeast_forecast.site:
URL: Name: NRCC Forecasts for Northeastern USLevels: 3ImageURL: http://.*ContentsURL: ContentsPrint: 1StoryURL: http://met-www.cit.cornell.edu/cgi-bin.*
wu_new_mexico.site:
# Current weather reports# To use (in the US), replace ``88005'' with your Zip code# Site file by Joe Pfeiffer, pfeiffer /at/ cs.nmsu.edu# Changed to use a layout by jm /at/ jmason.org#URL: http://www.wunderground.com/cgi-bin/findweather/getForecast?query=88005Name: Weather - New Mexico
wu_redmond.site:
# Current weather reports# To use (in the US), adjust zip code.# Site file by Joe Pfeiffer, pfeiffer /at/ cs.nmsu.edu# Changed to use a layout by jm /at/ jmason.org## Robb Canfield <robb@canfield.com># * Switched to print... URL, it parses better# * Use my new Table reformatterURL: http://printer.wunderground.com/cgi-bin/findweather/getForecast?query=98059StoryUseTableSmarts: 0Name: Weather - RedmondTableRender: list
alertbox.site:
URL: Name: Alertbox Levels: 2 ContentsStart: Current Column ContentsEnd: Previous Columns StoryURL: http://www.useit.com/alertbox/\d+\.html
jon_udell.site:
URL: Name: Jon Udells ArticlesDescription: Articles by Jon Udell, Byte.com columnistLevels: 2ContentsStart: <b>Tuning in to Jon's channel</b>ContentsEnd: This channel rendering courtesy ofStoryURL: http://www.byte.com/column/BYT\S+StoryURL: http://www.byte.com/printableArticle\?doc_id=BYT\S+StoryToPrintableSub: s,/column/,/printableArticle?doc_id=,StoryStart: <font size="5">StoryEnd: </HTML>
mappa_mundi.site:
URL: Name: Mappa.Mundi Description: Revealing Invisible Worlds Levels: 2 # jm: man, this site has an excellent site map! Cheers guys ContentsStart: -- CURRENT ISSUE -- ContentsEnd: ====== FOOTER FOLLOWS ====== StoryURL: /(inform|locus|visions|reviews|maps|about)/.* StoryStart: ====== LEFT HAND NAVIGATION FOLLOWS ==== StoryEnd: <A HREF="contact/">contact</A> TableRender: flatten
mozillazine.site:
URL: Name: MozillaZineDescription: Your source for Mozilla news, advocacy, interviews, builds, and more!ContentsFormat: rssStoryURL: /talkback\.html\?article=\d+# You may also want to add a StoryStart and StoryEnd line to# clean up the stories. Here's sample lines (you need to edit them):#StoryStart: --features--StoryEnd: form method="post" action# (This is a sitescooper site file. see # It was generated from the site's RSS by rss-to-site.pl 1.0.)
researchbuzz.site:
URL: Name: ResearchBuzzDescription: covering the world of Internet researchStoryDiff: 1StoryStart: News: This WeekStoryEnd: <SPAN class=body4>All original material on
searchenginereport.site:
URL: Name: Search Engine ReportDescription: Danny Sullivan's monthly newsletter covering search engine developmentsAuthorName: jmLevels: 2StoryURL: http://searchenginewatch.com/sereport/\d+/\d+.*\.htmlContentsEnd: <form name="myForm">StoryEnd: <form name="myForm">
bifurcated_rivets.site:
# Bifurcated Rivets by Lindsay Marshall#URL: Name: Bifurcated Rivets StoryStart: <!--@@H1--> StoryEnd: <!--@@F1--> StoryDiff: 1
boingboing.site:
URL: Name: Boing BoingDescription: Mark Frauenfelder's directory of wonderful thingsStoryEnd: -- BEGIN SPYONIT.COM SPYMAKER --StoryDiff: 1
camworld.site:
URL: Name: CamWorldDescription: Random Thoughts, New Media, Web DesignLevels: 1StoryStart: Search CamWorld for:StoryEnd: Go to the entries for
crummy.site:
URL: Name: CrummyDescription: weblog by Leonard RichardsonLevels: 1StoryStart: <td halign=left width=50%>StoryEnd: <h4>Catch up on previously Bruised NewsStoryDiff: 1# I really need to get into the habit of doing this.AuthorName: jm@jmason.orgAuthorEMail: jm@jmason.org
doc_searls.site:
URL: Name: Doc Searls Weblog Description: Doc Searls' weblog Levels: 1 StoryStart: -- End Image Map -- StoryEnd: <table class="hCalendarTable" cellspacing="0" border="0"> StoryDiff: 1 TableRender: flatten
eckes.site:
# From: Mela Eckenfels <mela AT darkover.inka.de>URL: http://www.eckes.org/modules.php?name=AvantGoName: Eckes.org - Opinions of some GeeksDescription: Thoughts about Tech, GeekStuff and the unsettling RealLife.Levels: 2StoryURL: /.*sid\=.*StoryDiff: 1ContentsPrint: 1ContentsDiff: 1
ethel_the_blog.site:
URL: http://stommel.tamu.edu/~baum/ethel/blogger.htmlName: Ethel The BlogDescription: Observations on science, computers, books, music and other shiny things that catch my mind's eye.Levels: 1StoryStart: create a gutter between the left margin and page contentStoryEnd: <FONT SIZE=4>\s+LEISURE\s+</FONT>StoryDiff: 1# argh, big fonts! Nein danke.StoryPostProcess: { s/<FONT size=\"?4\"?>/<font size=3>/gs;}
flutterby.site:
URL: Name: Flutterby Levels: 1 StoryEnd: Connectivity provided by StoryDiff: 1
genehack.site:
URL: Name: GeneHack Levels: 1 StoryStart: /universal header StoryEnd: Unless otherwise noted, all rights reserved. StoryDiff: 1
hack_the_planet.site:
URL: Name: Hack The Planet Levels: 1 StoryStart: <a href="crit.org/http://crit.org/pub/cs.utexas.edu/wesf/">CritLink</a><br> StoryEnd: <td width="20"> </td> StoryDiff: 1
honeyguide.site:
URL: Name: Honeyguide Description: good science-oriented weblog Levels: 1 StoryStart: <p class=archmenu> StoryEnd: Copyright \d+-\d+ StoryDiff: 1
jason_pettus.site:
# site_samples/weblog/jason_pettus.site## Jason Pettus, Chicago USAURL: Name: Jason PettusLevels: 1AuthorName: Jan Lund ThomsenAuthorEmail: kwed@kwed.orgStoryStart: <!-- Content start -->StoryEnd: <!-- Content end -->
memepool.site:
URL: Name: Memepool Levels: 1 StoryDiff: 1 UseTableSmarts: 0
monkeyfist.site:
URL: Name: Monkeyfist Description: The Monkeyfist Collective AuthorName: Justin Mason AuthorEMail: jm /at/ jmason.org Levels: 1 StoryDiff: 1
mydog.site:
# contributed by michael d. ivey <ivey /at/ gweezlebur.com>#URL: http://gweezlebur.com/~ivey/weblog/Name: my dog wants to be on the radioDescription: michael d. ivey's weblogLevels: 1StoryStart: Begin Daily SectionStoryEnd: <a href="~ivey/index.shtml">home</a>
ntk.site:
URL: Name: NTKnow StoryStart: <table StoryEnd: </table>
peterme.site:
# peterme.com -- "Providing meme therapy for your troubled thoughts"#URL: Name: PeterMe StoryDiff: 1
rathergood.site:
URL: Name: rathergood.com Description: The Crab of Eternal Wisdom ponders the nature of existence AuthorName: Justin Mason AuthorEMail: jm /at/ jmason.org Levels: 1 StoryDiff: 1
rc3.site:
URL: Name: RC3 Levels: 1 StoryStart: <body StoryEnd: This site designed by Rafe Colburn. Copyright StoryDiff: 1
riverbend.site:
# Baghdad Burning: Riverbend's BlogURL: Name: RiverbendDescription: Riverbend: Girl Blog from IraqLevels: 1StoryStart: id="Title">Baghdad BurningStoryDiff: 1
robot_wisdom.site:
URL: Name: Robot Wisdom Levels: 1 StoryStart: <b>Headlines:</b> StoryEnd: <b>WebLog Archives</b> StoryDiff: 1
scripting_news.site:
# Dave Winer's Scripting NewsURL: Name: Scripting News Levels: 1 # this is inconvenient; there's no easy way to find start of text anymore. StoryStart: <table width="400" cellspacing="0" cellpadding="3" border="0"> StoryEnd: <b>Last update</b>: StoryDiff: 1
tim_oreilly.site:
URL: Name: Tim O'Reilly's WeblogLevels: 1StoryStart: -- *content here *--StoryEnd: -- *sponsor column *--StoryDiff: 1
tomalaks_realm.site:
# Tomalak's RealmURL: Name: Tomalaks Realm Levels: 1 StoryStart: <html> StoryEnd: </html> StoryDiff: 1
where_is_raed.site:
# Where is Raed?URL: Name: WhereIsRaedDescription: Where Is RaedLevels: 1StoryStart: blogger code # ContentsDiff doesn't seem to work here, for some reason.ContentsDiff: 1# Salam puts everything inside blockquote, which makes it come out# in a tiny central column on the palm.ContentsHTMLPreProcess: { s/<blockquote>//gim; s/<\/blockquote>//gim;}
kevin_sites.site:
##This file captures images and text from the site. The pictures flesh out a lot of the #stories but the writing is good enough that they aren't really needed. I sync to a card#so file size isn't important. Comment out the last two lines for smaller files.#URL: Name: Iraq War Blog - K. SitesDescription: Kevin Sites' War Blog - IraqAuthorName: Delmer Wells <delmer at delmer dot com> #Updated 11-4-04Levels: 1StoryDiff: 1ImageURL: http://www.kevinsites.net/images/.*ImageScaleToMaxWidth: 500
(Scooped by sitescooper. Go back to the sitescooper page)