#---------------------------------------------------------
# path : /web/docs/canoe/robots.txt
#
# specification :
#   include *.rss.canoe.ca/
#   exclude: http://canoe.ca/ (only the homepage, not the directory)
#   exclude: http://www.canoe.ca/Canoe/SunMedia/home.html (only the homepage, not the directory)
#   exclude: http://www.canoe.ca/OntQueTicker/ (only that page, not the directory)
#   exclude: http://www.canoe.ca/WesternTicker/ (only that page, not the directory)
#   exclude: http://www.canoe.ca/AtlanticTicker/ (only that page, not the directory)
#   exclude: http://www.canoe.ca/CanoeMail/ (the entire directory, except the homepage)
#   exclude: http://www.canoe.ca/cgi-bin/mb2/view.pl?board=slam (only that page, not the directory)
#   exclude: http://www.canoe.ca/cgi-bin/reg/NR-register.pl?MODE=SUBSCRIBE&LOOK=HOME_GARDEN&PRODUCT=44 (only that page, not the directory)
#
# This file had pre-existing values which have remained in place as they were found.
#
# Format reminder: robots.txt is line-oriented. Each directive must sit on
# its own line, and a '#' comments out everything up to the end of that line.
# Keep one "User-agent" / "Disallow" directive per line, and separate groups
# with a blank line.
#---------------------------------------------------------
# Domain: http://www.canoe.ca/
# Creation: 11/28/2007
# Update: 12/14/2009

# All crawlers: keep out of the *.ng endpoints.
User-agent: *
Disallow: /event.ng/
Disallow: /html.ng/
Disallow: /js.ng/
Disallow: /click.ng/
Disallow: /image.ng/
Disallow: /ping.ng/

# NOTE(review): the page-level exclusions below directly follow the
# "Yahoo! DE Slurp" user-agent line, so parsers apply them to that crawler
# only. The specification in the header does not mention any crawler
# restriction - confirm whether they were meant to live under "User-agent: *".
User-agent: Yahoo! DE Slurp
# Disallow: /$
## 2 below commented out by ptrin, oct. 16 2008
#Disallow: /index.php
#Disallow: /home.html
Disallow: /home.php
Disallow: /CanoeMail/
Disallow: /Canoe/SunMedia/home.html
Disallow: /OntQueTicker/home.html
Disallow: /WesternTicker/home.html
Disallow: /AtlanticTicker/home.html
Disallow: /cgi-bin/mb2/view.pl?board=slam
Disallow: /cgi-bin/reg/NR-register.pl?MODE=SUBSCRIBE&LOOK=HOME_GARDEN&PRODUCT=44

# 2009 Dec 11
# User-Agents to block
# Note: match as substrings - full User-Agents will be longer, and will have
# varying version numbers

# Meltwater
User-agent: libwww
Disallow: /

User-agent: libwww-perl
Disallow: /

User-agent: mag-crawl
Disallow: /

# Moreover
User-agent: Moreoverbot
Disallow: /

# Vocus
User-agent: Vocus
Disallow: /

# CustomScoop
User-agent: ScooperBot
Disallow: /

# CyberAlert
User-agent: CyberAlert
Disallow: /

# CEDROM
User-agent: CEDROM
Disallow: /

User-agent: Newscan
Disallow: /

User-agent: Eureka
Disallow: /

User-agent: Europresse
Disallow: /

# Radian6
User-agent: Radian6
Disallow: /

User-agent: R6_FeedFetcher
Disallow: /

User-agent: R6_CommentReader
Disallow: /

# Extra Rogue Robots
# (libwww was listed here a second time; it is already blocked in the
# Meltwater section above, so the duplicate group has been dropped.)
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

User-agent: wget
Disallow: /

User-agent: grub-client
Disallow: /

User-agent: k2spider
Disallow: /

User-agent: NPBot
Disallow: /

User-agent: WebReaper
Disallow: /

User-agent: ia_archiver
Disallow: /

User-agent: zeus
Disallow: /

# Sitemap: http://www.canoe.com/sitemap_canoe_index.xml
# EOF