# robots.txt -- crawler access policy
# taskId.16820075
#
# Layout:
#   1. Crawlers that are blocked from the whole site.
#   2. Regional crawlers that are kept out of specific language trees.
#   3. Default rules for every other crawler (User-agent: *).
#
# One directive per line. Blank lines appear only between groups; inside a
# group, "#" lines are used as separators.

# -----------------------------------
# Block all traffic for certain user agents
User-agent: Mozilla/5.0 (compatible; Seznam screenshot-generator 2.1; +http://fulltext.sblog.cz/screenshot/)
User-agent: AddThis.com robot tech.support@clearspring.com
User-agent: MSRBOT (http://research.microsoft.com/research/sv/msrbot/)
User-agent: MSRBOT # Microsoft Research Bot
User-agent: SandCrawler # Unknown Microsoft crawler
User-agent: Shim-Crawler # Japanese university research
User-agent: ScoutJet # Not relevant yet
# AhrefsBot
User-agent: Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)
# Link tracker
User-agent: Mozilla/5.0 (compatible; DotBot/1.1; http://www.dotnetdotcom.org/, crawler@dotnetdotcom.org)
User-agent: DotBot/1.0.1 (http://www.dotnetdotcom.org/#info, crawler@dotnetdotcom.org)DotBot # Link tracker
User-agent: Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)
# Find Links
User-agent: findlinks/2.0.1 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.6-beta6 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.6-beta4 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.5-beta7 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.4-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.3-beta9 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.3-beta8 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.3-beta6 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.3-beta4 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.3-beta2 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.3-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.2-a5 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.1-a5 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.1-a1 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1.1 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1-a9 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1-a8 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1-a8 ( http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1-a7 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1-a5 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1-a4 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1-a3 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.1 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.06 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.0.9 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.0.8 (+http://wortschatz.uni-leipzig.de/findlinks/)
User-agent: findlinks/1.0 (+http://wortschatz.uni-leipzig.de/findlinks/)
#
User-agent: Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.searchtechnologies.com)
User-agent: SputnikBot
User-agent: 008
User-agent: Accoona
User-agent: AddSugarSpiderBot
User-agent: AnyApexBot
User-agent: BecomeBot # Consumer shopping
User-agent: BeslistBot # Dutch consumer shopping
User-agent: BillyBobBot # Dead site
User-agent: Bimbot # Hiding identity
User-agent: Boitho # Not relevant
User-agent: btbot # Torrent search
User-agent: CatchBot # Dead Australian search
User-agent: cosmos # not relevant
User-agent: Covario
User-agent: Cerberian # not relevant
User-agent: DiamondBot
User-agent: Discobot
User-agent: EmeraldShield.com
User-agent: envolk[ITS]spider
User-agent: EsperanzaBot
User-agent: Exabot
User-agent: FAST Enterprise Crawler
User-agent: FAST-WebCrawler
User-agent: FDSE robot
User-agent: FurlBot
User-agent: FyberSpider
User-agent: g2crawler
User-agent: Gaisbot
User-agent: GalaxyBot
User-agent: genieBot
User-agent: Gigabot
User-agent: Girafabot
User-agent: HappyFunBot
User-agent: Holmes
User-agent: htdig
User-agent: iaskspider # unknown Chinese spider
User-agent: ia_archiver # Alexa crawler
User-agent: IRLbot
User-agent: IssueCrawler
User-agent: Jaxified Bot
User-agent: Jyxobot
User-agent: KoepaBot
User-agent: L.webis
User-agent: LapozzBot
User-agent: Larbin
User-agent: LDSpider
User-agent: Linguee Bot
User-agent: LinkWalker
User-agent: lmspider
User-agent: lwp-trivial
User-agent: mabontland
User-agent: magpie-crawler
User-agent: MJ12bot
User-agent: mogimogi
User-agent: MojeekBot
User-agent: Moreoverbot
User-agent: Morning Paper
User-agent: MVAClient
User-agent: mxbot
User-agent: NetResearchServer
User-agent: NetSeer Crawler
User-agent: NewsGator
User-agent: NG-Search
User-agent: nicebot
User-agent: Nusearch
User-agent: NutchCVS
User-agent: Nymesis
User-agent: obot
User-agent: oegp
User-agent: omgilibot
User-agent: OmniExplorer_Bot
User-agent: OOZBOT
User-agent: Orbiter
User-agent: PageBitesHyperBot
User-agent: Peew
User-agent: polybot
User-agent: PostPost
User-agent: Psbot # Image search
User-agent: PycURL
User-agent: Qseero
User-agent: Radian6
User-agent: RAMPyBot
User-agent: RufusBot
User-agent: SBIder
User-agent: Scrubby
User-agent: SearchSight
User-agent: Seekbot
User-agent: semanticdiscovery
User-agent: Sensis Web Crawler
User-agent: SEOChat::Bot
User-agent: ShopWiki
User-agent: Shoula robot
User-agent: silk
User-agent: Sitebot
User-agent: Snappy
User-agent: Speedy Spider
User-agent: Sqworm
User-agent: suggybot
User-agent: SurveyBot
User-agent: TerrawizBot
User-agent: TheSuBot
# TinEye
User-agent: TinEye/1.1 (http://tineye.com/crawler.html)
User-agent: TinEye
#
User-agent: truwoGPS
User-agent: TurnitinBot
User-agent: TweetedTimes Bot
User-agent: TwengaBot
User-agent: updated
User-agent: Urlfilebot
User-agent: Vagabondo
User-agent: Vortex
User-agent: voyager
User-agent: VYU2
User-agent: webcollage
User-agent: Websquash.com
User-agent: wf84
User-agent: WomlpeFactory
User-agent: Xaldon_WebSpider
User-agent: yacy
User-agent: Yasaklibot
User-agent: yoogliFetchAgent
User-agent: Zao/0.1 (http://www.kototoi.org/zao/)
User-agent: Mozilla/4.0 (compatible; Zealbot 1.0)
User-agent: zspider/0.9-dev http://feedback.redkolibri.com/
# Zyborg
User-agent: Mozilla/4.0 compatible ZyBorg/1.0 DLC (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)
User-agent: Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)
User-agent: Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (wn.dlc@looksmart.net; http://www.WISEnutbot.com)
User-agent: Mozilla/4.0 compatible ZyBorg/1.0 (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)
User-agent: Mozilla/4.0 compatible ZyBorg/1.0 (wn-16.zyborg@looksmart.net; http://www.WISEnutbot.com)
User-agent: Mozilla/4.0 compatible ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com)
#
# Every agent listed above is denied the entire site.
Disallow: /

# -----------------------------------
# Chinese Spiders - Block all languages other than Chinese and English
User-agent: Baiduspider+(+http://www.baidu.com/search/spider_jp.html)
User-agent: Baiduspider+(+http://www.baidu.com/search/spider.htm)
User-agent: BaiDuSpider
User-agent: Sosospider+(+http://help.soso.com/webspider.htm)
User-agent: sogou spider
User-agent: Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)
User-agent: Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)
User-agent: Mozilla/5.0 (compatible; YodaoBot/1.0; http://www.yodao.com/help/webmaster/spider/; )
Disallow: /ja/
Disallow: /fr/
Disallow: /de/
Disallow: /it/
Disallow: /cs/
Disallow: /hu/
Disallow: /es/
Disallow: /ru/
Disallow: /pl/
Disallow: /pt/
Disallow: /ko/
Disallow: /vi/
Disallow: /th/

# -----------------------------------
# Russian Spiders - Block all languages other than Russian and English
# NOTE(review): unlike the other regional groups, the rule below blocks the
# whole site rather than only the non-Russian language trees -- confirm
# whether that is intended.
User-agent: Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
User-agent: igdeSpyder (compatible; igde.ru; +http://igde.ru/doc/tech.html)
User-agent: StackRambler/2.0 (MSIE incompatible
User-agent: StackRambler/2.0
Disallow: /

# --------------------------------------
# Japanese Spiders - Block all languages other than Japanese and English
User-agent: Mozilla/4.0 (compatible; Arachmo)
User-agent: ichiro/4.0 (http://help.goo.ne.jp/door/crawler.html)
User-agent: ichiro/3.0 (http://help.goo.ne.jp/door/crawler.html)
User-agent: ichiro/2.0+(http://help.goo.ne.jp/door/crawler.html)
User-agent: ichiro/2.0 (ichiro@nttr.co.jp)
User-agent: ichiro/2.0 (http://help.goo.ne.jp/door/crawler.html)
Disallow: /fr/
Disallow: /zh/
Disallow: /de/
Disallow: /it/
Disallow: /cs/
Disallow: /hu/
Disallow: /es/
Disallow: /ru/
Disallow: /pl/
Disallow: /pt/
Disallow: /ko/
Disallow: /vi/
Disallow: /th/

# ----------------------------------------
# German Spiders - Block all languages other than German and English
User-agent: ABACHOBot
User-agent: Mozilla/4.0 (compatible; B-l-i-t-z-B-O-T)
User-agent: iCCrawler (http://www.iccenter.net/bot.htm) # German job listing site
User-agent: SynooBot/0.7.1 (SynooBot; http://www.synoo.de/bot.html; webmaster@synoo.com)
User-agent: fastbot.de crawler 2.0 beta (http://www.fastbot.de)
Disallow: /ja/
Disallow: /fr/
Disallow: /zh/
Disallow: /it/
Disallow: /cs/
Disallow: /hu/
Disallow: /es/
Disallow: /ru/
Disallow: /pl/
Disallow: /pt/
Disallow: /ko/
Disallow: /vi/
Disallow: /th/

# ---------------------------------
# French Spiders - Block all languages other than French and English
User-agent: Pompos/1.3 http://dir.com/pompos.html
User-agent: Pompos/1.2 http://pompos.iliad.fr
User-agent: Pompos/1.1 http://pompos.iliad.fr
User-agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (http://www.voila.com/)
Disallow: /ja/
Disallow: /zh/
Disallow: /de/
Disallow: /it/
Disallow: /cs/
Disallow: /hu/
Disallow: /es/
Disallow: /ru/
Disallow: /pl/
Disallow: /pt/
Disallow: /ko/
Disallow: /vi/
Disallow: /th/

# ----------------------------------------
# Czech Language - Block all languages other than Czech and English
User-agent: SeznamBot/2.0 (+http://fulltext.seznam.cz/)
User-agent: SeznamBot/2.0 (+http://fulltext.sblog.cz/robot/)
Disallow: /ja/
Disallow: /fr/
Disallow: /zh/
Disallow: /de/
Disallow: /it/
Disallow: /hu/
Disallow: /es/
Disallow: /ru/
Disallow: /pl/
Disallow: /pt/
Disallow: /ko/
Disallow: /vi/
Disallow: /th/

# ----------------------------------------------
# Korean Language - Block all languages other than Korean and English
User-agent: Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)
User-agent: Yeti/1.0 (+http://help.naver.com/robots/)
User-agent: Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server; + http://tab.search.daum.net/aboutWebSearch.html) Daumoa/3.0
Disallow: /ja/
Disallow: /fr/
Disallow: /zh/
Disallow: /de/
Disallow: /it/
Disallow: /cs/
Disallow: /hu/
Disallow: /es/
Disallow: /ru/
Disallow: /pl/
Disallow: /pt/
Disallow: /vi/
Disallow: /th/

# -------------------------------
# All User Agents
# This is the only "User-agent: *" group in the file: parsers that honour
# just the first matching group would otherwise miss most of these rules.
User-agent: *
#
# Slow them down
# NOTE(review): the file previously also carried "Crawl-delay: 30" above the
# first User-agent line (outside any group, so it applied to nobody) and a
# second "Crawl-delay: 10" at the end of this group. Only one value can
# apply; 15 is kept here -- confirm the intended delay.
Crawl-delay: 15
#
# PDF generator endpoint
Disallow: /modules/pdfgen/pdfmaker/
#
# Disallow: /*/? # google supports wildcards -- this causes problem with adwords
# Logout links: "/?logout" covers the site root, "/*/?logout" every deeper
# path. Patterns must begin with "/", so no bare "*/..." form is used.
Disallow: /?logout
Disallow: /*/?logout
#
# Disallow contact-us page for all
Disallow: /*/contact-us/ # google supports wildcards
Disallow: /en/contact-us/
Disallow: /ja/contact-us/
Disallow: /fr/contact-us/
Disallow: /zh/contact-us/
Disallow: /de/contact-us/
Disallow: /it/contact-us/
Disallow: /cs/contact-us/
Disallow: /hu/contact-us/
Disallow: /es/contact-us/
Disallow: /ru/contact-us/
Disallow: /pl/contact-us/
Disallow: /pt/contact-us/
Disallow: /ko/contact-us/
Disallow: /vi/contact-us/
Disallow: /th/contact-us/
#
# Disallow product contact-us pages for all
Disallow: /*/contact-us # google supports wildcards
Disallow: /en/products/contact-us/
Disallow: /ja/products/contact-us/
Disallow: /fr/products/contact-us/
Disallow: /zh/products/contact-us/
Disallow: /de/products/contact-us/
Disallow: /it/products/contact-us/
Disallow: /cs/products/contact-us/
Disallow: /hu/products/contact-us/
Disallow: /es/products/contact-us/
Disallow: /ru/products/contact-us/
Disallow: /pl/products/contact-us/
Disallow: /pt/products/contact-us/
Disallow: /ko/products/contact-us/
Disallow: /vi/products/contact-us/
Disallow: /th/products/contact-us/
#
# Disallow quote request page for all
Disallow: /*/quote-request/ # google supports wildcards
Disallow: /en/quote-request/
Disallow: /ja/quote-request/
Disallow: /fr/quote-request/
Disallow: /zh/quote-request/
Disallow: /de/quote-request/
Disallow: /it/quote-request/
Disallow: /cs/quote-request/
Disallow: /hu/quote-request/
Disallow: /es/quote-request/
Disallow: /ru/quote-request/
Disallow: /pl/quote-request/
Disallow: /pt/quote-request/
Disallow: /ko/quote-request/
Disallow: /vi/quote-request/
Disallow: /th/quote-request/
#
# Disallow demo request page for all
Disallow: /*/request-a-demo/ # google supports wildcards
Disallow: /en/request-a-demo/
Disallow: /ja/request-a-demo/
Disallow: /fr/request-a-demo/
Disallow: /zh/request-a-demo/
Disallow: /de/request-a-demo/
Disallow: /it/request-a-demo/
Disallow: /cs/request-a-demo/
Disallow: /hu/request-a-demo/
Disallow: /es/request-a-demo/
Disallow: /ru/request-a-demo/
Disallow: /pl/request-a-demo/
Disallow: /pt/request-a-demo/
Disallow: /ko/request-a-demo/
Disallow: /vi/request-a-demo/
Disallow: /th/request-a-demo/
#
# Disallow bookmarks page for all
Disallow: /*/bookmarks/ # google supports wildcards
Disallow: /en/bookmarks/
Disallow: /ja/bookmarks/
Disallow: /fr/bookmarks/
Disallow: /zh/bookmarks/
Disallow: /de/bookmarks/
Disallow: /it/bookmarks/
Disallow: /cs/bookmarks/
Disallow: /hu/bookmarks/
Disallow: /es/bookmarks/
Disallow: /ru/bookmarks/
Disallow: /pl/bookmarks/
Disallow: /pt/bookmarks/
Disallow: /ko/bookmarks/
Disallow: /vi/bookmarks/
Disallow: /th/bookmarks/
#
# Disallow newsletter subscription page for all
Disallow: /*/subscribe-newsletter/ # google supports wildcards
Disallow: /en/subscribe-newsletter/
Disallow: /ja/subscribe-newsletter/
Disallow: /fr/subscribe-newsletter/
Disallow: /zh/subscribe-newsletter/
Disallow: /de/subscribe-newsletter/
Disallow: /it/subscribe-newsletter/
Disallow: /cs/subscribe-newsletter/
Disallow: /hu/subscribe-newsletter/
Disallow: /es/subscribe-newsletter/
Disallow: /ru/subscribe-newsletter/
Disallow: /pl/subscribe-newsletter/
Disallow: /pt/subscribe-newsletter/
Disallow: /ko/subscribe-newsletter/
Disallow: /vi/subscribe-newsletter/
Disallow: /th/subscribe-newsletter/
#
# Disallow the per-language 404 pages
# (/ru/404/ added so this list covers the same languages as the lists above)
Disallow: /en/404/
Disallow: /ja/404/
Disallow: /fr/404/
Disallow: /zh/404/
Disallow: /de/404/
Disallow: /it/404/
Disallow: /cs/404/
Disallow: /hu/404/
Disallow: /es/404/
Disallow: /ru/404/
Disallow: /pl/404/
Disallow: /pt/404/
Disallow: /ko/404/
Disallow: /th/404/
Disallow: /vi/404/
#
# Block downloads for all user agents -- do not crawl the files themselves.
# Two rooted patterns cover "/.downloads/download/" at any depth.
Disallow: /.downloads/download/
Disallow: /*/.downloads/download/