# robots.txt for www.hark.com
#
# One directive per line: robots.txt is parsed line by line, and everything
# after a '#' on a line is a comment.
# Sections inside the group below are separated by bare '#' lines rather than
# blank lines, because some older parsers treat a blank line as the end of a
# User-agent record.

Sitemap: http://www.hark.com/sitemaps/sitemap_index.xml.gz

User-agent: *
Disallow: /a/
Disallow: /at/
Disallow: /cgi-bin/
Disallow: /doubleclick/
Disallow: /_vti_bin/
Disallow: /users/
Disallow: /admin/
Disallow: /edit/
Disallow: /svndirs/
Disallow: /clips/embed_code/
Disallow: /clips/send_to_friend/
Disallow: /clips/inline_like_it/
Disallow: /original.txt
Disallow: /clips/versions/
Disallow: /clips.rss
Disallow: /collections/embed_code/
Disallow: /collections/facebook/
Disallow: /collections/tweet/
Disallow: /collections/versions/
Disallow: /tshirts/
# Path patterns must begin with '/'; '/*clipped=false' matches the same URLs
# as the former '*clipped=false' because every URL path starts with '/'.
Disallow: /*clipped=false
Disallow: /*/download
Disallow: /*/execute_download
Disallow: /*?collection_id
Disallow: /video
Disallow: /alphabird
Disallow: /*/homepage_embed*
#
# Takedown Request
# Amie Lowhorn
Disallow: /users/351076
#
# WordPress Blog
Disallow: /blog/wp-content/plugins/
Disallow: /blog/wp-includes/
Disallow: /blog/wp-admin/
#
# Use robots.txt to prevent crawling of search results pages or other
# auto-generated pages that don't add much value for users coming from
# search engines.
# Block keywords params pages as well, as these are both search pages and
# return soft-404s for empty searches.
# http://www.google.com/support/webmasters/bin/answer.py?answer=35769
Disallow: /search
# Leading '/' added for the same reason as '/*clipped=false' above.
Disallow: /*keywords=
#
# 100,000s of links are being generated by things that haven't been identified:
# http://www.hark.com/0VMHgSm 5wr/bVH /6Uk2laiRbJycjkhzk2tczalPA=
# http://www.hark.com/clips/tpjpjqjbwn-fiesta-beat?iframe=true&width=80%&height=80%
# The two rules below block any URL that ends in '=' or '%' ('$' anchors the
# pattern to the end of the URL).
Disallow: /*=$
Disallow: /*%$
#
# Don't download or cache json files or really old json files.
Disallow: /*.json
Disallow: /*.query