我們可以根據客戶端請求中的 User-Agent 首部字段,來阻止指定的爬蟲爬取我們的網站:
虛擬主機配置如下(紅色標記為添加或者修改的內容):
[root@Nginx www_date]# cat brian.conf
# Virtual host for www.brian.com: rejects requests from the listed crawlers
# based on their User-Agent header and serves the site from html/brian.
server {
    listen       80;
    server_name  www.brian.com;

    # Document root and index files are declared at server level so that every
    # location inherits them. A root set only inside "location /" is not seen
    # by the static-asset regex location below, which would then resolve files
    # against nginx's default root instead of html/brian.
    root   html/brian;
    index  index.html index.htm;

    # Case-insensitive (~*) regex match against the User-Agent request header;
    # any request whose User-Agent contains one of these crawler names is
    # answered with 403 Forbidden.
    if ($http_user_agent ~* "qihoobot|Baiduspider|Googlebot|Googlebot-Mobile|Googlebot-Image|Mediapartners-Google|Adsbot-Google|Yahoo! Slurp China|YoudaoBot|Sosospider|Sogou spider|Sogou web spider|MSNBot") {
        return 403;
    }

    location / {
        #limit_conn addr 1;
        # Connection limit using the shared-memory zone "perserver"; the zone
        # must be declared with limit_conn_zone in the http context, which is
        # not part of this file.
        limit_conn perserver 2;

        # HTTP basic authentication against the given password file.
        auth_basic            "brian training";
        auth_basic_user_file  /opt/nginx/conf/htpasswd;
    }

    # Static assets: do not write access-log entries for these requests.
    # NOTE(review): a request matching this regex location is handled here
    # rather than by "location /", so the auth_basic and limit_conn settings
    # above do not apply to it -- confirm that this is intended.
    location ~ .*\.(js|jpg|JPG|jpeg|JPEG|css|bmp|gif|GIF)$ {
        access_log off;
    }

    # Buffered, gzip-compressed access log: entries are collected in a 128k
    # buffer and flushed at least every 5 seconds. The "main" log format is
    # expected to be defined with log_format elsewhere (not shown here).
    access_log logs/brian.log main gzip buffer=128k flush=5s;

    # Serve the stock error page (from the default html directory) for 5xx errors.
    error_page 500 502 503 504 /50x.html;
    location = /50x.html {
        root html;
    }
}