Thomas Krichel writes:

> no evidence I have available that they would not obey robots.txt.
Eminently reasonable, Thomas! I have written a script for you, my dear:

#!/usr/bin/python

import datetime

# nginx access log to scan
fufi = '/var/log/nginx/access.log'

now = datetime.datetime.now()
now_month = now.month
now_year = now.year

count = 0
with open(fufi, 'r') as file:
    for line in file:
        if 'PetalBot' not in line:
            continue
        # the timestamp sits between '[' and ']',
        # in the form 12/Oct/2020:06:10:15 +0000
        start = line.partition('[')[2]
        stamp = start.partition(']')[0]
        day = int(stamp[0:2])
        hour = int(stamp[12:14])
        minute = int(stamp[15:17])
        sec = int(stamp[18:20])
        # assumes the log only covers the current month
        time = datetime.datetime(now_year, now_month, day, hour, minute, sec)
        # just look at the last hour
        if now - time > datetime.timedelta(hours=1):
            continue
        count += 1

if count:
    average = 3600 / count
    print('one hit every ' + str(int(average)) + ' seconds')
else:
    print('no PetalBot hits in the last hour')

This shows a request every 15 seconds. robots.txt says the limit is 5.

--
Written by Thomas Krichel http://openlib.org/home/krichel on his 21162nd day.
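
One caveat on the timestamp handling: the script takes month and year from
now(), so a log that spans a month boundary yields wrong dates. A minimal
sketch of a sturdier parse, assuming the stock nginx time format and
Python 3 (the sample timestamp below is only illustrative):

import datetime

# Parse the full nginx timestamp instead of slicing out day/hour/minute,
# so month and year come from the log line itself. Needs Python 3 for %z.
def parse_stamp(stamp):
    return datetime.datetime.strptime(stamp, '%d/%b/%Y:%H:%M:%S %z')

# strptime returns an aware datetime, so compare against an aware now()
now = datetime.datetime.now(datetime.timezone.utc)
age = now - parse_stamp('12/Oct/2020:06:10:15 +0000')
print(age < datetime.timedelta(hours=1))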
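
For reference, the sort of robots.txt stanza that "limit is 5" points at;
the exact wording on the server is an assumption here, only the value 5 is
from the thread:

User-agent: PetalBot
Crawl-delay: 5

A Crawl-delay of 5 asks for at most one request every 5 seconds, so one hit
every 15 seconds would be well within it.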