<?xml version="1.0"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>CRIN Trac: Ticket #94: Ongoing high load caused by a bot</title>
    <link>https://trac.crin.org/trac/ticket/94</link>
    <description>&lt;p&gt;
Ticket to work out what to do with this issue...
&lt;/p&gt;
</description>
    <language>en-us</language>
    <image>
      <title>CRIN Trac</title>
      <url>https://trac.crin.org/trac/chrome/site/logo.gif</url>
      <link>https://trac.crin.org/trac/ticket/94</link>
    </image>
    <generator>Trac 1.0.2</generator>
    <item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:28:13 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;memory-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:28:25 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;load-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:28:36 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;cpu-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:29:10 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;multips_memory-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:29:21 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;multips-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:29:33 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;phpfpm_status-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:29:45 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;phpfpm_memory-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:30:13 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;phpfpm_memory-day.2.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:30:54 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;nginx_status-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:31:42 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;http_loadtime-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:31:57 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;fw_packets-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:32:09 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;if_eth0-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:32:23 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;fw_conntrack-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:32:36 GMT</pubDate>
      <title>attachment set</title>
      <link>https://trac.crin.org/trac/ticket/94</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;attachment&lt;/strong&gt;
                set to &lt;em&gt;memcached_rates-day.png&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:45:01 GMT</pubDate>
      <title>hours changed; totalhours set</title>
      <link>https://trac.crin.org/trac/ticket/94#comment:1</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94#comment:1</guid>
      <description>
          &lt;ul&gt;
            &lt;li&gt;&lt;strong&gt;hours&lt;/strong&gt;
                changed from &lt;em&gt;0&lt;/em&gt; to &lt;em&gt;0.5&lt;/em&gt;
            &lt;/li&gt;
            &lt;li&gt;&lt;strong&gt;totalhours&lt;/strong&gt;
                set to &lt;em&gt;0.5&lt;/em&gt;
            &lt;/li&gt;
          &lt;/ul&gt;
        &lt;p&gt;
The PHP server, &lt;a class="wiki" href="https://trac.crin.org/trac/wiki/Crin2"&gt;Crin2&lt;/a&gt;, is really suffering:
&lt;/p&gt;
&lt;pre class="wiki"&gt;top - 19:23:57 up 228 days, 6 min,  2 users,  load average: 7.24, 8.24, 8.42
Tasks: 181 total,  11 running, 170 sleeping,   0 stopped,   0 zombie
%Cpu(s):  7.3 us,  0.2 sy,  0.0 ni, 92.3 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
KiB Mem:   8195036 total,  7548024 used,   647012 free,    48340 buffers
KiB Swap:  5468156 total,   139116 used,  5329040 free.  1132416 cached Mem
  PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND
28407 www-data  20   0  622320 260800  43776 R 100.0  3.2  26:28.39 php5-fpm
28965 www-data  20   0  781076 422372  46692 R 100.0  5.2  25:13.99 php5-fpm
28408 www-data  20   0  503676 145892  46688 R  95.7  1.8  25:59.46 php5-fpm
28411 www-data  20   0  506228 145824  43980 R  95.7  1.8  25:10.23 php5-fpm
28415 www-data  20   0  528292 173916  52080 R  95.7  2.1  24:48.56 php5-fpm
28975 www-data  20   0  497376 135620  42848 R  95.7  1.7  24:58.28 php5-fpm
28976 www-data  20   0  644020 282436  43636 R  95.7  3.4  25:07.01 php5-fpm
28405 www-data  20   0  614152 253392  43880 R  89.3  3.1  26:19.49 php5-fpm
28966 www-data  20   0  470168 108520  42912 R  89.3  1.3  23:57.28 php5-fpm
28974 www-data  20   0  631000 270404  43256 R  89.3  3.3  24:34.78 php5-fpm
...
&lt;/pre&gt;&lt;p&gt;
Some graphs of the ongoing high load:
&lt;/p&gt;
&lt;p&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/memory-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/memory-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/load-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/load-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/cpu-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/cpu-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/multips_memory-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/multips_memory-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/multips-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/multips-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/phpfpm_status-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/phpfpm_status-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/phpfpm_memory-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/phpfpm_memory-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/nginx_status-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/nginx_status-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/http_loadtime-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/http_loadtime-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/fw_packets-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/fw_packets-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/if_eth0-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/if_eth0-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/fw_conntrack-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/fw_conntrack-day.png" /&gt;&lt;/a&gt;
&lt;a style="padding:0; border:none" href="https://trac.crin.org/trac/attachment/ticket/94/memcached_rates-day.png"&gt;&lt;img src="https://trac.crin.org/trac/raw-attachment/ticket/94/memcached_rates-day.png" /&gt;&lt;/a&gt;
&lt;/p&gt;
&lt;p&gt;
And the bot responsible:
&lt;/p&gt;
&lt;pre class="wiki"&gt;Mozilla/5.0 (compatible; SemrushBot/1.1~bl; +http://www.semrush.com/bot.html)
&lt;/pre&gt;&lt;p&gt;
The number of requests:
&lt;/p&gt;
&lt;pre class="wiki"&gt;grep "SemrushBot/" /var/log/nginx/crin.org.ssl_access.log | wc -l
15866
&lt;/pre&gt;&lt;p&gt;
And an example request:
&lt;/p&gt;
&lt;pre class="wiki"&gt;46.229.168.66 - - [15/Dec/2016:19:36:41 +0000] "GET /en/library/custom-search-legal?f%5B0%5D=field_date%3Avalue%3A%5B2008-01-01T00%3A00%3A00Z%20TO%202009-01-01T00%3A00%3A00Z%5D&amp;amp;f%5B1%5D=field_date%3Avalue%3A%5B2006-01-01T00%3A00%3A00Z%20TO%202007-01-01T00%3A00%3A00Z%5D&amp;amp;qt-countr-tabs=3 HTTP/1.1" 200 19601 "-" "Mozilla/5.0 (compatible; SemrushBot/1.1~bl; +http://www.semrush.com/bot.html)"
&lt;/pre&gt;&lt;p&gt;
So we could simply block this bot, or rate limit it; note that the reason it is generating a high load is that it is requesting searches.
&lt;/p&gt;
&lt;p&gt;
At the moment we have this in &lt;tt&gt;/etc/nginx/nginx.conf&lt;/tt&gt;:
&lt;/p&gt;
&lt;pre class="wiki"&gt;limit_req_zone  $binary_remote_addr  zone=one:10m   rate=6r/s;
&lt;/pre&gt;&lt;p&gt;
And I can't see a simple way right now to add a lower rate for one IP address, so I'm using the iptables script to block it:
&lt;/p&gt;
&lt;pre class="wiki"&gt;ipdrop 46.229.168.66
&lt;/pre&gt;&lt;p&gt;
And I'll check back later to see the results...
&lt;/p&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:46:32 GMT</pubDate>
      <title></title>
      <link>https://trac.crin.org/trac/ticket/94#comment:2</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94#comment:2</guid>
      <description>
        &lt;p&gt;
It is using more than one IP:
&lt;/p&gt;
&lt;pre class="wiki"&gt;46.229.168.71 - - [15/Dec/2016:19:45:46 +0000] "GET /en/library/custom-search-legal?f%5B0%5D=field_date%3Avalue%3A%5B2013-01-01T00%3A00%3A00Z%20TO%202014-01-01T00%3A00%3A00Z%5D&amp;amp;f%5B1%5D=field_date%3Avalue%3A%5B1997-01-01T00%3A00%3A00Z%20TO%201998-01-01T00%3A00%3A00Z%5D&amp;amp;field_country=All&amp;amp;field_country_1=All&amp;amp;field_crc=All&amp;amp;field_instruments=All&amp;amp;field_monitoring_body=All&amp;amp;field_scope=All&amp;amp;field_themes=All&amp;amp;promo=1&amp;amp;search_api_language=current HTTP/1.1" 499 0 "-" "Mozilla/5.0 (compatible; SemrushBot/1.1~bl; +http://www.semrush.com/bot.html)"
&lt;/pre&gt;&lt;p&gt;
So:
&lt;/p&gt;
&lt;pre class="wiki"&gt;ipdrop 46.229.168.71
&lt;/pre&gt;
      </description>
      <category>Ticket</category>
    </item><item>
      
        <dc:creator>chris</dc:creator>

      <pubDate>Thu, 15 Dec 2016 19:48:49 GMT</pubDate>
      <title></title>
      <link>https://trac.crin.org/trac/ticket/94#comment:3</link>
      <guid isPermaLink="false">https://trac.crin.org/trac/ticket/94#comment:3</guid>
      <description>
        &lt;p&gt;
And a few more:
&lt;/p&gt;
&lt;pre class="wiki"&gt;ipdrop 46.229.168.67
ipdrop 46.229.168.72
ipdrop 46.229.168.73
ipdrop 46.229.168.69
ipdrop 46.229.168.70
ipdrop 46.229.168.74
ipdrop 46.229.168.65
ipdrop 46.229.168.68
&lt;/pre&gt;
      </description>
      <category>Ticket</category>
    </item>
 </channel>
</rss>