moved adjustment of PingTimeout to right place in PingScheduler;
[pingcheck] / src / host / hoststatus.cpp
1 /*
2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
4
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
7
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
13
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
16
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
19 */
20 #include "host/hoststatus.h"
21
22 #include <iostream>
23 #include <iomanip>
24 #include <logfunc.hpp>
25
26 #include "boost_assert_handler.h"
27
28 using namespace std;
29 using I2n::Logger::GlobalLogger;
30
31 //-----------------------------------------------------------------------------
32 // HostStatus
33 //-----------------------------------------------------------------------------
34
35 /**
36  * @param host_address The address of the host it has to analyze.
37  * @param ping_fail_percentage_limit The percentage threshold of pings that can
38  * fail.
39  * @param ping_congestion_limit_percentage The percentage threshold of pings
40  * that can fail due to line congestion
41  * @param ping_duration_congestion_thresh Threshold in micro seconds that marks
42  * the difference between a "normal" and a congested line
43  * @param n_parallel_pings Number of pings that is sent for each IP
44  * @param link_analyzer The object used to notify the status of the host.
45  */
46 HostStatus::HostStatus(
47         const string &host_address,
48         const int ping_fail_limit_percentage,
49         const int ping_congestion_limit_percentage,
50         const int congest_caused_by_fail_limit_percentage,
51         const int ping_duration_congestion_thresh,
52         const int n_parallel_pings,
53         const LinkStatusItem link_analyzer
54 ) :
55     HostAddress( host_address ),
56     LinkAnalyzer( link_analyzer ),
57     PingFailLimitPercentage( ping_fail_limit_percentage ),
58     PingCongestionLimitPercentage( ping_congestion_limit_percentage ),
59     CongestCausedByFailLimitPercentage(congest_caused_by_fail_limit_percentage),
60     PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ),
61     ResolvedIpCount( 0 ),
62     PingsPerformedCount( 0 ),
63     PingsFailedCount( 0 ),
64     PingCongestionCount( 0 ),
65     ExceededPingFailedLimit( false ),
66     ExceededPingCongestionLimit( false ),
67     NParallelPingers( n_parallel_pings),
68     InBurstMode( false )
69 {
70     BOOST_ASSERT( !HostAddress.empty() );
71     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage )
72                     && ( PingFailLimitPercentage <= 100 ) );
73     BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
74                     && ( PingCongestionLimitPercentage <= 100 ) );
75 }
76
77 HostStatus::~HostStatus()
78 {
79 }
80
81
82 void HostStatus::set_n_parallel_pings(const int n_parallel_pings)
83 {
84     if (ExceededPingCongestionLimit)
85         InBurstMode = true;
86     else
87         InBurstMode = false;
88
89     if (NParallelPingers != n_parallel_pings)
90     {
91         NParallelPingers = n_parallel_pings;
92         reset_ping_counters();
93     }
94
95     log_status_count();
96 }
97
98 void HostStatus::log_status_count()
99 {
100     std::stringstream temp;
101     temp << "Stat(" << HostAddress << "): " << ResolvedIpCount << " IPs"
102         << "*" << NParallelPingers << " (burst=" << InBurstMode << "); "
103         << PingsPerformedCount << " pings; ";
104     temp << std::fixed << std::setprecision(2);
105     float limit = static_cast<float>( PingsPerformedCount
106                                     * PingFailLimitPercentage) / 100.f;
107     temp << PingsFailedCount << " fail (limit " << limit << "), ";
108     limit = static_cast<float>( PingsPerformedCount
109                                     * PingCongestionLimitPercentage) / 100.f;
110     float limitC = static_cast<float>( PingsPerformedCount
111                                     * CongestCausedByFailLimitPercentage)/100.f;
112     temp << PingCongestionCount << " congest (limits " << limit << ","
113         << limitC << ")";
114     GlobalLogger.info() << temp.str();
115 }
116
117
118 std::string HostStatus::log_prefix()
119 {
120     std::stringstream temp;
121     temp << "Stat(" << HostAddress;
122     if (InBurstMode)
123         temp << "!";
124     temp << "): "
125         << PingsFailedCount << " fail," << PingCongestionCount << " cong/"
126         << PingsPerformedCount << " pings/" << NParallelPingers << "*"
127         << ResolvedIpCount << " IPs: ";
128     return temp.str();
129 }
130
131 /**
132  * @param resolved_ip_count The number of IPs resolved for the host.
133  */
134 void HostStatus::set_resolved_ip_count( const int resolved_ip_count )
135 {
136     BOOST_ASSERT( 0 <= resolved_ip_count );
137
138     if (resolved_ip_count != ResolvedIpCount)
139     {   // assume that the target has changed --> reset counters
140         reset_ping_counters();
141     }
142     ResolvedIpCount = resolved_ip_count;
143
144     log_status_count();
145 }
146
147 /**
148  * @return true if the amount of failed pings given to the host exceeded the
149  * limit.
150  */
151 bool HostStatus::exceeded_ping_failed_limit() const
152 {
153     return ExceededPingFailedLimit;
154 }
155
156 /**
157  * @return true if the amount of congested pings given to the host exceeded the
158  * limit.
159  */
160 bool HostStatus::exceeded_ping_congestion_limit() const
161 {
162     return ExceededPingCongestionLimit;
163 }
164
165 /**
166  * Tells the status analyzer how the last ping went
167  *
168  * @param result: status of ping specifying success/failure and reason of fail
169  * @param ping_duration_us duration of ping in micro seconds
170  */
171 void HostStatus::update_ping_statistics( const PingStatus &result,
172                                          const long ping_duration_us )
173 {
174     float ping_duration_ms = static_cast<float>(ping_duration_us) / 1000.0f;
175
176     BOOST_ASSERT( 0 <= ResolvedIpCount );
177     BOOST_ASSERT( 0 <= PingsPerformedCount );
178     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
179     BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
180
181     increase_ping_performed_count();
182
183     bool failed_because_congested = update_congestion_stats( result,
184                                                              ping_duration_us );
185     update_fail_stats( result, failed_because_congested );
186
187     log_status_count();
188
189     // after we tried all IPs resolved for this host, we can analyze how many
190     // failed
191     if ( tried_all_resolved_ip() )
192     {
193         analyze_ping_statistics();
194
195         reset_ping_counters();
196     }
197
198     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
199     BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
200 }
201
202
203 void HostStatus::update_fail_stats( const PingStatus &result,
204                                     const bool failed_because_congested )
205 {
206     if ( result != PingStatus_SuccessReply
207       && result != PingStatus_SuccessOutdatedIP
208       && !failed_because_congested )
209     {
210         increase_ping_failed_count();
211     }
212
213     analyze_ping_failed_count();
214 }
215
216
217 bool HostStatus::update_congestion_stats( const PingStatus &result,
218                                           const long ping_duration_us )
219 {
220     bool is_congested = false;
221     if (ping_duration_us > PingDurationCongestionsThresh)
222         is_congested = true;
223     else if ( result == PingStatus_FailureTimeout )
224         is_congested = true;
225     // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused
226     // by congestion, but also by other reasons (e.g. firewall blocking port 53)
227
228     if (is_congested)
229         increase_ping_congestion_count();
230
231     analyze_ping_congestion_count();
232
233     return is_congested;
234 }
235
236
237 bool HostStatus::tried_all_resolved_ip() const
238 {
239     BOOST_ASSERT( 0 < PingsPerformedCount );
240
241     return ( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
242 }
243
244
245 /** @brief called when tried_all_resolved_ip() */
246 void HostStatus::analyze_ping_statistics()
247 {
248     BOOST_ASSERT( !HostAddress.empty() );
249     BOOST_ASSERT( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
250
251     // timeouts are not counted towards failures, only count as congestions
252     // However, if many pings timed out even in burst mode, then we still
253     // declare the line down
254     float limit = static_cast<float>( PingsPerformedCount
255                                     * CongestCausedByFailLimitPercentage)/100.f;
256     if (InBurstMode && PingCongestionCount > limit)
257     {
258         GlobalLogger.info() << log_prefix()
259             << "Assume congestion is actually caused by compromised connection "
260             << "to host because " << PingCongestionCount << " of "
261             << PingsPerformedCount << " burst pings timed out";
262         PingsFailedCount += PingCongestionCount;
263         PingCongestionCount = 0;
264         ExceededPingFailedLimit = true;
265         ExceededPingCongestionLimit = false;
266     }
267
268     // notify if the amount of pings that failed exceed the limit
269     if ( exceeded_ping_failed_limit() )
270     {
271         GlobalLogger.debug() << log_prefix() << "notify down";
272         LinkAnalyzer->notify_host_down( HostAddress );
273     }
274     else if (exceeded_ping_congestion_limit() && !InBurstMode)
275         // only notify up if will not try burst mode next
276         // otherwise will continuously notify up and down if get timeouts
277         GlobalLogger.debug() << log_prefix() << "will not notify up because "
278             << " will go into burst mode next";
279     else
280     {
281         GlobalLogger.debug() << log_prefix() << "notify up";
282         LinkAnalyzer->notify_host_up( HostAddress );
283     }
284
285     // nothing else to do about congestion here, congestion is not forwarded to
286     // central LinkAnalyzer
287 } //lint !e1762
288
289 void HostStatus::reset_ping_counters()
290 {
291     PingsPerformedCount = 0;
292     PingsFailedCount = 0;
293     PingCongestionCount = 0;
294 }
295
296 void HostStatus::increase_ping_performed_count()
297 {
298     ++PingsPerformedCount;
299
300     BOOST_ASSERT( 0 < PingsPerformedCount );
301 }
302
303 void HostStatus::increase_ping_failed_count()
304 {
305     ++PingsFailedCount;
306
307     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
308 }
309
310 void HostStatus::increase_ping_congestion_count()
311 {
312     ++PingCongestionCount;
313
314     BOOST_ASSERT( ( 0 <= PingCongestionCount )
315                     && ( PingCongestionCount <= PingsPerformedCount ) );
316 }
317
318 void HostStatus::analyze_ping_failed_count()
319 {
320     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
321     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
322
323     float limit = static_cast<float>( PingsPerformedCount
324                                     * PingFailLimitPercentage) / 100.f;
325
326     // keep a boolean variable because the PingsFailedCount can be reseted
327     if ( PingsFailedCount > limit )
328         ExceededPingFailedLimit = true;
329     else
330         ExceededPingFailedLimit = false;
331 }
332
333 void HostStatus::analyze_ping_congestion_count()
334 {
335     BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
336                     && ( PingCongestionLimitPercentage <= 100 ) );
337     BOOST_ASSERT( ( 0 <= PingCongestionCount )
338                     && ( PingCongestionCount <= PingsPerformedCount ) );
339
340     float limit = static_cast<float>( PingsPerformedCount
341                                     * PingCongestionLimitPercentage) / 100.f;
342
343     // keep a boolean variable because the PingCongestionCount can be reseted
344     if ( PingCongestionCount > limit )
345         ExceededPingCongestionLimit = true;
346     else
347         ExceededPingCongestionLimit = false;
348 }