to avoid going down in congested line scenario, also need longer ping timeout
[pingcheck] / src / host / hoststatus.cpp
1 /*
2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
4
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
7
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
13
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
16
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
19 */
20 #include "host/hoststatus.h"
21
22 #include <iostream>
23 #include <logfunc.hpp>
24
25 #include "boost_assert_handler.h"
26
27 using namespace std;
28 using I2n::Logger::GlobalLogger;
29
30 //-----------------------------------------------------------------------------
31 // HostStatus
32 //-----------------------------------------------------------------------------
33
34 /**
35  * @param host_address The address of the host it has to analyze.
36  * @param ping_fail_percentage_limit The percentage threshold of pings that can
37  * fail.
38  * @param ping_congestion_limit_percentage The percentage threshold of pings
39  * that can fail due to line congestion
40  * @param ping_duration_congestion_thresh Threshold in micro seconds that marks
41  * the difference between a "normal" and a congested line
42  * @param n_parallel_pings Number of pings that is sent for each IP
43  * @param link_analyzer The object used to notify the status of the host.
44  */
45 HostStatus::HostStatus(
46         const string &host_address,
47         const int ping_fail_limit_percentage,
48         const int ping_congestion_limit_percentage,
49         const int ping_duration_congestion_thresh,
50         const int n_parallel_pings,
51         const LinkStatusItem link_analyzer
52 ) :
53     HostAddress( host_address ),
54     LinkAnalyzer( link_analyzer ),
55     PingFailLimitPercentage( ping_fail_limit_percentage ),
56     PingCongestionLimitPercentage( ping_congestion_limit_percentage ),
57     PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ),
58     ResolvedIpCount( 0 ),
59     PingsPerformedCount( 0 ),
60     PingsFailedCount( 0 ),
61     PingCongestionCount( 0 ),
62     ExceededPingFailedLimit( false ),
63     ExceededPingCongestionLimit( false ),
64     NParallelPingers( n_parallel_pings),
65     InBurstMode( false )
66 {
67     BOOST_ASSERT( !HostAddress.empty() );
68     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage )
69                     && ( PingFailLimitPercentage <= 100 ) );
70     BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
71                     && ( PingCongestionLimitPercentage <= 100 ) );
72 }
73
74 HostStatus::~HostStatus()
75 {
76 }
77
78
79 void HostStatus::set_n_parallel_pings(const int n_parallel_pings)
80 {
81     if (ExceededPingCongestionLimit)
82         InBurstMode = true;
83     else
84         InBurstMode = true;
85
86     if (NParallelPingers != n_parallel_pings)
87     {
88         NParallelPingers = n_parallel_pings;
89         reset_ping_counters();
90     }
91     GlobalLogger.debug() << log_prefix() << "#pingers set";
92 }
93
94
95 std::string HostStatus::log_prefix()
96 {
97     std::stringstream temp;
98     temp << "Stat(" << HostAddress << "): "
99         << PingsFailedCount << " fail," << PingCongestionCount << " cong/"
100         << PingsPerformedCount << " pings/" << NParallelPingers << "*"
101         << ResolvedIpCount << " IPs: ";
102     return temp.str();
103 }
104
105 /**
106  * @param resolved_ip_count The number of IPs resolved for the host.
107  */
108 void HostStatus::set_resolved_ip_count( const int resolved_ip_count )
109 {
110     BOOST_ASSERT( 0 <= resolved_ip_count );
111
112     if (resolved_ip_count != ResolvedIpCount)
113     {   // assume that the target has changed --> reset counters
114         reset_ping_counters();
115     }
116     ResolvedIpCount = resolved_ip_count;
117
118     GlobalLogger.debug() << log_prefix() << "#IPs set";
119 }
120
121 /**
122  * @return true if the amount of failed pings given to the host exceeded the
123  * limit.
124  */
125 bool HostStatus::exceeded_ping_failed_limit() const
126 {
127     return ExceededPingFailedLimit;
128 }
129
130 /**
131  * @return true if the amount of congested pings given to the host exceeded the
132  * limit.
133  */
134 bool HostStatus::exceeded_ping_congestion_limit() const
135 {
136     return ExceededPingCongestionLimit;
137 }
138
139 /**
140  * Tells the status analyzer how the last ping went
141  *
142  * @param result: status of ping specifying success/failure and reason of fail
143  * @param ping_duration_us duration of ping in micro seconds
144  */
145 void HostStatus::update_ping_statistics( const PingStatus &result,
146                                          const long ping_duration_us )
147 {
148     float ping_duration_ms = static_cast<float>(ping_duration_us) / 1000.0f;
149
150     GlobalLogger.debug() << log_prefix() << "add ping with result "
151         << to_string(result) << " which took " << ping_duration_ms << " ms";
152
153     BOOST_ASSERT( 0 <= ResolvedIpCount );
154     BOOST_ASSERT( 0 <= PingsPerformedCount );
155     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
156     BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
157
158     increase_ping_performed_count();
159
160     bool failed_because_congested = update_congestion_stats( result,
161                                                              ping_duration_us );
162     update_fail_stats( result, failed_because_congested );
163
164     // after we tried all IPs resolved for this host, we can analyze how many
165     // failed
166     if ( tried_all_resolved_ip() )
167     {
168         analyze_ping_statistics();
169
170         reset_ping_counters();
171     }
172
173     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
174     BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
175 }
176
177
178 void HostStatus::update_fail_stats( const PingStatus &result,
179                                     const bool failed_because_congested )
180 {
181     if ( result != PingStatus_SuccessReply
182       && result != PingStatus_SuccessOutdatedIP
183       && !failed_because_congested )
184     {
185         increase_ping_failed_count();
186     }
187
188     analyze_ping_failed_count();
189 }
190
191
192 bool HostStatus::update_congestion_stats( const PingStatus &result,
193                                           const long ping_duration_us )
194 {
195     bool is_congested = false;
196     if (ping_duration_us > PingDurationCongestionsThresh)
197         is_congested = true;
198     else if ( result == PingStatus_FailureTimeout )
199         is_congested = true;
200     // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused
201     // by congestion, but also by other reasons (e.g. firewall blocking port 53)
202
203     if (is_congested)
204         increase_ping_congestion_count();
205
206     analyze_ping_congestion_count();
207
208     return is_congested;
209 }
210
211
212 bool HostStatus::tried_all_resolved_ip() const
213 {
214     BOOST_ASSERT( 0 < PingsPerformedCount );
215
216     return ( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
217 }
218
219
220 /** @brief called when tried_all_resolved_ip() */
221 void HostStatus::analyze_ping_statistics()
222 {
223     BOOST_ASSERT( !HostAddress.empty() );
224     BOOST_ASSERT( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
225
226     // timeouts are not counted towards failures, only count as congestions
227     // However, if all pings timed out even in burst mode, then we still declare
228     // the line down
229     if (InBurstMode && PingCongestionCount >= PingsPerformedCount)
230     {
231         GlobalLogger.notice() << log_prefix() << "All pings timed out despite "
232             << "using more pings per IP --> assume connection is really down";
233         ExceededPingFailedLimit = true;
234     }
235
236     // notify if the amount of pings that failed exceed the limit
237     if ( exceeded_ping_failed_limit() )
238     {
239         GlobalLogger.debug() << log_prefix() << "notify down";
240         LinkAnalyzer->notify_host_down( HostAddress );
241     }
242     else
243     {
244         GlobalLogger.debug() << log_prefix() << "notify up";
245         LinkAnalyzer->notify_host_up( HostAddress );
246     }
247
248     // nothing else to do about congestion here, congestion is not forwarded to
249     // central LinkAnalyzer
250 } //lint !e1762
251
252 void HostStatus::reset_ping_counters()
253 {
254     PingsPerformedCount = 0;
255     PingsFailedCount = 0;
256     PingCongestionCount = 0;
257 }
258
259 void HostStatus::increase_ping_performed_count()
260 {
261     ++PingsPerformedCount;
262
263     BOOST_ASSERT( 0 < PingsPerformedCount );
264 }
265
266 void HostStatus::increase_ping_failed_count()
267 {
268     ++PingsFailedCount;
269
270     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
271 }
272
273 void HostStatus::increase_ping_congestion_count()
274 {
275     ++PingCongestionCount;
276
277     BOOST_ASSERT( ( 0 <= PingCongestionCount )
278                     && ( PingCongestionCount <= PingsPerformedCount ) );
279 }
280
281 void HostStatus::analyze_ping_failed_count()
282 {
283     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
284     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
285
286     int limit = ( PingsPerformedCount * PingFailLimitPercentage) / 100;
287
288     // keep a boolean variable because the PingsFailedCount can be reseted
289     if ( PingsFailedCount > limit )
290     {
291         ExceededPingFailedLimit = true;
292
293         GlobalLogger.debug() << log_prefix() << "exceed fail limit=" << limit;
294     }
295     else
296     {
297         ExceededPingFailedLimit = false;
298
299         GlobalLogger.debug() << log_prefix() << "below fail limit=" << limit;
300     }
301 }
302
303 void HostStatus::analyze_ping_congestion_count()
304 {
305     BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
306                     && ( PingCongestionLimitPercentage <= 100 ) );
307     BOOST_ASSERT( ( 0 <= PingCongestionCount )
308                     && ( PingCongestionCount <= PingsPerformedCount ) );
309
310     int limit = ( PingsPerformedCount * PingCongestionLimitPercentage) / 100;
311
312     // keep a boolean variable because the PingCongestionCount can be reseted
313     if ( PingCongestionCount > limit )
314     {
315         ExceededPingCongestionLimit = true;
316
317         GlobalLogger.debug() << log_prefix() << "exceed congestion limit="
318                              << limit;
319     }
320     else
321     {
322         ExceededPingCongestionLimit = false;
323
324         GlobalLogger.debug() << log_prefix() << "below congestion limit="
325                              << limit;
326     }
327 }