fixed bug that caused congestion flag to stay on when all is congested and fail flag...
[pingcheck] / src / host / hoststatus.cpp
1 /*
2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
4
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
7
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
13
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
16
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
19 */
20 #include "host/hoststatus.h"
21
22 #include <iostream>
23 #include <logfunc.hpp>
24
25 #include "boost_assert_handler.h"
26
27 using namespace std;
28 using I2n::Logger::GlobalLogger;
29
30 //-----------------------------------------------------------------------------
31 // HostStatus
32 //-----------------------------------------------------------------------------
33
34 /**
35  * @param host_address The address of the host it has to analyze.
36  * @param ping_fail_percentage_limit The percentage threshold of pings that can
37  * fail.
38  * @param ping_congestion_limit_percentage The percentage threshold of pings
39  * that can fail due to line congestion
40  * @param ping_duration_congestion_thresh Threshold in micro seconds that marks
41  * the difference between a "normal" and a congested line
42  * @param n_parallel_pings Number of pings that is sent for each IP
43  * @param link_analyzer The object used to notify the status of the host.
44  */
45 HostStatus::HostStatus(
46         const string &host_address,
47         const int ping_fail_limit_percentage,
48         const int ping_congestion_limit_percentage,
49         const int ping_duration_congestion_thresh,
50         const int n_parallel_pings,
51         const LinkStatusItem link_analyzer
52 ) :
53     HostAddress( host_address ),
54     LinkAnalyzer( link_analyzer ),
55     PingFailLimitPercentage( ping_fail_limit_percentage ),
56     PingCongestionLimitPercentage( ping_congestion_limit_percentage ),
57     PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ),
58     ResolvedIpCount( 0 ),
59     PingsPerformedCount( 0 ),
60     PingsFailedCount( 0 ),
61     PingCongestionCount( 0 ),
62     ExceededPingFailedLimit( false ),
63     ExceededPingCongestionLimit( false ),
64     NParallelPingers( n_parallel_pings),
65     InBurstMode( false )
66 {
67     BOOST_ASSERT( !HostAddress.empty() );
68     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage )
69                     && ( PingFailLimitPercentage <= 100 ) );
70     BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
71                     && ( PingCongestionLimitPercentage <= 100 ) );
72 }
73
74 HostStatus::~HostStatus()
75 {
76 }
77
78
79 void HostStatus::set_n_parallel_pings(const int n_parallel_pings)
80 {
81     if (ExceededPingCongestionLimit)
82         InBurstMode = true;
83     else
84         InBurstMode = true;
85
86     if (NParallelPingers != n_parallel_pings)
87     {
88         NParallelPingers = n_parallel_pings;
89         reset_ping_counters();
90     }
91     GlobalLogger.debug() << log_prefix() << "#pingers set";
92 }
93
94
95 std::string HostStatus::log_prefix()
96 {
97     std::stringstream temp;
98     temp << "Stat(" << HostAddress << "): "
99         << PingsFailedCount << " fail," << PingCongestionCount << " cong/"
100         << PingsPerformedCount << " pings/" << NParallelPingers << "*"
101         << ResolvedIpCount << " IPs: ";
102     return temp.str();
103 }
104
105 /**
106  * @param resolved_ip_count The number of IPs resolved for the host.
107  */
108 void HostStatus::set_resolved_ip_count( const int resolved_ip_count )
109 {
110     BOOST_ASSERT( 0 <= resolved_ip_count );
111
112     if (resolved_ip_count != ResolvedIpCount)
113     {   // assume that the target has changed --> reset counters
114         reset_ping_counters();
115     }
116     ResolvedIpCount = resolved_ip_count;
117
118     GlobalLogger.debug() << log_prefix() << "#IPs set";
119 }
120
121 /**
122  * @return true if the amount of failed pings given to the host exceeded the
123  * limit.
124  */
125 bool HostStatus::exceeded_ping_failed_limit() const
126 {
127     return ExceededPingFailedLimit;
128 }
129
130 /**
131  * @return true if the amount of congested pings given to the host exceeded the
132  * limit.
133  */
134 bool HostStatus::exceeded_ping_congestion_limit() const
135 {
136     return ExceededPingCongestionLimit;
137 }
138
139 /**
140  * Tells the status analyzer how the last ping went
141  *
142  * @param result: status of ping specifying success/failure and reason of fail
143  * @param ping_duration_us duration of ping in micro seconds
144  */
145 void HostStatus::update_ping_statistics( const PingStatus &result,
146                                          const long ping_duration_us )
147 {
148     float ping_duration_ms = static_cast<float>(ping_duration_us) / 1000.0f;
149
150     GlobalLogger.debug() << log_prefix() << "add ping with result "
151         << to_string(result) << " which took " << ping_duration_ms << " ms";
152
153     BOOST_ASSERT( 0 <= ResolvedIpCount );
154     BOOST_ASSERT( 0 <= PingsPerformedCount );
155     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
156     BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
157
158     increase_ping_performed_count();
159
160     bool failed_because_congested = update_congestion_stats( result,
161                                                              ping_duration_us );
162     update_fail_stats( result, failed_because_congested );
163
164     // after we tried all IPs resolved for this host, we can analyze how many
165     // failed
166     if ( tried_all_resolved_ip() )
167     {
168         analyze_ping_statistics();
169
170         reset_ping_counters();
171     }
172
173     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
174     BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
175 }
176
177
178 void HostStatus::update_fail_stats( const PingStatus &result,
179                                     const bool failed_because_congested )
180 {
181     if ( result != PingStatus_SuccessReply
182       && result != PingStatus_SuccessOutdatedIP
183       && !failed_because_congested )
184     {
185         increase_ping_failed_count();
186     }
187
188     analyze_ping_failed_count();
189 }
190
191
192 bool HostStatus::update_congestion_stats( const PingStatus &result,
193                                           const long ping_duration_us )
194 {
195     bool is_congested = false;
196     if (ping_duration_us > PingDurationCongestionsThresh)
197         is_congested = true;
198     else if ( result == PingStatus_FailureTimeout )
199         is_congested = true;
200     // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused
201     // by congestion, but also by other reasons (e.g. firewall blocking port 53)
202
203     if (is_congested)
204         increase_ping_congestion_count();
205
206     analyze_ping_congestion_count();
207
208     return is_congested;
209 }
210
211
212 bool HostStatus::tried_all_resolved_ip() const
213 {
214     BOOST_ASSERT( 0 < PingsPerformedCount );
215
216     return ( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
217 }
218
219
220 /** @brief called when tried_all_resolved_ip() */
221 void HostStatus::analyze_ping_statistics()
222 {
223     BOOST_ASSERT( !HostAddress.empty() );
224     BOOST_ASSERT( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
225
226     // timeouts are not counted towards failures, only count as congestions
227     // However, if all pings timed out even in burst mode, then we still declare
228     // the line down
229     if (InBurstMode && PingCongestionCount >= PingsPerformedCount)
230     {
231         GlobalLogger.notice() << log_prefix() << "All pings timed out despite "
232             << "using more pings per IP --> assume connection is really down";
233         PingsFailedCount += PingCongestionCount;
234         PingCongestionCount = 0;
235         ExceededPingFailedLimit = true;
236         ExceededPingCongestionLimit = false;
237     }
238
239     // notify if the amount of pings that failed exceed the limit
240     if ( exceeded_ping_failed_limit() )
241     {
242         GlobalLogger.debug() << log_prefix() << "notify down";
243         LinkAnalyzer->notify_host_down( HostAddress );
244     }
245     else
246     {
247         GlobalLogger.debug() << log_prefix() << "notify up";
248         LinkAnalyzer->notify_host_up( HostAddress );
249     }
250
251     // nothing else to do about congestion here, congestion is not forwarded to
252     // central LinkAnalyzer
253 } //lint !e1762
254
255 void HostStatus::reset_ping_counters()
256 {
257     PingsPerformedCount = 0;
258     PingsFailedCount = 0;
259     PingCongestionCount = 0;
260 }
261
262 void HostStatus::increase_ping_performed_count()
263 {
264     ++PingsPerformedCount;
265
266     BOOST_ASSERT( 0 < PingsPerformedCount );
267 }
268
269 void HostStatus::increase_ping_failed_count()
270 {
271     ++PingsFailedCount;
272
273     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
274 }
275
276 void HostStatus::increase_ping_congestion_count()
277 {
278     ++PingCongestionCount;
279
280     BOOST_ASSERT( ( 0 <= PingCongestionCount )
281                     && ( PingCongestionCount <= PingsPerformedCount ) );
282 }
283
284 void HostStatus::analyze_ping_failed_count()
285 {
286     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
287     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
288
289     int limit = ( PingsPerformedCount * PingFailLimitPercentage) / 100;
290
291     // keep a boolean variable because the PingsFailedCount can be reseted
292     if ( PingsFailedCount > limit )
293     {
294         ExceededPingFailedLimit = true;
295
296         GlobalLogger.debug() << log_prefix() << "exceed fail limit=" << limit;
297     }
298     else
299     {
300         ExceededPingFailedLimit = false;
301
302         GlobalLogger.debug() << log_prefix() << "below fail limit=" << limit;
303     }
304 }
305
306 void HostStatus::analyze_ping_congestion_count()
307 {
308     BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
309                     && ( PingCongestionLimitPercentage <= 100 ) );
310     BOOST_ASSERT( ( 0 <= PingCongestionCount )
311                     && ( PingCongestionCount <= PingsPerformedCount ) );
312
313     int limit = ( PingsPerformedCount * PingCongestionLimitPercentage) / 100;
314
315     // keep a boolean variable because the PingCongestionCount can be reseted
316     if ( PingCongestionCount > limit )
317     {
318         ExceededPingCongestionLimit = true;
319
320         GlobalLogger.debug() << log_prefix() << "exceed congestion limit="
321                              << limit;
322     }
323     else
324     {
325         ExceededPingCongestionLimit = false;
326
327         GlobalLogger.debug() << log_prefix() << "below congestion limit="
328                              << limit;
329     }
330 }