made congestion/offline behaviour more stable: do not declare online right after going...
[pingcheck] / src / host / hoststatus.cpp
CommitLineData
91fcc471
TJ
1/*
2The software in this package is distributed under the GNU General
3Public License version 2 (with a special exception described below).
4
5A copy of GNU General Public License (GPL) is included in this distribution,
6in the file COPYING.GPL.
7
8As a special exception, if other files instantiate templates or use macros
9or inline functions from this file, or you compile this file and link it
10with other works to produce a work based on this file, this file
11does not by itself cause the resulting work to be covered
12by the GNU General Public License.
13
14However the source code for this file must still be made available
15in accordance with section (3) of the GNU General Public License.
16
17This exception does not invalidate any other reasons why a work based
18on this file might be covered by the GNU General Public License.
19*/
6c14bbee 20#include "host/hoststatus.h"
ddf41c89
GMF
21
22#include <iostream>
3f7c921f 23#include <logfunc.hpp>
ddf41c89 24
780b0bca 25#include "boost_assert_handler.h"
ddf41c89
GMF
26
27using namespace std;
3f7c921f 28using I2n::Logger::GlobalLogger;
ddf41c89
GMF
29
30//-----------------------------------------------------------------------------
6c14bbee 31// HostStatus
ddf41c89
GMF
32//-----------------------------------------------------------------------------
33
c01a6023 34/**
6c14bbee
GMF
35 * @param host_address The address of the host it has to analyze.
36 * @param ping_fail_percentage_limit The percentage threshold of pings that can
c01a6023 37 * fail.
a7b15639
CH
38 * @param ping_congestion_limit_percentage The percentage threshold of pings
39 * that can fail due to line congestion
40 * @param ping_duration_congestion_thresh Threshold in micro seconds that marks
41 * the difference between a "normal" and a congested line
42 * @param n_parallel_pings Number of pings that is sent for each IP
6c14bbee 43 * @param link_analyzer The object used to notify the status of the host.
c01a6023 44 */
6c14bbee 45HostStatus::HostStatus(
ddf41c89 46 const string &host_address,
cd4048df 47 const int ping_fail_limit_percentage,
a7b15639
CH
48 const int ping_congestion_limit_percentage,
49 const int ping_duration_congestion_thresh,
91aa83f9 50 const int n_parallel_pings,
c6c54dfb 51 const LinkStatusItem link_analyzer
ddf41c89 52) :
c1fff16a 53 HostAddress( host_address ),
fb469ffa 54 LinkAnalyzer( link_analyzer ),
cd4048df 55 PingFailLimitPercentage( ping_fail_limit_percentage ),
a7b15639
CH
56 PingCongestionLimitPercentage( ping_congestion_limit_percentage ),
57 PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ),
ddf41c89
GMF
58 ResolvedIpCount( 0 ),
59 PingsPerformedCount( 0 ),
d8a91bd6 60 PingsFailedCount( 0 ),
a7b15639 61 PingCongestionCount( 0 ),
91aa83f9 62 ExceededPingFailedLimit( false ),
a7b15639 63 ExceededPingCongestionLimit( false ),
4d7db1af
CH
64 NParallelPingers( n_parallel_pings),
65 InBurstMode( false )
ddf41c89 66{
d4793cc9 67 BOOST_ASSERT( !HostAddress.empty() );
a7b15639
CH
68 BOOST_ASSERT( ( 0 <= PingFailLimitPercentage )
69 && ( PingFailLimitPercentage <= 100 ) );
70 BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
71 && ( PingCongestionLimitPercentage <= 100 ) );
ddf41c89
GMF
72}
73
6c14bbee 74HostStatus::~HostStatus()
ddf41c89
GMF
75{
76}
77
a7b15639 78
242e5fb3
CH
79void HostStatus::set_n_parallel_pings(const int n_parallel_pings)
80{
4d7db1af
CH
81 if (ExceededPingCongestionLimit)
82 InBurstMode = true;
83 else
84 InBurstMode = true;
85
242e5fb3
CH
86 if (NParallelPingers != n_parallel_pings)
87 {
88 NParallelPingers = n_parallel_pings;
89 reset_ping_counters();
90 }
91 GlobalLogger.debug() << log_prefix() << "#pingers set";
92}
93
94
a7b15639
CH
95std::string HostStatus::log_prefix()
96{
97 std::stringstream temp;
98 temp << "Stat(" << HostAddress << "): "
99 << PingsFailedCount << " fail," << PingCongestionCount << " cong/"
4d7db1af
CH
100 << PingsPerformedCount << " pings/" << NParallelPingers << "*"
101 << ResolvedIpCount << " IPs: ";
242e5fb3 102 return temp.str();
a7b15639
CH
103}
104
c01a6023 105/**
6c14bbee 106 * @param resolved_ip_count The number of IPs resolved for the host.
c01a6023 107 */
6c14bbee 108void HostStatus::set_resolved_ip_count( const int resolved_ip_count )
ddf41c89 109{
838e0acf 110 BOOST_ASSERT( 0 <= resolved_ip_count );
ddf41c89 111
db625177
CH
112 if (resolved_ip_count != ResolvedIpCount)
113 { // assume that the target has changed --> reset counters
114 reset_ping_counters();
115 }
ddf41c89 116 ResolvedIpCount = resolved_ip_count;
3f7c921f 117
a7b15639 118 GlobalLogger.debug() << log_prefix() << "#IPs set";
ddf41c89
GMF
119}
120
c01a6023
GMF
121/**
122 * @return true if the amount of failed pings given to the host exceeded the
123 * limit.
124 */
6c14bbee 125bool HostStatus::exceeded_ping_failed_limit() const
d8a91bd6 126{
a341119a 127 return ExceededPingFailedLimit;
d8a91bd6
GMF
128}
129
c01a6023 130/**
a7b15639
CH
131 * @return true if the amount of congested pings given to the host exceeded the
132 * limit.
133 */
134bool HostStatus::exceeded_ping_congestion_limit() const
135{
136 return ExceededPingCongestionLimit;
137}
138
139/**
96c4e7a4
CH
140 * Tells the status analyzer how the last ping went
141 *
142 * @param result: status of ping specifying success/failure and reason of fail
143 * @param ping_duration_us duration of ping in micro seconds
c01a6023 144 */
96c4e7a4
CH
145void HostStatus::update_ping_statistics( const PingStatus &result,
146 const long ping_duration_us )
ddf41c89 147{
242e5fb3 148 float ping_duration_ms = static_cast<float>(ping_duration_us) / 1000.0f;
96c4e7a4 149
a7b15639
CH
150 GlobalLogger.debug() << log_prefix() << "add ping with result "
151 << to_string(result) << " which took " << ping_duration_ms << " ms";
3f7c921f 152
ffa5cfe2 153 BOOST_ASSERT( 0 <= ResolvedIpCount );
ddf41c89
GMF
154 BOOST_ASSERT( 0 <= PingsPerformedCount );
155 BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
a7b15639 156 BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
ddf41c89 157
4d7db1af
CH
158 increase_ping_performed_count();
159
160 bool failed_because_congested = update_congestion_stats( result,
161 ping_duration_us );
162 update_fail_stats( result, failed_because_congested );
2c10f87b 163
c5e4bfa1
GMF
164 // after we tried all IPs resolved for this host, we can analyze how many
165 // failed
166 if ( tried_all_resolved_ip() )
ddf41c89 167 {
d8a91bd6 168 analyze_ping_statistics();
ddf41c89 169
c5e4bfa1 170 reset_ping_counters();
ddf41c89
GMF
171 }
172
173 BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
a7b15639
CH
174 BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
175}
176
177
4d7db1af
CH
178void HostStatus::update_fail_stats( const PingStatus &result,
179 const bool failed_because_congested )
a7b15639 180{
a7b15639 181 if ( result != PingStatus_SuccessReply
4d7db1af
CH
182 && result != PingStatus_SuccessOutdatedIP
183 && !failed_because_congested )
a7b15639
CH
184 {
185 increase_ping_failed_count();
186 }
187
188 analyze_ping_failed_count();
189}
190
191
4d7db1af 192bool HostStatus::update_congestion_stats( const PingStatus &result,
a7b15639
CH
193 const long ping_duration_us )
194{
4d7db1af 195 bool is_congested = false;
a7b15639 196 if (ping_duration_us > PingDurationCongestionsThresh)
4d7db1af 197 is_congested = true;
a7b15639 198 else if ( result == PingStatus_FailureTimeout )
4d7db1af 199 is_congested = true;
a7b15639
CH
200 // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused
201 // by congestion, but also by other reasons (e.g. firewall blocking port 53)
202
4d7db1af
CH
203 if (is_congested)
204 increase_ping_congestion_count();
205
a7b15639 206 analyze_ping_congestion_count();
4d7db1af
CH
207
208 return is_congested;
ddf41c89
GMF
209}
210
c1d776ba 211
6c14bbee 212bool HostStatus::tried_all_resolved_ip() const
d8a91bd6 213{
ffa5cfe2 214 BOOST_ASSERT( 0 < PingsPerformedCount );
d4793cc9 215
ffa5cfe2 216 return ( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
d8a91bd6
GMF
217}
218
4d7db1af
CH
219
220/** @brief called when tried_all_resolved_ip() */
6c14bbee 221void HostStatus::analyze_ping_statistics()
ddf41c89 222{
c1fff16a 223 BOOST_ASSERT( !HostAddress.empty() );
ffa5cfe2 224 BOOST_ASSERT( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
ddf41c89 225
4d7db1af
CH
226 // timeouts are not counted towards failures, only count as congestions
227 // However, if all pings timed out even in burst mode, then we still declare
228 // the line down
229 if (InBurstMode && PingCongestionCount >= PingsPerformedCount)
87758553
CH
230 {
231 GlobalLogger.notice() << log_prefix() << "All pings timed out despite "
232 << "using more pings per IP --> assume connection is really down";
81686580
CH
233 PingsFailedCount += PingCongestionCount;
234 PingCongestionCount = 0;
4d7db1af 235 ExceededPingFailedLimit = true;
81686580 236 ExceededPingCongestionLimit = false;
87758553 237 }
4d7db1af 238
c1fff16a 239 // notify if the amount of pings that failed exceed the limit
a341119a 240 if ( exceeded_ping_failed_limit() )
ddf41c89 241 {
a7b15639 242 GlobalLogger.debug() << log_prefix() << "notify down";
fb469ffa 243 LinkAnalyzer->notify_host_down( HostAddress );
ddf41c89 244 }
e5029552
CH
245 else if (exceeded_ping_congestion_limit() && !InBurstMode)
246 // only notify up if will not try burst mode next
247 // otherwise will continuously notify up and down if get timeouts
248 GlobalLogger.notice() << log_prefix() << "will not notify up because "
249 << " will go into burst mode next";
ddf41c89
GMF
250 else
251 {
a7b15639 252 GlobalLogger.debug() << log_prefix() << "notify up";
fb469ffa 253 LinkAnalyzer->notify_host_up( HostAddress );
ddf41c89 254 }
a7b15639 255
4d7db1af 256 // nothing else to do about congestion here, congestion is not forwarded to
a7b15639 257 // central LinkAnalyzer
6fd0993e 258} //lint !e1762
ddf41c89 259
6c14bbee 260void HostStatus::reset_ping_counters()
c1fff16a
GMF
261{
262 PingsPerformedCount = 0;
263 PingsFailedCount = 0;
a7b15639 264 PingCongestionCount = 0;
c1fff16a
GMF
265}
266
6c14bbee 267void HostStatus::increase_ping_performed_count()
c5e4bfa1
GMF
268{
269 ++PingsPerformedCount;
c1fff16a 270
ffa5cfe2 271 BOOST_ASSERT( 0 < PingsPerformedCount );
c5e4bfa1
GMF
272}
273
6c14bbee 274void HostStatus::increase_ping_failed_count()
c5e4bfa1
GMF
275{
276 ++PingsFailedCount;
c1fff16a
GMF
277
278 BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
c5e4bfa1
GMF
279}
280
a7b15639
CH
281void HostStatus::increase_ping_congestion_count()
282{
283 ++PingCongestionCount;
284
285 BOOST_ASSERT( ( 0 <= PingCongestionCount )
286 && ( PingCongestionCount <= PingsPerformedCount ) );
287}
288
6c14bbee 289void HostStatus::analyze_ping_failed_count()
ddf41c89 290{
cd4048df 291 BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
c1fff16a
GMF
292 BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
293
4d7db1af 294 int limit = ( PingsPerformedCount * PingFailLimitPercentage) / 100;
c1fff16a 295
6827496c 296 // keep a boolean variable because the PingsFailedCount can be reseted
4d7db1af 297 if ( PingsFailedCount > limit )
1d7d7cb2
GMF
298 {
299 ExceededPingFailedLimit = true;
3f7c921f 300
4d7db1af 301 GlobalLogger.debug() << log_prefix() << "exceed fail limit=" << limit;
1d7d7cb2
GMF
302 }
303 else
304 {
305 ExceededPingFailedLimit = false;
3f7c921f 306
4d7db1af 307 GlobalLogger.debug() << log_prefix() << "below fail limit=" << limit;
a7b15639
CH
308 }
309}
310
311void HostStatus::analyze_ping_congestion_count()
312{
313 BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
314 && ( PingCongestionLimitPercentage <= 100 ) );
315 BOOST_ASSERT( ( 0 <= PingCongestionCount )
316 && ( PingCongestionCount <= PingsPerformedCount ) );
317
4d7db1af 318 int limit = ( PingsPerformedCount * PingCongestionLimitPercentage) / 100;
a7b15639
CH
319
320 // keep a boolean variable because the PingCongestionCount can be reseted
4d7db1af 321 if ( PingCongestionCount > limit )
a7b15639
CH
322 {
323 ExceededPingCongestionLimit = true;
324
325 GlobalLogger.debug() << log_prefix() << "exceed congestion limit="
4d7db1af 326 << limit;
a7b15639
CH
327 }
328 else
329 {
330 ExceededPingCongestionLimit = false;
331
332 GlobalLogger.debug() << log_prefix() << "below congestion limit="
4d7db1af 333 << limit;
1d7d7cb2 334 }
c5e4bfa1 335}