| 1 | /* |
| 2 | The software in this package is distributed under the GNU General |
| 3 | Public License version 2 (with a special exception described below). |
| 4 | |
| 5 | A copy of GNU General Public License (GPL) is included in this distribution, |
| 6 | in the file COPYING.GPL. |
| 7 | |
| 8 | As a special exception, if other files instantiate templates or use macros |
| 9 | or inline functions from this file, or you compile this file and link it |
| 10 | with other works to produce a work based on this file, this file |
| 11 | does not by itself cause the resulting work to be covered |
| 12 | by the GNU General Public License. |
| 13 | |
| 14 | However the source code for this file must still be made available |
| 15 | in accordance with section (3) of the GNU General Public License. |
| 16 | |
| 17 | This exception does not invalidate any other reasons why a work based |
| 18 | on this file might be covered by the GNU General Public License. |
| 19 | */ |
| 20 | #include "host/hoststatus.h" |
| 21 | |
| 22 | #include <iostream> |
| 23 | #include <iomanip> |
| 24 | #include <logfunc.hpp> |
| 25 | |
| 26 | #include "boost_assert_handler.h" |
| 27 | |
| 28 | using namespace std; |
| 29 | using I2n::Logger::GlobalLogger; |
| 30 | |
| 31 | //----------------------------------------------------------------------------- |
| 32 | // HostStatus |
| 33 | //----------------------------------------------------------------------------- |
| 34 | |
| 35 | /** |
| 36 | * @param host_address The address of the host it has to analyze. |
| 37 | * @param ping_fail_percentage_limit The percentage threshold of pings that can |
| 38 | * fail. |
| 39 | * @param ping_congestion_limit_percentage The percentage threshold of pings |
| 40 | * that can fail due to line congestion |
| 41 | * @param ping_duration_congestion_thresh Threshold in micro seconds that marks |
| 42 | * the difference between a "normal" and a congested line |
| 43 | * @param n_parallel_pings Number of pings that is sent for each IP |
| 44 | * @param link_analyzer The object used to notify the status of the host. |
| 45 | */ |
| 46 | HostStatus::HostStatus( |
| 47 | const string &host_address, |
| 48 | const int ping_fail_limit_percentage, |
| 49 | const int ping_congestion_limit_percentage, |
| 50 | const int congest_caused_by_fail_limit_percentage, |
| 51 | const int ping_duration_congestion_thresh, |
| 52 | const int n_parallel_pings, |
| 53 | const LinkStatusItem link_analyzer |
| 54 | ) : |
| 55 | HostAddress( host_address ), |
| 56 | LinkAnalyzer( link_analyzer ), |
| 57 | PingFailLimitPercentage( ping_fail_limit_percentage ), |
| 58 | PingCongestionLimitPercentage( ping_congestion_limit_percentage ), |
| 59 | CongestCausedByFailLimitPercentage(congest_caused_by_fail_limit_percentage), |
| 60 | PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ), |
| 61 | ResolvedIpCount( 0 ), |
| 62 | PingsPerformedCount( 0 ), |
| 63 | PingsFailedCount( 0 ), |
| 64 | PingCongestionCount( 0 ), |
| 65 | ExceededPingFailedLimit( false ), |
| 66 | ExceededPingCongestionLimit( false ), |
| 67 | NParallelPingers( n_parallel_pings), |
| 68 | InBurstMode( false ) |
| 69 | { |
| 70 | BOOST_ASSERT( !HostAddress.empty() ); |
| 71 | BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) |
| 72 | && ( PingFailLimitPercentage <= 100 ) ); |
| 73 | BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage ) |
| 74 | && ( PingCongestionLimitPercentage <= 100 ) ); |
| 75 | } |
| 76 | |
| 77 | HostStatus::~HostStatus() |
| 78 | { |
| 79 | } |
| 80 | |
| 81 | |
| 82 | void HostStatus::set_n_parallel_pings(const int n_parallel_pings) |
| 83 | { |
| 84 | if (ExceededPingCongestionLimit) |
| 85 | InBurstMode = true; |
| 86 | else |
| 87 | InBurstMode = false; |
| 88 | |
| 89 | if (NParallelPingers != n_parallel_pings) |
| 90 | { |
| 91 | NParallelPingers = n_parallel_pings; |
| 92 | reset_ping_counters(); |
| 93 | } |
| 94 | |
| 95 | log_status_count(); |
| 96 | } |
| 97 | |
| 98 | void HostStatus::log_status_count() |
| 99 | { |
| 100 | std::stringstream temp; |
| 101 | temp << "Stat(" << HostAddress << "): " << ResolvedIpCount << " IPs" |
| 102 | << "*" << NParallelPingers << " (burst=" << InBurstMode << "); " |
| 103 | << PingsPerformedCount << " pings; "; |
| 104 | temp << std::fixed << std::setprecision(2); |
| 105 | float limit = static_cast<float>( PingsPerformedCount |
| 106 | * PingFailLimitPercentage) / 100.f; |
| 107 | temp << PingsFailedCount << " fail (limit " << limit << "), "; |
| 108 | limit = static_cast<float>( PingsPerformedCount |
| 109 | * PingCongestionLimitPercentage) / 100.f; |
| 110 | float limitC = static_cast<float>( PingsPerformedCount |
| 111 | * CongestCausedByFailLimitPercentage)/100.f; |
| 112 | temp << PingCongestionCount << " congest (limits " << limit << "," |
| 113 | << limitC << ")"; |
| 114 | GlobalLogger.info() << temp.str(); |
| 115 | } |
| 116 | |
| 117 | |
| 118 | std::string HostStatus::log_prefix() |
| 119 | { |
| 120 | std::stringstream temp; |
| 121 | temp << "Stat(" << HostAddress; |
| 122 | if (InBurstMode) |
| 123 | temp << "!"; |
| 124 | temp << "): " |
| 125 | << PingsFailedCount << " fail," << PingCongestionCount << " cong/" |
| 126 | << PingsPerformedCount << " pings/" << NParallelPingers << "*" |
| 127 | << ResolvedIpCount << " IPs: "; |
| 128 | return temp.str(); |
| 129 | } |
| 130 | |
| 131 | /** |
| 132 | * @param resolved_ip_count The number of IPs resolved for the host. |
| 133 | */ |
| 134 | void HostStatus::set_resolved_ip_count( const int resolved_ip_count ) |
| 135 | { |
| 136 | BOOST_ASSERT( 0 <= resolved_ip_count ); |
| 137 | |
| 138 | if (resolved_ip_count != ResolvedIpCount) |
| 139 | { // assume that the target has changed --> reset counters |
| 140 | reset_ping_counters(); |
| 141 | } |
| 142 | ResolvedIpCount = resolved_ip_count; |
| 143 | |
| 144 | log_status_count(); |
| 145 | } |
| 146 | |
| 147 | /** |
| 148 | * @return true if the amount of failed pings given to the host exceeded the |
| 149 | * limit. |
| 150 | */ |
| 151 | bool HostStatus::exceeded_ping_failed_limit() const |
| 152 | { |
| 153 | return ExceededPingFailedLimit; |
| 154 | } |
| 155 | |
| 156 | /** |
| 157 | * @return true if the amount of congested pings given to the host exceeded the |
| 158 | * limit. |
| 159 | */ |
| 160 | bool HostStatus::exceeded_ping_congestion_limit() const |
| 161 | { |
| 162 | return ExceededPingCongestionLimit; |
| 163 | } |
| 164 | |
| 165 | /** |
| 166 | * Tells the status analyzer how the last ping went |
| 167 | * |
| 168 | * @param result: status of ping specifying success/failure and reason of fail |
| 169 | * @param ping_duration_us duration of ping in micro seconds |
| 170 | */ |
| 171 | void HostStatus::update_ping_statistics( const PingStatus &result, |
| 172 | const long ping_duration_us ) |
| 173 | { |
| 174 | float ping_duration_ms = static_cast<float>(ping_duration_us) / 1000.0f; |
| 175 | |
| 176 | BOOST_ASSERT( 0 <= ResolvedIpCount ); |
| 177 | BOOST_ASSERT( 0 <= PingsPerformedCount ); |
| 178 | BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount ); |
| 179 | BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount ); |
| 180 | |
| 181 | increase_ping_performed_count(); |
| 182 | |
| 183 | bool failed_because_congested = update_congestion_stats( result, |
| 184 | ping_duration_us ); |
| 185 | update_fail_stats( result, failed_because_congested ); |
| 186 | |
| 187 | log_status_count(); |
| 188 | |
| 189 | // after we tried all IPs resolved for this host, we can analyze how many |
| 190 | // failed |
| 191 | if ( tried_all_resolved_ip() ) |
| 192 | { |
| 193 | analyze_ping_statistics(); |
| 194 | |
| 195 | reset_ping_counters(); |
| 196 | } |
| 197 | |
| 198 | BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount ); |
| 199 | BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount ); |
| 200 | } |
| 201 | |
| 202 | |
| 203 | void HostStatus::update_fail_stats( const PingStatus &result, |
| 204 | const bool failed_because_congested ) |
| 205 | { |
| 206 | if ( result != PingStatus_SuccessReply |
| 207 | && result != PingStatus_SuccessOutdatedIP |
| 208 | && !failed_because_congested ) |
| 209 | { |
| 210 | increase_ping_failed_count(); |
| 211 | } |
| 212 | |
| 213 | analyze_ping_failed_count(); |
| 214 | } |
| 215 | |
| 216 | |
| 217 | bool HostStatus::update_congestion_stats( const PingStatus &result, |
| 218 | const long ping_duration_us ) |
| 219 | { |
| 220 | bool is_congested = false; |
| 221 | if (ping_duration_us > PingDurationCongestionsThresh) |
| 222 | is_congested = true; |
| 223 | else if ( result == PingStatus_FailureTimeout ) |
| 224 | is_congested = true; |
| 225 | // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused |
| 226 | // by congestion, but also by other reasons (e.g. firewall blocking port 53) |
| 227 | |
| 228 | if (is_congested) |
| 229 | increase_ping_congestion_count(); |
| 230 | |
| 231 | analyze_ping_congestion_count(); |
| 232 | |
| 233 | return is_congested; |
| 234 | } |
| 235 | |
| 236 | |
| 237 | bool HostStatus::tried_all_resolved_ip() const |
| 238 | { |
| 239 | BOOST_ASSERT( 0 < PingsPerformedCount ); |
| 240 | |
| 241 | return ( PingsPerformedCount >= ResolvedIpCount*NParallelPingers ); |
| 242 | } |
| 243 | |
| 244 | |
| 245 | /** @brief called when tried_all_resolved_ip() */ |
| 246 | void HostStatus::analyze_ping_statistics() |
| 247 | { |
| 248 | BOOST_ASSERT( !HostAddress.empty() ); |
| 249 | BOOST_ASSERT( PingsPerformedCount >= ResolvedIpCount*NParallelPingers ); |
| 250 | |
| 251 | // timeouts are not counted towards failures, only count as congestions |
| 252 | // However, if many pings timed out even in burst mode, then we still |
| 253 | // declare the line down |
| 254 | float limit = static_cast<float>( PingsPerformedCount |
| 255 | * CongestCausedByFailLimitPercentage)/100.f; |
| 256 | if (InBurstMode && PingCongestionCount > limit) |
| 257 | { |
| 258 | GlobalLogger.info() << log_prefix() |
| 259 | << "Assume congestion is actually caused by compromised connection " |
| 260 | << "to host because " << PingCongestionCount << " of " |
| 261 | << PingsPerformedCount << " burst pings timed out"; |
| 262 | PingsFailedCount += PingCongestionCount; |
| 263 | PingCongestionCount = 0; |
| 264 | ExceededPingFailedLimit = true; |
| 265 | ExceededPingCongestionLimit = false; |
| 266 | } |
| 267 | |
| 268 | // notify if the amount of pings that failed exceed the limit |
| 269 | if ( exceeded_ping_failed_limit() ) |
| 270 | { |
| 271 | GlobalLogger.debug() << log_prefix() << "notify down"; |
| 272 | LinkAnalyzer->notify_host_down( HostAddress ); |
| 273 | } |
| 274 | else if (exceeded_ping_congestion_limit() && !InBurstMode) |
| 275 | // only notify up if will not try burst mode next |
| 276 | // otherwise will continuously notify up and down if get timeouts |
| 277 | GlobalLogger.debug() << log_prefix() << "will not notify up because " |
| 278 | << " will go into burst mode next"; |
| 279 | else |
| 280 | { |
| 281 | GlobalLogger.debug() << log_prefix() << "notify up"; |
| 282 | LinkAnalyzer->notify_host_up( HostAddress ); |
| 283 | } |
| 284 | |
| 285 | // nothing else to do about congestion here, congestion is not forwarded to |
| 286 | // central LinkAnalyzer |
| 287 | } //lint !e1762 |
| 288 | |
| 289 | void HostStatus::reset_ping_counters() |
| 290 | { |
| 291 | PingsPerformedCount = 0; |
| 292 | PingsFailedCount = 0; |
| 293 | PingCongestionCount = 0; |
| 294 | } |
| 295 | |
| 296 | void HostStatus::increase_ping_performed_count() |
| 297 | { |
| 298 | ++PingsPerformedCount; |
| 299 | |
| 300 | BOOST_ASSERT( 0 < PingsPerformedCount ); |
| 301 | } |
| 302 | |
| 303 | void HostStatus::increase_ping_failed_count() |
| 304 | { |
| 305 | ++PingsFailedCount; |
| 306 | |
| 307 | BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) ); |
| 308 | } |
| 309 | |
| 310 | void HostStatus::increase_ping_congestion_count() |
| 311 | { |
| 312 | ++PingCongestionCount; |
| 313 | |
| 314 | BOOST_ASSERT( ( 0 <= PingCongestionCount ) |
| 315 | && ( PingCongestionCount <= PingsPerformedCount ) ); |
| 316 | } |
| 317 | |
| 318 | void HostStatus::analyze_ping_failed_count() |
| 319 | { |
| 320 | BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) ); |
| 321 | BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) ); |
| 322 | |
| 323 | float limit = static_cast<float>( PingsPerformedCount |
| 324 | * PingFailLimitPercentage) / 100.f; |
| 325 | |
| 326 | // keep a boolean variable because the PingsFailedCount can be reseted |
| 327 | if ( PingsFailedCount > limit ) |
| 328 | ExceededPingFailedLimit = true; |
| 329 | else |
| 330 | ExceededPingFailedLimit = false; |
| 331 | } |
| 332 | |
| 333 | void HostStatus::analyze_ping_congestion_count() |
| 334 | { |
| 335 | BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage ) |
| 336 | && ( PingCongestionLimitPercentage <= 100 ) ); |
| 337 | BOOST_ASSERT( ( 0 <= PingCongestionCount ) |
| 338 | && ( PingCongestionCount <= PingsPerformedCount ) ); |
| 339 | |
| 340 | float limit = static_cast<float>( PingsPerformedCount |
| 341 | * PingCongestionLimitPercentage) / 100.f; |
| 342 | |
| 343 | // keep a boolean variable because the PingCongestionCount can be reseted |
| 344 | if ( PingCongestionCount > limit ) |
| 345 | ExceededPingCongestionLimit = true; |
| 346 | else |
| 347 | ExceededPingCongestionLimit = false; |
| 348 | } |