From: Christian Herdtweck Date: Tue, 26 May 2015 09:35:00 +0000 (+0200) Subject: added congestion analysis to HostStatus X-Git-Url: http://developer.intra2net.com/git/?p=pingcheck;a=commitdiff_plain;h=a7b156391f934bc05a2b73315ef0dedb14c027a5 added congestion analysis to HostStatus (make cast from time difference to long explicit in pinger callback) --- diff --git a/src/host/hoststatus.cpp b/src/host/hoststatus.cpp index 74178a6..4887bdc 100644 --- a/src/host/hoststatus.cpp +++ b/src/host/hoststatus.cpp @@ -35,31 +35,60 @@ using I2n::Logger::GlobalLogger; * @param host_address The address of the host it has to analyze. * @param ping_fail_percentage_limit The percentage threshold of pings that can * fail. + * @param ping_congestion_limit_percentage The percentage threshold of pings + * that can fail due to line congestion + * @param ping_duration_congestion_thresh Threshold in seconds (converted to + * micro seconds internally) between a "normal" and a congested line + * @param n_parallel_pings Number of pings that is sent for each IP * @param link_analyzer The object used to notify the status of the host. 
*/ HostStatus::HostStatus( const string &host_address, const int ping_fail_limit_percentage, + const int ping_congestion_limit_percentage, + const int ping_duration_congestion_thresh, const int n_parallel_pings, const LinkStatusItem link_analyzer ) : HostAddress( host_address ), LinkAnalyzer( link_analyzer ), PingFailLimitPercentage( ping_fail_limit_percentage ), + PingCongestionLimitPercentage( ping_congestion_limit_percentage ), + PingDurationCongestionsThresh( static_cast<long>( ping_duration_congestion_thresh ) * 1000000L ), ResolvedIpCount( 0 ), PingsPerformedCount( 0 ), PingsFailedCount( 0 ), + PingCongestionCount( 0 ), ExceededPingFailedLimit( false ), + ExceededPingCongestionLimit( false ), NParallelPingers( n_parallel_pings) { BOOST_ASSERT( !HostAddress.empty() ); - BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) ); + BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) + && ( PingFailLimitPercentage <= 100 ) ); + BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage ) + && ( PingCongestionLimitPercentage <= 100 ) ); } HostStatus::~HostStatus() { } + +/** + * @return prefix for log messages: host address plus the current fail, + * congestion, performed-ping and resolved-IP counters + */ +std::string HostStatus::log_prefix() +{ + std::stringstream temp; + temp << "Stat(" << HostAddress << "): " + << PingsFailedCount << " fail," << PingCongestionCount << " cong/" + << PingsPerformedCount << " pings/" << ResolvedIpCount << "*" + << NParallelPingers << " IPs: "; + return temp.str(); +} + /** * @param resolved_ip_count The number of IPs resolved for the host. */ @@ -73,9 +97,7 @@ void HostStatus::set_resolved_ip_count( const int resolved_ip_count ) } ResolvedIpCount = resolved_ip_count; - GlobalLogger.debug() << "Stat(" << HostAddress << "): " - << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/" - << ResolvedIpCount << "*" << NParallelPingers << " IPs: #IPs set"; + GlobalLogger.debug() << log_prefix() << "#IPs set"; } /** @@ -88,6 +110,15 @@ bool HostStatus::exceeded_ping_failed_limit() const } /** + * @return true if the amount of congested pings given to the host exceeded the + * limit. 
+ */ +bool HostStatus::exceeded_ping_congestion_limit() const +{ + return ExceededPingCongestionLimit; +} + +/** * Tells the status analyzer how the last ping went * * @param result: status of ping specifying success/failure and reason of fail @@ -98,24 +129,16 @@ void HostStatus::update_ping_statistics( const PingStatus &result, { float ping_duration_ms = static_cast<float>(ping_duration_us) / 1000.; - GlobalLogger.debug() << "Stat(" << HostAddress << "): " - << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/" - << ResolvedIpCount << "*" << NParallelPingers << " IPs: " - << "add ping with result " << to_string(result) << " which took " - << ping_duration_ms << " ms"; + GlobalLogger.debug() << log_prefix() << "add ping with result " + << to_string(result) << " which took " << ping_duration_ms << " ms"; BOOST_ASSERT( 1 <= ResolvedIpCount ); BOOST_ASSERT( 0 <= PingsPerformedCount ); BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount ); + BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount ); - increase_ping_performed_count(); - - if ( result != PingStatus_SuccessReply ) - { - increase_ping_failed_count(); - } - - analyze_ping_failed_count(); + update_fail_stats( result ); + update_congestion_stats( result, ping_duration_us ); // after we tried all IPs resolved for this host, we can analyze how many // failed @@ -127,6 +150,35 @@ void HostStatus::update_ping_statistics( const PingStatus &result, } BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount ); + BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount ); +} + + +void HostStatus::update_fail_stats( const PingStatus &result) +{ + increase_ping_performed_count(); + + if ( result != PingStatus_SuccessReply + && result != PingStatus_SuccessOutdatedIP) + { + increase_ping_failed_count(); + } + + analyze_ping_failed_count(); +} + + +void HostStatus::update_congestion_stats( const PingStatus &result, + const long ping_duration_us ) +{ + if (ping_duration_us > PingDurationCongestionsThresh) + 
increase_ping_congestion_count(); + else if ( result == PingStatus_FailureTimeout ) + increase_ping_congestion_count(); + // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused + // by congestion, but also by other reasons (e.g. firewall blocking port 53) + + analyze_ping_congestion_count(); } @@ -146,26 +198,24 @@ void HostStatus::analyze_ping_statistics() // notify if the amount of pings that failed exceed the limit if ( exceeded_ping_failed_limit() ) { - GlobalLogger.debug() << "Stat(" << HostAddress << "): " - << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/" - << ResolvedIpCount << "*" << NParallelPingers << " IPs: " - << "notify down"; + GlobalLogger.debug() << log_prefix() << "notify down"; LinkAnalyzer->notify_host_down( HostAddress ); } else { - GlobalLogger.debug() << "Stat(" << HostAddress << "): " - << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/" - << ResolvedIpCount << "*" << NParallelPingers << " IPs: " - << "notify up"; + GlobalLogger.debug() << log_prefix() << "notify up"; LinkAnalyzer->notify_host_up( HostAddress ); } + + // nothing to do about congestion here, congestion is not forwarded to + // central LinkAnalyzer } //lint !e1762 void HostStatus::reset_ping_counters() { PingsPerformedCount = 0; PingsFailedCount = 0; + PingCongestionCount = 0; } void HostStatus::increase_ping_performed_count() @@ -183,6 +233,14 @@ void HostStatus::increase_ping_failed_count() BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) ); } +void HostStatus::increase_ping_congestion_count() +{ + ++PingCongestionCount; + + BOOST_ASSERT( ( 0 <= PingCongestionCount ) + && ( PingCongestionCount <= PingsPerformedCount ) ); +} + void HostStatus::analyze_ping_failed_count() { BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) ); @@ -196,18 +254,41 @@ void HostStatus::analyze_ping_failed_count() { ExceededPingFailedLimit = true; - GlobalLogger.debug() 
<< "Stat(" << HostAddress << "): " - << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/" - << ResolvedIpCount << "*" << NParallelPingers << " IPs: " - << "exceed limit=" << ping_fail_limit_count; + GlobalLogger.debug() << log_prefix() << "exceed fail limit=" + << ping_fail_limit_count; } else { ExceededPingFailedLimit = false; - GlobalLogger.debug() << "Stat(" << HostAddress << "): " - << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/" - << ResolvedIpCount << "*" << NParallelPingers << " IPs: " - << "below limit=" << ping_fail_limit_count; + GlobalLogger.debug() << log_prefix() << "below fail limit=" + << ping_fail_limit_count; + } +} + +void HostStatus::analyze_ping_congestion_count() +{ + BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage ) + && ( PingCongestionLimitPercentage <= 100 ) ); + BOOST_ASSERT( ( 0 <= PingCongestionCount ) + && ( PingCongestionCount <= PingsPerformedCount ) ); + + int ping_congestion_limit_count = ( ResolvedIpCount * NParallelPingers + * PingCongestionLimitPercentage ) / 100; + + // keep a boolean variable because the PingCongestionCount can be reseted + if ( PingCongestionCount > ping_congestion_limit_count ) + { + ExceededPingCongestionLimit = true; + + GlobalLogger.debug() << log_prefix() << "exceed congestion limit=" + << ping_congestion_limit_count; + } + else + { + ExceededPingCongestionLimit = false; + + GlobalLogger.debug() << log_prefix() << "below congestion limit=" + << ping_congestion_limit_count; } } diff --git a/src/host/hoststatus.h b/src/host/hoststatus.h index 3dba06c..1bb5a08 100644 --- a/src/host/hoststatus.h +++ b/src/host/hoststatus.h @@ -40,6 +40,8 @@ public: HostStatus( const std::string &host_address, const int ping_fail_limit_percentage, + const int ping_congestion_limit_percentage, + const int ping_duration_congestion_thresh, const int n_parallel_pings, const LinkStatusItem link_analyzer ); @@ -47,16 +49,24 @@ public: void set_resolved_ip_count( const int resolved_ip_count ); 
bool exceeded_ping_failed_limit() const; + bool exceeded_ping_congestion_limit() const; void update_ping_statistics( const PingStatus &ping_success, const long ping_duration_us ); private: + void update_fail_stats( const PingStatus &ping_success ); + void update_congestion_stats( const PingStatus &ping_success, + const long ping_duration_us ); bool tried_all_resolved_ip() const; void analyze_ping_statistics(); void reset_ping_counters(); void increase_ping_performed_count(); void increase_ping_failed_count(); + void increase_ping_congestion_count(); void analyze_ping_failed_count(); + void analyze_ping_congestion_count(); + + std::string log_prefix(); private: /// the DNS address of the host to analyze @@ -65,15 +75,24 @@ private: const LinkStatusItem LinkAnalyzer; /// the maximum amount of pings that can fail without warning int PingFailLimitPercentage; + /// the maximum amount of pings that can be congested without warning + int PingCongestionLimitPercentage; + /// the threshold in micro seconds that ping can take with/out congestion + long PingDurationCongestionsThresh; /// the amount of IPs that are aliases to the host DNS int ResolvedIpCount; /// the amount of pings sent until now int PingsPerformedCount; /// the amount of pings sent that failed until now int PingsFailedCount; + /// the amount of pings sent that indicate congestion until now + int PingCongestionCount; /// boolean flag that indicate if the last set of failed pings exceed the /// limit bool ExceededPingFailedLimit; + /// boolean flag that indicate if the last set of congested pings exceed the + /// limit + bool ExceededPingCongestionLimit; /// number of pingers that ping the same IP in parallel int NParallelPingers; diff --git a/src/host/pingscheduler.cpp b/src/host/pingscheduler.cpp index 81149f6..464373a 100644 --- a/src/host/pingscheduler.cpp +++ b/src/host/pingscheduler.cpp @@ -58,6 +58,10 @@ using I2n::Logger::GlobalLogger; * @param ping_protocol_list A list of protocols to use. 
* @param ping_interval_in_sec Amount of time between each ping. * @param ping_fail_percentage_limit Maximum amount of pings that can fail. + * @param ping_congestion_percentage_limit Amount of pings indication congested + * line + * @param ping_congestion_duration_thresh Duration in seconds that indicates a + * congested line * @param ping_reply_timeout Max amount time to wait for ping to finish * @param link_analyzer The object to monitor the link status. * @param first_delay Delay in seconds from start_pinging to first ping attempt @@ -71,6 +75,8 @@ PingScheduler::PingScheduler( const PingProtocolList &ping_protocol_list, const long ping_interval_in_sec, const int ping_fail_percentage_limit, + const int ping_congestion_percentage_limit, + const int ping_congestion_duration_thresh, const int ping_reply_timeout, LinkStatusItem link_analyzer, const int first_delay, @@ -89,7 +95,9 @@ PingScheduler::PingScheduler( TimeSentLastPing( microsec_clock::universal_time() ), PingReplyTimeout( ping_reply_timeout ), HostAnalyzer( destination_address, ping_fail_percentage_limit, - n_parallel_pings, link_analyzer ), + ping_congestion_percentage_limit, + ping_congestion_duration_thresh, n_parallel_pings, + link_analyzer ), Resolver(), Pingers(), NPingers( n_parallel_pings ), diff --git a/src/host/pingscheduler.h b/src/host/pingscheduler.h index 54392c3..adb4a97 100644 --- a/src/host/pingscheduler.h +++ b/src/host/pingscheduler.h @@ -58,6 +58,8 @@ public: const PingProtocolList &ping_protocol_list, const long ping_interval_in_sec, const int ping_fail_percentage_limit, + const int ping_congestion_percentage_limit, + const int ping_congestion_duration_thresh, const int ping_reply_timeout, LinkStatusItem link_analyzer, const int first_delay, diff --git a/src/icmp/icmppinger.cpp b/src/icmp/icmppinger.cpp index b14e1e2..c0415bf 100644 --- a/src/icmp/icmppinger.cpp +++ b/src/icmp/icmppinger.cpp @@ -274,9 +274,9 @@ void IcmpPinger::handle_timeout(const boost::system::error_code& error) } 
// Call ping-done handler - PingDoneCallback( PingerStatus, + PingDoneCallback( PingerStatus, static_cast<long>( (microsec_clock::universal_time() - - TimeSent).total_microseconds() ); + - TimeSent).total_microseconds()) ); } diff --git a/src/main.cpp b/src/main.cpp index 22273bc..940f455 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -281,6 +281,8 @@ bool init_pingers( delays[ping_interval_in_sec] += delay_shifts[ping_interval_in_sec]; int n_parallel_pings = 10; int parallel_ping_delay = 100; // ms + int congestion_duration_thresh = 10; // seconds + int congestion_percentage_thresh = 75; PingSchedulerItem scheduler( new PingScheduler( @@ -291,6 +293,8 @@ bool init_pingers( protocol_list, ping_interval_in_sec, ping_fail_limit, + congestion_percentage_thresh, + congestion_duration_thresh, ping_reply_timeout, status_notifier, current_delay, diff --git a/src/tcp/tcppinger.cpp b/src/tcp/tcppinger.cpp index d583867..e7a6e95 100644 --- a/src/tcp/tcppinger.cpp +++ b/src/tcp/tcppinger.cpp @@ -252,8 +252,8 @@ void TcpPinger::handle_ping_done() } // Call ping-done handler - PingDoneCallback( PingerStatus, - (microsec_clock::universal_time() - TimeSent).total_microseconds() ); + PingDoneCallback( PingerStatus, static_cast<long>( + (microsec_clock::universal_time() - TimeSent).total_microseconds()) ); } void TcpPinger::start_receive() diff --git a/test/test_hoststatus.cpp b/test/test_hoststatus.cpp index e6bfd5b..1eb5fc8 100644 --- a/test/test_hoststatus.cpp +++ b/test/test_hoststatus.cpp @@ -36,10 +36,17 @@ BOOST_AUTO_TEST_SUITE( TestHostStatus ) BOOST_AUTO_TEST_CASE( fail_percentage_10 ) { int ping_fail_percentage_limit = 10; + int ping_congestion_percentage_limit = 75; + int ping_congestion_duration_thresh = 5; + int n_parallel_pings = 1; int resolved_ip_count = 10; LinkStatusItem link_status( new LinkStatus ); - HostStatus host_status( "localhost", ping_fail_percentage_limit, 1, + HostStatus host_status( "localhost", + ping_fail_percentage_limit, + ping_congestion_percentage_limit, + 
ping_congestion_duration_thresh, + n_parallel_pings, link_status ); host_status.set_resolved_ip_count( resolved_ip_count ); @@ -77,10 +84,17 @@ BOOST_AUTO_TEST_CASE( fail_percentage_10 ) BOOST_AUTO_TEST_CASE( fail_percentage_50 ) { int ping_fail_percentage_limit = 50; + int ping_congestion_percentage_limit = 75; + int ping_congestion_duration_thresh = 5; + int n_parallel_pings = 1; int resolved_ip_count = 10; LinkStatusItem link_status( new LinkStatus ); - HostStatus host_status( "localhost", ping_fail_percentage_limit, 1, + HostStatus host_status( "localhost", + ping_fail_percentage_limit, + ping_congestion_percentage_limit, + ping_congestion_duration_thresh, + n_parallel_pings, link_status ); host_status.set_resolved_ip_count( resolved_ip_count ); @@ -118,10 +132,17 @@ BOOST_AUTO_TEST_CASE( fail_percentage_50 ) BOOST_AUTO_TEST_CASE( fail_percentage_80 ) { int ping_fail_percentage_limit = 80; + int ping_congestion_percentage_limit = 75; + int ping_congestion_duration_thresh = 5; + int n_parallel_pings = 1; int resolved_ip_count = 10; LinkStatusItem link_status( new LinkStatus ); - HostStatus host_status( "localhost", ping_fail_percentage_limit, 1, + HostStatus host_status( "localhost", + ping_fail_percentage_limit, + ping_congestion_percentage_limit, + ping_congestion_duration_thresh, + n_parallel_pings, link_status ); host_status.set_resolved_ip_count( resolved_ip_count );