From 941b5e251bba4436cf0e717cad6f427869b623cc Mon Sep 17 00:00:00 2001 From: Christian Herdtweck Date: Thu, 28 May 2015 14:40:03 +0200 Subject: [PATCH] added variable for threshold for switching from "all congested" --> "connection failed" --- src/host/hoststatus.cpp | 16 +++++++++++----- src/host/hoststatus.h | 3 +++ src/host/pingscheduler.cpp | 2 ++ src/host/pingscheduler.h | 1 + src/main.cpp | 14 +++++++++----- test/test_hoststatus.cpp | 6 ++++++ 6 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/host/hoststatus.cpp b/src/host/hoststatus.cpp index 401051c..e9410c0 100644 --- a/src/host/hoststatus.cpp +++ b/src/host/hoststatus.cpp @@ -46,6 +46,7 @@ HostStatus::HostStatus( const string &host_address, const int ping_fail_limit_percentage, const int ping_congestion_limit_percentage, + const int congest_caused_by_fail_limit_percentage, const int ping_duration_congestion_thresh, const int n_parallel_pings, const LinkStatusItem link_analyzer @@ -54,6 +55,7 @@ HostStatus::HostStatus( LinkAnalyzer( link_analyzer ), PingFailLimitPercentage( ping_fail_limit_percentage ), PingCongestionLimitPercentage( ping_congestion_limit_percentage ), + CongestCausedByFailLimitPercentage(congest_caused_by_fail_limit_percentage), PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ), ResolvedIpCount( 0 ), PingsPerformedCount( 0 ), @@ -224,12 +226,16 @@ void HostStatus::analyze_ping_statistics() BOOST_ASSERT( PingsPerformedCount >= ResolvedIpCount*NParallelPingers ); // timeouts are not counted towards failures, only count as congestions - // However, if all pings timed out even in burst mode, then we still declare - // the line down - if (InBurstMode && PingCongestionCount >= PingsPerformedCount) + // However, if many pings timed out even in burst mode, then we still + // declare the line down + float limit = static_cast<float>( PingsPerformedCount + * CongestCausedByFailLimitPercentage)/100.f; + if (InBurstMode && PingCongestionCount > limit) { - 
GlobalLogger.notice() << log_prefix() << "All pings timed out despite " - << "using more pings per IP --> assume connection is really down"; + GlobalLogger.info() << log_prefix() + << "Assume congestion is actually caused by compromised connection " + << "to host because " << PingCongestionCount << " of " + << PingsPerformedCount << " burst pings timed out"; PingsFailedCount += PingCongestionCount; PingCongestionCount = 0; ExceededPingFailedLimit = true; diff --git a/src/host/hoststatus.h b/src/host/hoststatus.h index 5aa9911..d83134f 100644 --- a/src/host/hoststatus.h +++ b/src/host/hoststatus.h @@ -41,6 +41,7 @@ public: const std::string &host_address, const int ping_fail_limit_percentage, const int ping_congestion_limit_percentage, + const int congest_caused_by_fail_limit_percentage, const int ping_duration_congestion_thresh, const int n_parallel_pings, const LinkStatusItem link_analyzer @@ -79,6 +80,8 @@ private: int PingFailLimitPercentage; /// the maximum amount of pings that can be congested without warning int PingCongestionLimitPercentage; + /// threshold to decide when congestion is caused by failed connection + int CongestCausedByFailLimitPercentage; /// the threshold in micro seconds that ping can take with/out congestion long PingDurationCongestionsThresh; /// the amount of IPs that are aliases to the host DNS diff --git a/src/host/pingscheduler.cpp b/src/host/pingscheduler.cpp index b272379..e64725a 100644 --- a/src/host/pingscheduler.cpp +++ b/src/host/pingscheduler.cpp @@ -77,6 +77,7 @@ PingScheduler::PingScheduler( const long ping_interval_in_sec, const int ping_fail_percentage_limit, const int ping_congestion_percentage_limit, + const int congest_caused_by_fail_percentage_limit, const int ping_congestion_duration_thresh, const int ping_reply_timeout, LinkStatusItem link_analyzer, @@ -100,6 +101,7 @@ PingScheduler::PingScheduler( PingReplyTimeoutOrig( ping_reply_timeout ), HostAnalyzer( destination_address, ping_fail_percentage_limit, 
ping_congestion_percentage_limit, + congest_caused_by_fail_percentage_limit, ping_congestion_duration_thresh, n_parallel_pings, link_analyzer ), Resolver(), diff --git a/src/host/pingscheduler.h b/src/host/pingscheduler.h index 91da6e6..c5a9f24 100644 --- a/src/host/pingscheduler.h +++ b/src/host/pingscheduler.h @@ -60,6 +60,7 @@ public: const long ping_interval_in_sec, const int ping_fail_percentage_limit, const int ping_congestion_percentage_limit, + const int congest_caused_by_fail_percentage_limit, const int ping_congestion_duration_thresh, const int ping_reply_timeout, LinkStatusItem link_analyzer, diff --git a/src/main.cpp b/src/main.cpp index 5ad6319..5e96a72 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -265,6 +265,14 @@ bool init_pingers( if (hosts.empty()) return false; + // more variables for pingcheck, maybe should move to config? + int n_parallel_pings = 1; + int parallel_ping_delay = 100; // ms + int congestion_duration_thresh = 10; // seconds + int congestion_percentage_thresh = 75; + int congest_caused_by_fail_limit_percentage = 99; + int ping_timeout_factor = 5; + BOOST_FOREACH( const HostItem &host, hosts ) { string destination_address = host->get_address(); @@ -279,11 +287,6 @@ bool init_pingers( // get delay for this scheduler and update assigned delays int current_delay = boost::math::iround(delays[ping_interval_in_sec]); delays[ping_interval_in_sec] += delay_shifts[ping_interval_in_sec]; - int n_parallel_pings = 2; - int parallel_ping_delay = 100; // ms - int congestion_duration_thresh = 10; // seconds - int congestion_percentage_thresh = 75; - int ping_timeout_factor = 3; PingSchedulerItem scheduler( new PingScheduler( @@ -295,6 +298,7 @@ bool init_pingers( ping_interval_in_sec, ping_fail_limit, congestion_percentage_thresh, + congest_caused_by_fail_limit_percentage, congestion_duration_thresh, ping_reply_timeout, status_notifier, diff --git a/test/test_hoststatus.cpp b/test/test_hoststatus.cpp index 1eb5fc8..1b46612 100644 --- 
a/test/test_hoststatus.cpp +++ b/test/test_hoststatus.cpp @@ -37,6 +37,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_10 ) { int ping_fail_percentage_limit = 10; int ping_congestion_percentage_limit = 75; + int congest_caused_by_fail_percentage_limit = 90; int ping_congestion_duration_thresh = 5; int n_parallel_pings = 1; int resolved_ip_count = 10; @@ -45,6 +46,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_10 ) HostStatus host_status( "localhost", ping_fail_percentage_limit, ping_congestion_percentage_limit, + congest_caused_by_fail_percentage_limit, ping_congestion_duration_thresh, n_parallel_pings, link_status ); @@ -85,6 +87,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_50 ) { int ping_fail_percentage_limit = 50; int ping_congestion_percentage_limit = 75; + int congest_caused_by_fail_percentage_limit = 90; int ping_congestion_duration_thresh = 5; int n_parallel_pings = 1; int resolved_ip_count = 10; @@ -93,6 +96,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_50 ) HostStatus host_status( "localhost", ping_fail_percentage_limit, ping_congestion_percentage_limit, + congest_caused_by_fail_percentage_limit, ping_congestion_duration_thresh, n_parallel_pings, link_status ); @@ -133,6 +137,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_80 ) { int ping_fail_percentage_limit = 80; int ping_congestion_percentage_limit = 75; + int congest_caused_by_fail_percentage_limit = 90; int ping_congestion_duration_thresh = 5; int n_parallel_pings = 1; int resolved_ip_count = 10; @@ -141,6 +146,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_80 ) HostStatus host_status( "localhost", ping_fail_percentage_limit, ping_congestion_percentage_limit, + congest_caused_by_fail_percentage_limit, ping_congestion_duration_thresh, n_parallel_pings, link_status ); -- 1.7.1