added variable for threshold for switching from "all congested" --> "connection failed"
author Christian Herdtweck <christian.herdtweck@intra2net.com>
Thu, 28 May 2015 12:40:03 +0000 (14:40 +0200)
committer Christian Herdtweck <christian.herdtweck@intra2net.com>
Thu, 28 May 2015 12:40:03 +0000 (14:40 +0200)
src/host/hoststatus.cpp
src/host/hoststatus.h
src/host/pingscheduler.cpp
src/host/pingscheduler.h
src/main.cpp
test/test_hoststatus.cpp

index 401051c..e9410c0 100644 (file)
@@ -46,6 +46,7 @@ HostStatus::HostStatus(
         const string &host_address,
         const int ping_fail_limit_percentage,
         const int ping_congestion_limit_percentage,
+        const int congest_caused_by_fail_limit_percentage,
         const int ping_duration_congestion_thresh,
         const int n_parallel_pings,
         const LinkStatusItem link_analyzer
@@ -54,6 +55,7 @@ HostStatus::HostStatus(
     LinkAnalyzer( link_analyzer ),
     PingFailLimitPercentage( ping_fail_limit_percentage ),
     PingCongestionLimitPercentage( ping_congestion_limit_percentage ),
+    CongestCausedByFailLimitPercentage(congest_caused_by_fail_limit_percentage),
     PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ),
     ResolvedIpCount( 0 ),
     PingsPerformedCount( 0 ),
@@ -224,12 +226,16 @@ void HostStatus::analyze_ping_statistics()
     BOOST_ASSERT( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
 
     // timeouts are not counted towards failures, only count as congestions
-    // However, if all pings timed out even in burst mode, then we still declare
-    // the line down
-    if (InBurstMode && PingCongestionCount >= PingsPerformedCount)
+    // However, if many pings timed out even in burst mode, then we still
+    // declare the line down
+    float limit = static_cast<float>( PingsPerformedCount
+                                    * CongestCausedByFailLimitPercentage)/100.f;
+    if (InBurstMode && PingCongestionCount > limit)
     {
-        GlobalLogger.notice() << log_prefix() << "All pings timed out despite "
-            << "using more pings per IP --> assume connection is really down";
+        GlobalLogger.info() << log_prefix()
+            << "Assume congestion is actually caused by compromised connection "
+            << "to host because " << PingCongestionCount << " of "
+            << PingsPerformedCount << " burst pings timed out";
         PingsFailedCount += PingCongestionCount;
         PingCongestionCount = 0;
         ExceededPingFailedLimit = true;
index 5aa9911..d83134f 100644 (file)
@@ -41,6 +41,7 @@ public:
             const std::string &host_address,
             const int ping_fail_limit_percentage,
             const int ping_congestion_limit_percentage,
+            const int congest_caused_by_fail_limit_percentage,
             const int ping_duration_congestion_thresh,
             const int n_parallel_pings,
             const LinkStatusItem link_analyzer
@@ -79,6 +80,8 @@ private:
     int PingFailLimitPercentage;
     /// the maximum amount of pings that can be congested without warning
     int PingCongestionLimitPercentage;
+    /// threshold to decide when congestion is caused by failed connection
+    int CongestCausedByFailLimitPercentage;
     /// the threshold in micro seconds that ping can take with/out congestion
     long PingDurationCongestionsThresh;
     /// the amount of IPs that are aliases to the host DNS
index b272379..e64725a 100644 (file)
@@ -77,6 +77,7 @@ PingScheduler::PingScheduler(
         const long ping_interval_in_sec,
         const int ping_fail_percentage_limit,
         const int ping_congestion_percentage_limit,
+        const int congest_caused_by_fail_percentage_limit,
         const int ping_congestion_duration_thresh,
         const int ping_reply_timeout,
         LinkStatusItem link_analyzer,
@@ -100,6 +101,7 @@ PingScheduler::PingScheduler(
     PingReplyTimeoutOrig( ping_reply_timeout ),
     HostAnalyzer( destination_address, ping_fail_percentage_limit,
                   ping_congestion_percentage_limit,
+                  congest_caused_by_fail_percentage_limit,
                   ping_congestion_duration_thresh, n_parallel_pings,
                   link_analyzer ),
     Resolver(),
index 91da6e6..c5a9f24 100644 (file)
@@ -60,6 +60,7 @@ public:
             const long ping_interval_in_sec,
             const int ping_fail_percentage_limit,
             const int ping_congestion_percentage_limit,
+            const int congest_caused_by_fail_percentage_limit,
             const int ping_congestion_duration_thresh,
             const int ping_reply_timeout,
             LinkStatusItem link_analyzer,
index 5ad6319..5e96a72 100644 (file)
@@ -265,6 +265,14 @@ bool init_pingers(
     if (hosts.empty())
         return false;
 
+    // more variables for pingcheck, maybe should move to config?
+    int n_parallel_pings = 1;
+    int parallel_ping_delay = 100;   // ms
+    int congestion_duration_thresh = 10; // seconds
+    int congestion_percentage_thresh = 75;
+    int congest_caused_by_fail_limit_percentage = 99;
+    int ping_timeout_factor = 5;
+
     BOOST_FOREACH( const HostItem &host, hosts )
     {
         string destination_address = host->get_address();
@@ -279,11 +287,6 @@ bool init_pingers(
         // get delay for this scheduler and update assigned delays
         int current_delay = boost::math::iround(delays[ping_interval_in_sec]);
         delays[ping_interval_in_sec] += delay_shifts[ping_interval_in_sec];
-        int n_parallel_pings = 2;
-        int parallel_ping_delay = 100;   // ms
-        int congestion_duration_thresh = 10; // seconds
-        int congestion_percentage_thresh = 75;
-        int ping_timeout_factor = 3;
 
         PingSchedulerItem scheduler(
                 new PingScheduler(
@@ -295,6 +298,7 @@ bool init_pingers(
                         ping_interval_in_sec,
                         ping_fail_limit,
                         congestion_percentage_thresh,
+                        congest_caused_by_fail_limit_percentage,
                         congestion_duration_thresh,
                         ping_reply_timeout,
                         status_notifier,
index 1eb5fc8..1b46612 100644 (file)
@@ -37,6 +37,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_10 )
 {
     int ping_fail_percentage_limit = 10;
     int ping_congestion_percentage_limit = 75;
+    int congest_caused_by_fail_percentage_limit = 90;
     int ping_congestion_duration_thresh = 5;
     int n_parallel_pings = 1;
     int resolved_ip_count = 10;
@@ -45,6 +46,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_10 )
     HostStatus host_status( "localhost",
                             ping_fail_percentage_limit,
                             ping_congestion_percentage_limit,
+                            congest_caused_by_fail_percentage_limit,
                             ping_congestion_duration_thresh,
                             n_parallel_pings,
                             link_status );
@@ -85,6 +87,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_50 )
 {
     int ping_fail_percentage_limit = 50;
     int ping_congestion_percentage_limit = 75;
+    int congest_caused_by_fail_percentage_limit = 90;
     int ping_congestion_duration_thresh = 5;
     int n_parallel_pings = 1;
     int resolved_ip_count = 10;
@@ -93,6 +96,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_50 )
     HostStatus host_status( "localhost",
                             ping_fail_percentage_limit,
                             ping_congestion_percentage_limit,
+                            congest_caused_by_fail_percentage_limit,
                             ping_congestion_duration_thresh,
                             n_parallel_pings,
                             link_status );
@@ -133,6 +137,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_80 )
 {
     int ping_fail_percentage_limit = 80;
     int ping_congestion_percentage_limit = 75;
+    int congest_caused_by_fail_percentage_limit = 90;
     int ping_congestion_duration_thresh = 5;
     int n_parallel_pings = 1;
     int resolved_ip_count = 10;
@@ -141,6 +146,7 @@ BOOST_AUTO_TEST_CASE( fail_percentage_80 )
     HostStatus host_status( "localhost",
                             ping_fail_percentage_limit,
                             ping_congestion_percentage_limit,
+                            congest_caused_by_fail_percentage_limit,
                             ping_congestion_duration_thresh,
                             n_parallel_pings,
                             link_status );