Tested new DNS with internal server; made it more robust against caching. Works nicely now.
[pingcheck] / src / host / pingscheduler.cpp
index b38271a..15f10fa 100644 (file)
@@ -170,12 +170,12 @@ void PingScheduler::ping_when_ready()
 {
     if ( !WantToPing )
     {
-        GlobalLogger.info() << LogPrefix << "not pinging (not requested to)";
+        GlobalLogger.info() << LogPrefix << "waiting for ping request";
         return;
     }
     else if ( Resolver && Resolver->is_resolving() )
     {
-        GlobalLogger.info() << LogPrefix << "not pinging (DNS not finished)";
+        GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
         return;
     }
     else if ( !Resolver )
@@ -185,26 +185,44 @@ void PingScheduler::ping_when_ready()
     GlobalLogger.info() << LogPrefix << "start ping";
     WantToPing = false;
 
-    // try to get an up-to-date IP
+    // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
+    //   because a CNAME was out of date -- IPs may still be current)
     HostAddress ip = Resolver->get_next_ip();
 
-    if ( !ip.is_valid() && ContinueOnOutdatedIps)
-    {   // we failed to resolve --> try to use outdated IP
+    if ( !ip.is_valid() )
+    {   // this can happen in 2 cases: if ContinueOnOutdatedIps==true
+        // or when ip went out of date between resolve and now
+        // --> try to use outdated IP
         GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
         bool check_up_to_date = false;
         ip = Resolver->get_next_ip(check_up_to_date);
     }
-    if ( ip.is_valid() )
+    if ( !ip.is_valid() )
+    {   // still no valid IP --> should not happen
+        GlobalLogger.error() << LogPrefix << "Not even outdated IP to ping "
+             << "-- this should not have happened!! Treat like a failed ping.";
+        if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
+            GlobalLogger.warning() << LogPrefix << "This probably happened "
+                << "because you specified a TTL threshold > 0 but resolving"
+                << " had no effect on TTLs since external cache is only "
+                << "updated when TTL=0 is reached.";
+
+        // skip the ping and directly call ping_done_handler
+        ping_done_handler(false);
+    }
+    else
+    {
+        uint32_t ttl = ip.get_ttl().get_updated_value();
+        boost::posix_time::ptime now =
+                                  boost::posix_time::second_clock::local_time();
+        std::string expiry =
+                        boost::posix_time::to_simple_string(now + seconds(ttl));
+
+        GlobalLogger.info() << LogPrefix << "pinging IP " << ip.get_ip()
+            << " with TTL " << ttl << "s (until " << expiry << ")";
         Ping->ping( ip.get_ip(),
                     DestinationPort,
                     boost::bind(&PingScheduler::ping_done_handler, this, _1) );
-    else
-    {   // should not happen
-        GlobalLogger.error() << LogPrefix << "No IP to ping "
-                             << "-- this should not have happened!!";
-        WantToPing = true;
-        if ( !Resolver->is_resolving() )
-            start_resolving_ping_address();
     }
 }
 
@@ -295,6 +313,8 @@ void PingScheduler::update_ping_protocol()
 
 void PingScheduler::get_next_ping_protocol()
 {
+    GlobalLogger.debug() << LogPrefix
+        << "------------------------------------------------------------------";
     ++ProtocolIter;
     if (ProtocolIter == Protocols.end())
         ProtocolIter = Protocols.begin();
@@ -351,9 +371,16 @@ void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
     // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
     Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
                                                            current_protocol);
-    // start resolving if no ips available
-    if ( Resolver->have_up_to_date_ip() )
+
+    // get number of up-to-date IPs
+    bool check_up_to_date = true;
+    int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
+    if (ip_count > 0)
     {
+        GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
+            << ip_count << " where IPs may be outdated: " << false;
+        HostAnalyzer.set_resolved_ip_count( ip_count );
+
         if (Resolver->is_resolving())
             GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
                 << "resolver seems to be resolving all the same... "
@@ -362,6 +389,7 @@ void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
     }
     else
         start_resolving_ping_address();
+        // resolved_ip_count gets set later, in the resolve callback
 }
 
 void PingScheduler::start_resolving_ping_address()
@@ -379,7 +407,27 @@ void PingScheduler::dns_resolve_callback(const bool was_success,
 
     if ( was_success )
     {
-        HostAnalyzer.set_resolved_ip_count( Resolver->get_resolved_ip_count());
+        // trust that a successful DNS resolve means we have an IP with TTL>0
+        int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
+        if (ip_count == 0)
+        {   // this will create trouble in HostAnalyzer
+            GlobalLogger.warning() << LogPrefix
+                << "Should not have reached this case: resolve was "
+                << "successfull but still have no IPs (up-to-date="
+                << !ContinueOnOutdatedIps << ")!";
+            if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
+                GlobalLogger.warning() << LogPrefix << "This probably happened "
+                    << "because you specified a TTL threshold > 0 but resolving"
+                    << " had no effect on TTLs since external cache is only "
+                    << "updated when TTL=0 is reached.";
+        }
+        else
+        {
+            GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
+                << ip_count << " where IPs may be outdated: "
+                << ContinueOnOutdatedIps << " --> could ping now";
+            HostAnalyzer.set_resolved_ip_count( ip_count );
+        }
         ping_when_ready();
     }
     else
@@ -394,7 +442,12 @@ void PingScheduler::dns_resolve_callback(const bool was_success,
         {   // continue with IP
             GlobalLogger.notice() << LogPrefix << "DNS failed, "
                 << "try anyway with cached data";
-            HostAnalyzer.set_resolved_ip_count(0);
+
+            int ip_count = Resolver->get_resolved_ip_count(false);
+            GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
+                << ip_count << " where IPs may be outdated: " << true;
+            HostAnalyzer.set_resolved_ip_count( ip_count );
+
             ping_when_ready();
         }
         else