}
void Configuration::set_resolved_ip_ttl_threshold( const int resolved_ip_ttl_threshold )
{
- BOOST_ASSERT(resolved_ip_ttl_threshold > 0 );
+ BOOST_ASSERT(resolved_ip_ttl_threshold >= 0 );
ResolvedIpTtlThreshold = resolved_ip_ttl_threshold;
}
<< " --> " << result.size() << "-list";
BOOST_FOREACH( const HostAddress &addr, result )
GlobalLogger.debug() << "DnsCache: " << addr.get_ip().to_string()
- << " (TTL " << addr.get_ttl().get_value() << "s)";
+ << " (TTL " << addr.get_ttl().get_updated_value()
+ << "s)";
return result;
}
std::string key = key_for_hostname(hostname);
Cname result_obj = CnameCache[key];
GlobalLogger.debug() << "DnsCache: request CNAME for " << key
- << " --> \"" << result_obj.Host << "\"";
+ << " --> \"" << result_obj.Host << "\" (TTL "
+ << result_obj.Ttl.get_updated_value() << "s)";
if (result_obj.Host.empty())
return result_obj;
}
else
{
- min_cname_ttl = min(min_cname_ttl, current_cname.Ttl.get_value());
+ min_cname_ttl = min(min_cname_ttl,
+ current_cname.Ttl.get_updated_value());
result = get_ips(current_host, check_up_to_date);
}
}
BOOST_FOREACH( HostAddress &addr, result )
{
- if (addr.get_ttl().get_value() > min_cname_ttl)
+ if (addr.get_ttl().get_updated_value() > min_cname_ttl)
{
GlobalLogger.debug() << "DnsCache: using shorter CNAME TTL";
addr.set_ttl(cname_ttl);
using I2n::Logger::GlobalLogger;
using boost::posix_time::seconds;
-using boost::posix_time::minutes;
namespace Config
{
- const int ResolveTimeoutSeconds = 3;
- const int PauseBeforeRetrySeconds = 10;
- const int StaleDataLongtermMinutes = 15;
+ const int ResolveTimeoutSeconds = 2;
+ const int PauseBeforeRetrySeconds = 1;
+ const int StaleDataLongtermSeconds = 5*60;
const int DNS_PORT = 53;
}
, RandomIdGenerator()
, RequestId( 0 )
, OperationCancelled( false )
- , LongermTimerIsActive( false )
+ , LongtermTimerIsActive( false )
{
std::stringstream temp;
temp << "Dns(" << ResolverBase::Hostname << "): ";
ResolveTimeoutTimer.cancel();
PauseBeforeRetryTimer.cancel();
StaleDataLongtermTimer.cancel();
- LongermTimerIsActive = false;
+ LongtermTimerIsActive = false;
// create DNS request
boost::net::dns::message dns_message( ResolverBase::Hostname, Protocol );
if (error)
{
GlobalLogger.info() << LogPrefix << "DNS resolve resulted in error "
- << error << " --> try again after a little while";
+ << error << " --> request retry";
schedule_retry();
return;
}
else
{ // no answers --> cannot proceed
GlobalLogger.warning() << LogPrefix << "No IP nor CNAME received! "
- << "--> schedule retry";
+ << "--> request retry";
schedule_retry();
}
}
{
// schedule new attempt in quite a while
StaleDataLongtermTimer.expires_from_now(
- minutes(Config::StaleDataLongtermMinutes));
+ seconds(Config::StaleDataLongtermSeconds));
StaleDataLongtermTimer.async_wait(
boost::bind( &DnsResolver::wait_timer_timeout_handler,
this, boost::asio::placeholders::error
)
);
- LongermTimerIsActive = true;
+ LongtermTimerIsActive = true;
// for now, admit failure
bool was_success = false;
if (was_success)
{
StaleDataLongtermTimer.cancel();
- LongermTimerIsActive = false;
+ LongtermTimerIsActive = false;
}
// clean up
*/
bool DnsResolver::is_waiting_to_resolve() const
{
- return IsResolving || LongermTimerIsActive;
+ return IsResolving || LongtermTimerIsActive;
}
*/
void DnsResolver::cancel_resolve()
{
- if ( !IsResolving )
+ if ( !IsResolving && !LongtermTimerIsActive)
{
- GlobalLogger.info() << LogPrefix
- << "Cancel called on non-resolving resolver -- ignore";
+ GlobalLogger.info() << LogPrefix << "Cancel called on non-resolving, "
+ << "non-waiting resolver -- ignore";
return;
}
else if (OperationCancelled)
<< "Cancel called on cancelled resolver -- ignore";
return;
}
+ GlobalLogger.info() << LogPrefix << "Cancel resolver";
// set before finalize_resolve so can check in finalize_resolve that ID is
// always 0; ID is not used any more since handle_dns_result stops if
// OperationCancelled is true
RequestId = 0;
- bool was_success = false;
- int cname_count = 1;
- finalize_resolve(was_success, cname_count);
+ if ( IsResolving )
+ {
+ bool was_success = false;
+ int cname_count = 1;
+ finalize_resolve(was_success, cname_count);
+ }
// also cancel the long-term timer
StaleDataLongtermTimer.cancel();
+ LongtermTimerIsActive = false;
// set after finalize_resolve, so can check in finalize_resolve that
// OperationCancelled is never true
{
if ( error == boost::asio::error::operation_aborted ) // cancelled
{
- GlobalLogger.info() << LogPrefix
- << "Resolve timeout timer was cancelled!";
+ GlobalLogger.debug() << LogPrefix
+ << "Resolve timeout timer was cancelled!";
return;
}
else if (error)
{
GlobalLogger.warning() << LogPrefix
<< "resolve timeout handler received error "
- << error << " --> retry";
+ << error << " --> request retry";
schedule_retry();
}
else if ( OperationCancelled )
if ( error == boost::asio::error::operation_aborted ) // cancelled
{ // assume that our code cancelled this timer, so callbacks will be
// taken care of!
- GlobalLogger.info() << LogPrefix
- << "Resolve wait timer was cancelled! ";
+ GlobalLogger.debug() << LogPrefix
+ << "Resolve wait timer was cancelled! ";
}
else if (error)
{ // not sure what to do here, but callers waiting forever for a callback
else
{
GlobalLogger.info() << LogPrefix << "Done waiting --> re-try resolve";
+ IsResolving = false; // will be set to true immediately in do_resolve
do_resolve();
}
}
{
// get cached data
// (do not use arg check_up_to_date here in order to give NextIpIndex
- // a chance to stay above number of outdate IPs)
+ // a chance to stay above number of outdated IPs)
HostAddressVec cached_data = ResolverBase::get_cached_ips_recursively();
// if no results cached, return default-constructed HostAddress (0.0.0.0)
return return_candidate;
}
- int n_iter = 0;
+ std::size_t n_iter = 0;
std::size_t n_ips = cached_data.size();
uint32_t ttl_thresh = static_cast<uint32_t>( DnsMaster::get_instance()
->get_resolved_ip_ttl_threshold() );
}
else
{ // there are candidates left to consider
+ GlobalLogger.debug() << LogPrefix << "Check IP candidate at index "
+ << NextIpIndex;
return_candidate = cached_data[NextIpIndex++];
if (!check_up_to_date)
+ {
+ GlobalLogger.debug() << LogPrefix << "not checking ttl, accept";
return return_candidate;
- else if (cached_data[NextIpIndex].get_ttl().get_updated_value()
+ }
+ else if (return_candidate.get_ttl().get_updated_value()
> ttl_thresh)
+ {
+ GlobalLogger.debug() << LogPrefix << "is up to date, accept";
return return_candidate;
+ }
else
+ {
+ GlobalLogger.debug() << LogPrefix << "is out of date ("
+ << return_candidate.get_ttl().get_updated_value()
+ << "s <= " << ttl_thresh << "s), continue";
++n_iter;
+ }
}
}
}
bool DnsResolver::have_up_to_date_ip()
{
- return get_resolved_ip_count() > 0;
+ return get_resolved_ip_count(true) > 0;
}
-int DnsResolver::get_resolved_ip_count()
+int DnsResolver::get_resolved_ip_count(const bool check_up_to_date)
{
- // run with empty hostname (--> uses internal var Hostname)
- // and check_up_to_date = true
- return ResolverBase::get_cached_ips_recursively("", true).size();
+ // run with empty hostname --> uses internal var Hostname
+ return ResolverBase::get_cached_ips_recursively("",check_up_to_date).size();
}
public:
HostAddress get_next_ip(const bool check_up_to_date=true);
bool have_up_to_date_ip();
- int get_resolved_ip_count();
+ int get_resolved_ip_count(const bool check_up_to_date=true);
void cancel_resolve();
bool is_resolving() const;
bool is_waiting_to_resolve() const;
boost::uuids::random_generator RandomIdGenerator;
uint16_t RequestId;
bool OperationCancelled;
- bool LongermTimerIsActive;
+ bool LongtermTimerIsActive;
};
#endif
HostAddress get_next_ip(const bool check_up_to_date=true)
{ return IpAddress; }
bool have_up_to_date_ip() { return true; }
- int get_resolved_ip_count() { return 1; }
+ int get_resolved_ip_count(const bool check_up_to_date=true)
+ { return 1; }
bool is_resolving() const { return false; }
bool is_waiting_to_resolve() const { return false; }
void cancel_resolve() {}
virtual HostAddress get_next_ip(const bool check_up_to_date=true) = 0;
virtual bool have_up_to_date_ip() = 0;
- virtual int get_resolved_ip_count() = 0;
+ virtual int get_resolved_ip_count(const bool check_up_to_date=true) = 0;
std::string get_hostname() const;
{
if ( !WantToPing )
{
- GlobalLogger.info() << LogPrefix << "not pinging (not requested to)";
+ GlobalLogger.info() << LogPrefix << "waiting for ping request";
return;
}
else if ( Resolver && Resolver->is_resolving() )
{
- GlobalLogger.info() << LogPrefix << "not pinging (DNS not finished)";
+ GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
return;
}
else if ( !Resolver )
GlobalLogger.info() << LogPrefix << "start ping";
WantToPing = false;
- // try to get an up-to-date IP
+ // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
+ // because a CNAME was out of date -- IPs may still be current)
HostAddress ip = Resolver->get_next_ip();
- if ( !ip.is_valid() && ContinueOnOutdatedIps)
- { // we failed to resolve --> try to use outdated IP
+ if ( !ip.is_valid() )
+ { // this can happen in 2 cases: if ContinueOnOutdatedIps==true
+ // or when ip went out of date between resolve and now
+ // --> try to use outdated IP
GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
bool check_up_to_date = false;
ip = Resolver->get_next_ip(check_up_to_date);
}
- if ( ip.is_valid() )
+ if ( !ip.is_valid() )
+ { // still no valid IP --> should not happen
+ GlobalLogger.error() << LogPrefix << "Not even outdated IP to ping "
+ << "-- this should not have happened!! Treat like a failed ping.";
+ if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
+ GlobalLogger.warning() << LogPrefix << "This probably happened "
+ << "because you specified a TTL threshold > 0 but resolving"
+ << " had no effect on TTLs since external cache is only "
+ << "updated when TTL=0 is reached.";
+
+ // skip the ping and directly call ping_done_handler
+ ping_done_handler(false);
+ }
+ else
+ {
+ uint32_t ttl = ip.get_ttl().get_updated_value();
+ boost::posix_time::ptime now =
+ boost::posix_time::second_clock::local_time();
+ std::string expiry =
+ boost::posix_time::to_simple_string(now + seconds(ttl));
+
+ GlobalLogger.info() << LogPrefix << "pinging IP " << ip.get_ip()
+ << " with TTL " << ttl << "s (until " << expiry << ")";
Ping->ping( ip.get_ip(),
DestinationPort,
boost::bind(&PingScheduler::ping_done_handler, this, _1) );
- else
- { // should not happen
- GlobalLogger.error() << LogPrefix << "No IP to ping "
- << "-- this should not have happened!!";
- WantToPing = true;
- if ( !Resolver->is_resolving() )
- start_resolving_ping_address();
}
}
void PingScheduler::get_next_ping_protocol()
{
+ GlobalLogger.debug() << LogPrefix
+ << "------------------------------------------------------------------";
++ProtocolIter;
if (ProtocolIter == Protocols.end())
ProtocolIter = Protocols.begin();
// requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
current_protocol);
- // start resolving if no ips available
- if ( Resolver->have_up_to_date_ip() )
+
+ // get number of up-to-date IPs
+ bool check_up_to_date = true;
+ int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
+ if (ip_count > 0)
{
+ GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
+ << ip_count << " where IPs may be outdated: " << false;
+ HostAnalyzer.set_resolved_ip_count( ip_count );
+
if (Resolver->is_resolving())
GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
<< "resolver seems to be resolving all the same... "
}
else
start_resolving_ping_address();
+ // set resolved_ip_count in resolve callback
}
void PingScheduler::start_resolving_ping_address()
if ( was_success )
{
- HostAnalyzer.set_resolved_ip_count( Resolver->get_resolved_ip_count());
+ // trust that a successfull DNS resolve means we have an IP with TTL>0
+ int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
+ if (ip_count == 0)
+ { // this will create trouble in HostAnalyzer
+ GlobalLogger.warning() << LogPrefix
+ << "Should not have reached this case: resolve was "
+ << "successfull but still have no IPs (up-to-date="
+ << !ContinueOnOutdatedIps << ")!";
+ if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
+ GlobalLogger.warning() << LogPrefix << "This probably happened "
+ << "because you specified a TTL threshold > 0 but resolving"
+ << " had no effect on TTLs since external cache is only "
+ << "updated when TTL=0 is reached.";
+ }
+ else
+ {
+ GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
+ << ip_count << " where IPs may be outdated: "
+ << ContinueOnOutdatedIps << " --> could ping now";
+ HostAnalyzer.set_resolved_ip_count( ip_count );
+ }
ping_when_ready();
}
else
{ // continue with IP
GlobalLogger.notice() << LogPrefix << "DNS failed, "
<< "try anyway with cached data";
- HostAnalyzer.set_resolved_ip_count(0);
+
+ int ip_count = Resolver->get_resolved_ip_count(false);
+ GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
+ << ip_count << " where IPs may be outdated: " << true;
+ HostAnalyzer.set_resolved_ip_count( ip_count );
+
ping_when_ready();
}
else
void IcmpPacketDistributor::clean_up()
{
- if (PingerList.size() > 0)
+ if (PingerList.size() == 0)
+ GlobalLogger.notice() << "All IcmpPingers have de-registered :-)"
+ << std::endl;
+ else
GlobalLogger.warning() << "There were still " << PingerList.size()
<< " pingers registered in IcmpPacketDistributor!" << std::endl;
PingerList.clear();