/*
 The software in this package is distributed under the GNU General
 Public License version 2 (with a special exception described below).

 A copy of GNU General Public License (GPL) is included in this distribution,
 in the file COPYING.GPL.

 As a special exception, if other files instantiate templates or use macros
 or inline functions from this file, or you compile this file and link it
 with other works to produce a work based on this file, this file
 does not by itself cause the resulting work to be covered
 by the GNU General Public License.

 However the source code for this file must still be made available
 in accordance with section (3) of the GNU General Public License.

 This exception does not invalidate any other reasons why a work based
 on this file might be covered by the GNU General Public License.

 Christian Herdtweck, Intra2net AG 2015

 with code copied from boost::net::dns::resolve.hpp
 by Andreas Haberstroh (andreas at ibusy dot com)
    from https://github.com/softwareace/Boost.DNS
 */

#include "dns/dnsresolver.h"

// NOTE(review): the header names of the angle-bracket includes were not
// readable in the source this edit was written against. The list below is
// reconstructed from the symbols used in this file -- verify it against the
// repository before merging.
#include <stdint.h>
#include <cstring>
#include <sstream>
#include <string>
#include <vector>

#include <boost/asio.hpp>
#include <boost/bind.hpp>
#include <boost/foreach.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/net/dns.hpp>
#include <boost/uuid/uuid.hpp>

#include <logfunc.hpp>

using I2n::Logger::GlobalLogger;
using boost::posix_time::seconds;

namespace Config
{
    /// time to wait for a DNS reply before a retry is requested
    const int ResolveTimeoutSeconds = 2;
    /// pause between a failed attempt and the next attempt
    const int PauseBeforeRetrySeconds = 1;
    /// pause before resolving is re-started after all retries have failed
    const int StaleDataLongtermSeconds = 5*60;
    /// port of the name server that requests are sent to
    const int DNS_PORT = 53;
    /// upper limit for the number of IPs stored per host name
    const std::size_t MAX_IPS_PER_HOST = 2;
}

/**
 * @brief create a resolver for one host name
 *
 * Opens a UDP socket on an arbitrary local port, creates the three timers on
 * the given io service and sets the log prefix to "Dns(<hostname>): ".
 *
 * @param io_serv     io service used for socket and timers
 * @param hostname    name to resolve (forwarded to ResolverBase)
 * @param protocol    IP protocol selection (forwarded to ResolverBase)
 * @param cache       cache item (forwarded to ResolverBase)
 * @param name_server address of name server; requests go to Config::DNS_PORT
 */
DnsResolver::DnsResolver(IoServiceItem &io_serv,
                         const std::string &hostname,
                         const DnsIpProtocol &protocol,
                         const DnsCacheItem cache,
                         const boost::asio::ip::address &name_server)
    : ResolverBase( io_serv, hostname, protocol, cache )
    , Socket( *io_serv, ip::udp::endpoint(ip::udp::v4(), 0))
        // just connect to anything, will specify sender/receiver later
    , ReceiveBuffer()
    , RequestBuffer()
    , NameServer( name_server, Config::DNS_PORT )
    , ResolveTimeoutTimer( *io_serv )
    , PauseBeforeRetryTimer( *io_serv )
    , StaleDataLongtermTimer( *io_serv )
    , NextIpIndex( 0 )
    , RetryCount( 0 )
    , IsResolving( false )
    , LogPrefix( "DnsResolver" )
    , RandomIdGenerator()
    , RequestId( 0 )
    , OperationCancelled( false )
    , LongtermTimerIsActive( false )
{
    std::stringstream temp;
    temp << "Dns(" << ResolverBase::Hostname << "): ";
    LogPrefix = temp.str();
}

/**
 * @brief close the socket; an error while closing is only logged
 */
DnsResolver::~DnsResolver()
{
    boost::system::error_code error;
    //Socket.shutdown(boost::asio::ip::udp::socket::shutdown_both, error);
    //if ( error )
    //    GlobalLogger.info() << LogPrefix << "Received error " << error
    //                        << " when shutting down socket for DNS";
    // in IcmpPinger always gave an error system:9 (EBADF: Bad file descriptor)
    // Here gives error system:107 ENOTCONN: Transport endpoint is not connected

    Socket.close(error);
    if ( error )
        GlobalLogger.info() << LogPrefix << "Received error " << error
                            << " when closing socket for DNS";
}


//==============================================================================
// ASYNC RESOLVE
//==============================================================================

/**
 * @brief send a DNS request and register handlers for reply and timeout
 *
 * copied here code from boost::net::dns::resolve.hpp, since want async
 * operation and that is used only internally, there
 *
 * Does nothing (except logging) if a resolve is already in progress.
 *
 * @param recursion_count value that is handed through to all handlers
 */
void DnsResolver::do_resolve(const int recursion_count)
{
    // check if resolving already
    if (IsResolving)
    {
        GlobalLogger.info() << LogPrefix
            << "Call to do_resolve ignored since resolving already";
        return;
    }
    IsResolving = true;
    OperationCancelled = false;

    GlobalLogger.info() << LogPrefix << "start resolving for IPs of type "
        << to_string(Protocol) << " using name server " << NameServer;

    // just to be sure: cancel timers
    ResolveTimeoutTimer.cancel();
    PauseBeforeRetryTimer.cancel();
    StaleDataLongtermTimer.cancel();
    LongtermTimerIsActive = false;

    // create DNS request
    boost::net::dns::message dns_message( ResolverBase::Hostname, Protocol );
    dns_message.recursive(true);
    dns_message.action(boost::net::dns::message::query);
    dns_message.opcode(boost::net::dns::message::squery);

    // create random ID for message; the value 0 is reserved to mean
    // "no request outstanding" (see handle_dns_result), so draw again if the
    // random bytes happen to yield 0 -- otherwise the reply to this request
    // would be discarded as a reply to an old request
    do
    {
        const boost::uuids::uuid message_id = RandomIdGenerator();
        memcpy( &RequestId, message_id.data, sizeof(RequestId) );
    }
    while ( RequestId == 0 );
    dns_message.id( RequestId );
    GlobalLogger.debug() << LogPrefix << "Request has ID "
                         << std::showbase << std::hex << dns_message.id();

    // setup receipt of reply
    Socket.async_receive_from(
            boost::asio::buffer(ReceiveBuffer.get_array()),
            NameServer,
            boost::bind( &DnsResolver::handle_dns_result, this,
                         recursion_count,
                         boost::asio::placeholders::error,
                         boost::asio::placeholders::bytes_transferred)
    );

    // schedule timeout
    (void) ResolveTimeoutTimer.expires_from_now(
                                    seconds(Config::ResolveTimeoutSeconds));
    ResolveTimeoutTimer.async_wait( boost::bind(
                                      &DnsResolver::handle_resolve_timeout,
                                      this, recursion_count,
                                      boost::asio::placeholders::error) );

    // send dns request
    dns_message.encode(RequestBuffer);
    size_t bytes_sent;
    try
    {
        bytes_sent = Socket.send_to(
                                boost::asio::buffer(RequestBuffer.get_array()),
                                NameServer );
    }
    catch (const boost::system::system_error &err)
    {
        GlobalLogger.info() << LogPrefix
            << "Sending of DNS request message failed: " << err.what();
        schedule_retry(recursion_count);
        return;
    }

    if ( bytes_sent == 0 )
    {
        GlobalLogger.info() << LogPrefix << "Empty DNS request sent!";
        schedule_retry(recursion_count);
        return;
    }
}


/**
 * @brief handler for data received on the socket
 *
 * Order of checks: aborted socket operation, cancelled resolver, other error.
 * A cancelled resolver must not be re-started by a late reply or error, so
 * the cancel check comes before the generic error check.
 *
 * @param recursion_count   value given to do_resolve
 * @param error             result of the receive operation
 * @param bytes_transferred number of bytes written to ReceiveBuffer
 */
void DnsResolver::handle_dns_result(const int recursion_count,
                                    const boost::system::error_code &error,
                                    const std::size_t bytes_transferred)
{
    if ( error == boost::asio::error::operation_aborted )
    {   // receive was aborted because the socket was closed (this code only
        // closes it in the destructor) --> nothing left to retry
        return;
    }
    else if ( OperationCancelled )
    {   // async_resolve was cancelled --> callbacks already called
        GlobalLogger.info() << LogPrefix
                            << "Ignoring DNS results since we were cancelled";
        return;
    }
    else if (error)
    {
        GlobalLogger.info() << LogPrefix << "DNS resolve resulted in error "
                            << error << " --> request retry";
        schedule_retry(recursion_count);
        return;
    }

    GlobalLogger.debug() << LogPrefix << "Handling DNS result ("
                         << bytes_transferred << " bytes transferred)";

    // next 3(+1) lines copied from boost/net/dns/resolver.hpp:
    // clamp the recvBuffer with the number of bytes transferred or decode buffer
    ReceiveBuffer.length(bytes_transferred);
    boost::net::dns::message result_message;
    result_message.decode( ReceiveBuffer );

    // check ID
    if (RequestId == 0)
    {   // system DNS or firewall might have longer timeout than me
        // --> might receive replies for old requests whose time-out has expired
        // --> have already called callbacks etc, so nothing to do with result
        // TODO: did receive probably same data several times --> reset buffer?
        GlobalLogger.info() << LogPrefix << "Ignoring reply to old DNS request "
                            << "(reply has ID " << std::showbase << std::hex
                            << result_message.id() << " or buffer not reset)";
        return;
    }
    else if (RequestId != result_message.id())
    {
        GlobalLogger.info() << LogPrefix << "Received answer for request ID "
           << std::showbase << std::hex << result_message.id()
           << " but expected ID " << RequestId << " --> ignore and retry";
        schedule_retry(recursion_count);
        return;
    }
    else
        GlobalLogger.debug() << LogPrefix << "Result has correct ID "
                             << std::showbase << std::hex << RequestId;
    RequestId = 0;

    // loop over answers, remembering ips and cnames
    // work with a regular pointer to list of answers since result_message is
    //   owner of data and that exists until end of function
    // Items in answers list are shared_ptr to resource_base_t
    std::vector<host_addr_pair> result_ips;
    std::vector<src_cname_pair> result_cnames;
    std::vector<string_pair> result_name_servers;

    GlobalLogger.debug() << LogPrefix <<"Checking ANSWERS section of dns reply";
    gather_results(result_message.answers(), &result_ips, &result_cnames,
                                                      &result_name_servers);

    // remember cname list (if there were any)
    // results should have the logical order
    // Hostname [ --> cname1 --> cname2 --> ... --> cnameN ] [ --> ips ];
    // otherwise just have unnecessary cnames in cache
    BOOST_FOREACH( const src_cname_pair &host_and_cname, result_cnames )
        ResolverBase::update_cache(host_and_cname.first, host_and_cname.second);

    if ( !result_ips.empty() )
        handle_ips( recursion_count, result_ips );
    else if ( !result_cnames.empty() )
        // no IPs but at least one cname --> find the "last" cname and
        // re-start resolving with that
        handle_cname(recursion_count, result_cnames);
    else
    {   // no answers --> cannot proceed
        GlobalLogger.info() << LogPrefix << "No IP nor CNAME received! "
                            << "--> request retry";
        schedule_retry(recursion_count);
    }
}

/**
 * gather IPs, CNAMEs and name servers from list of resource records;
 *
 * can be run on answers(), authorities() and additional() sections of dns
 * reply messages
 *
 * Records whose concrete class does not match their announced type are
 * logged and skipped.
 *
 * NOTE(review): the names of the Boost.DNS record classes used in the casts
 * (a_resource, a6_resource, cname_resource, ns_resource) were not readable in
 * the source this edit was written against and are reconstructed -- verify.
 *
 * @param rr_list: input list of resource records
 * @param result_ips: output vector of ips
 * @param result_cnames: output vector of cnames
 * @param result_name_servers: output vector of name servers
 */
void DnsResolver::gather_results(const boost::net::dns::rr_list_t *rr_list,
                                 std::vector<host_addr_pair> *result_ips,
                                 std::vector<src_cname_pair> *result_cnames,
                                 std::vector<string_pair> *result_name_servers)
                                                                         const
{
    using boost::net::dns::resource_base_t;
    boost::posix_time::ptime now =boost::posix_time::second_clock::local_time();
    BOOST_FOREACH( boost::shared_ptr<resource_base_t> rr_item, *rr_list )
    {
        boost::net::dns::type_t rr_type = rr_item->rtype();
        uint32_t ttl = rr_item->ttl();
        std::string domain = rr_item->domain();
        std::string expiry =
                        boost::posix_time::to_simple_string(now + seconds(ttl));

        if (rr_type == boost::net::dns::type_a)
        {    // 'A' resource records carry IPv4 addresses
            if (Protocol == DNS_IPv6)
            {
                GlobalLogger.info() << LogPrefix << "Ignoring IPv4 address "
                        << "because resolver was configured to only use IPv6.";
                continue;
            }
            boost::net::dns::a_resource *a_rr =
                dynamic_cast<boost::net::dns::a_resource *>(rr_item.get());
            if ( a_rr == 0 )
            {   // type field and object type disagree --> do not dereference
                GlobalLogger.warning() << LogPrefix << domain
                    << ": record of type A has unexpected class --> skip";
                continue;
            }
            boost::asio::ip::address_v4 ip = a_rr->address();
            result_ips->push_back(host_addr_pair(domain, HostAddress(ip, ttl)));
            GlobalLogger.debug() << LogPrefix << domain << ": IPv4 " << ip
                                 << " with TTL " << ttl << "s (until "
                                 << expiry << ")";
        }
        else if (rr_type == boost::net::dns::type_a6)
        {   // 'AAAA' resource records carry IPv6 addresses
            if (Protocol == DNS_IPv4)
            {
                GlobalLogger.info() << LogPrefix << "Ignoring IPv6 address "
                        << "because resolver was configured to only use IPv4.";
                continue;
            }
            boost::net::dns::a6_resource *a6_rr =
                dynamic_cast<boost::net::dns::a6_resource *>(rr_item.get());
            if ( a6_rr == 0 )
            {
                GlobalLogger.warning() << LogPrefix << domain
                    << ": record of type AAAA has unexpected class --> skip";
                continue;
            }
            boost::asio::ip::address_v6 ip = a6_rr->address();
            result_ips->push_back(host_addr_pair(domain, HostAddress(ip, ttl)));
            GlobalLogger.debug() << LogPrefix << domain << ": IPv6 " << ip
                                 << " with TTL " << ttl << "s (until "
                                 << expiry << ")";
        }
        else if (rr_type == boost::net::dns::type_cname)
        {   // 'CNAME' resource records that carry aliases
            boost::net::dns::cname_resource *cname_rr =
                dynamic_cast<boost::net::dns::cname_resource *>(rr_item.get());
            if ( cname_rr == 0 )
            {
                GlobalLogger.warning() << LogPrefix << domain
                    << ": record of type CNAME has unexpected class --> skip";
                continue;
            }
            std::string cname = cname_rr->canonicalname();
            result_cnames->push_back( src_cname_pair(domain,
                                                     Cname(cname, ttl)) );
            GlobalLogger.debug() << LogPrefix << domain << ": CNAME to "
                                 << cname << " with TTL " << ttl << "s (until "
                                 << expiry << ")";
        }
        else if (rr_type == boost::net::dns::type_ns)
        {   // NS (name_server) resource records
            boost::net::dns::ns_resource *ns_rr =
                dynamic_cast<boost::net::dns::ns_resource *>(rr_item.get());
            if ( ns_rr == 0 )
            {
                GlobalLogger.warning() << LogPrefix << domain
                    << ": record of type NS has unexpected class --> skip";
                continue;
            }
            std::string name_server = ns_rr->nameserver();
            result_name_servers->push_back( string_pair(domain, name_server) );
            GlobalLogger.debug() << LogPrefix << "NameServer " << name_server
                                 << " for " << domain << " with TTL " << ttl
                                 << "s (until " << expiry << ")";
        }
        else if (rr_type == boost::net::dns::type_soa)
            GlobalLogger.debug() << LogPrefix << "SOA resource";
        else if (rr_type == boost::net::dns::type_ptr)
            GlobalLogger.debug() << LogPrefix << "ptr resource";
        else if (rr_type == boost::net::dns::type_hinfo)
            GlobalLogger.debug() << LogPrefix << "hinfo resource";
        else if (rr_type == boost::net::dns::type_mx)
            GlobalLogger.debug() << LogPrefix << "mx resource";
        else if (rr_type == boost::net::dns::type_txt)
            GlobalLogger.debug() << LogPrefix << "txt resource";
        else if (rr_type == boost::net::dns::type_srv)
            GlobalLogger.debug() << LogPrefix << "srv resource";
        else if (rr_type == boost::net::dns::type_axfr)
            GlobalLogger.debug() << LogPrefix << "axfr resource";
        else
            GlobalLogger.debug() << LogPrefix << "unknown resource type: "
                                 << std::showbase << std::hex
                                 << static_cast<unsigned>(rr_item->rtype());
    }
}


/**
 * @brief give up for now: start the long-term timer and finalize with failure
 *
 * @param recursion_count forwarded to timer handler and finalize_resolve
 */
void DnsResolver::handle_unavailable(const int recursion_count)
{
    // schedule new attempt in quite a while
    StaleDataLongtermTimer.expires_from_now(
                                     seconds(Config::StaleDataLongtermSeconds));
    StaleDataLongtermTimer.async_wait(
            boost::bind( &DnsResolver::wait_timer_timeout_handler,
                         this, recursion_count,
                         boost::asio::placeholders::error
            )
    );
    LongtermTimerIsActive = true;

    // for now, admit failure
    RequestId = 0;   // do not accept answers from old requests
    bool was_success = false;
    finalize_resolve(was_success, recursion_count);
}

/**
 * @brief store received IPs in cache and finalize with success
 *
 * Only IPs that belong to the same host as the first entry are kept; the
 * list is truncated to Config::MAX_IPS_PER_HOST entries.
 *
 * @param recursion_count forwarded to finalize_resolve
 * @param result_ips      pairs (host, address); the only caller in this file
 *                        passes a non-empty vector
 */
void DnsResolver::handle_ips(const int recursion_count,
                             const std::vector<host_addr_pair> &result_ips)
{
    // received at least one IP which could be for the queried host name
    // or the cname at the "end" of the cname list;
    // but all IPs should be for the same
    HostAddressVec addr_list;
    std::string only_host_for_ips = result_ips[0].first;
    BOOST_FOREACH( const host_addr_pair &host_and_addr, result_ips)
    {
        if ( host_and_addr.first != only_host_for_ips )
            GlobalLogger.info() << LogPrefix
                << "Received IPs for different hosts " << only_host_for_ips
                << " and " << host_and_addr.first << " in one DNS result! "
                << "--> ignore second";
        else
        {
            GlobalLogger.info() << LogPrefix << "Found IP "
                      << host_and_addr.second.get_ip() << " with TTL "
                      << host_and_addr.second.get_ttl().get_value() << "s";
            addr_list.push_back(host_and_addr.second);
        }
    }

    // limit number of IPs to be saved
    if (addr_list.size() > Config::MAX_IPS_PER_HOST)
    {
        GlobalLogger.info() << LogPrefix << "Limit list of IPs from "
                            << addr_list.size() << " to "
                            << Config::MAX_IPS_PER_HOST;
        addr_list.resize(Config::MAX_IPS_PER_HOST);
    }

    // now save in cache
    ResolverBase::update_cache( only_host_for_ips, addr_list );

    // clean up
    bool was_success = true;
    finalize_resolve(was_success, recursion_count);
}


/**
 * @brief continue resolving with the "last" CNAME of a reply without IPs
 *
 * Uses cached IPs for that CNAME if there are any; otherwise asks DnsMaster
 * for a resolver for the CNAME and waits for its callback.
 *
 * @param recursion_count value given to do_resolve
 * @param result_cnames   pairs (host, cname) from the reply
 */
void DnsResolver::handle_cname(const int recursion_count,
                               const std::vector<src_cname_pair> &result_cnames)
{
    // find the "last" cname in the list
    // Hostname --> cname1 --> cname2 --> ... --> cnameN
    // We assume here that this list might not be in order but that all cnames
    //   form a single list (form one connected list and not several independent
    //   lists)

    std::string last_cname = "";
    bool could_be_last;
    BOOST_REVERSE_FOREACH( const src_cname_pair &host_and_cname, result_cnames )
    {
        could_be_last = true;
        BOOST_REVERSE_FOREACH( const src_cname_pair &other, result_cnames )
        {
            if (other.first == host_and_cname.second.Host)
            {   // found cname for current cname
                could_be_last = false;
                break;
            }
        }
        if (could_be_last)
        {
            last_cname = host_and_cname.second.Host;
            break;
        }
    }

    if (last_cname.empty())
    {
        GlobalLogger.info() << LogPrefix
            << "Could not identify \"last\" CNAME to handle -- "
            << "maybe we encountered a CNAME loop? Anyway, cannot proceed!";
        GlobalLogger.info() << LogPrefix << "Result CNAMEs were:";
        BOOST_FOREACH( const src_cname_pair &host_and_cname, result_cnames )
            GlobalLogger.info() << LogPrefix << host_and_cname.first << " --> "
                                             << host_and_cname.second.Host;
        handle_unavailable(recursion_count);
    }
    else
    {   // check cache for IP for this cname
        bool check_up_to_date = true;
        HostAddressVec cached_data = get_cached_ips_recursively(last_cname,
                                                            check_up_to_date);
        if ( !cached_data.empty() )
        {
            bool was_success = true;
            finalize_resolve(was_success, recursion_count+1);
        }
        else
        {   // get resolver for canonical name
            ResolverItem resolver = DnsMaster::get_instance()
                                    ->get_resolver_for(last_cname, Protocol);
            callback_type callback = boost::bind(
                                       &DnsResolver::cname_resolve_callback,
                                       this, _1, _2 );
            resolver->async_resolve( callback, recursion_count+1 );

            // treat a CNAME as a partial result: not enough to run callbacks
            // from finalize_resolve, but enough to stop timers and reset
            // RetryCount --> name resolution can take longer
            stop_trying(true);
        }
    }
}


/**
 * @brief callback given to the resolver of a CNAME by handle_cname
 *
 * the recursion_count here is really the one from the recursion, not the one
 * forwarded from async_resolve!
 *
 * @param was_success     result reported by the CNAME resolver
 * @param recursion_count count reported by the CNAME resolver
 */
void DnsResolver::cname_resolve_callback(const bool was_success,
                                         const int recursion_count)
{
    if ( OperationCancelled )
    {   // async_resolve was cancelled --> callbacks already called
        GlobalLogger.info() << LogPrefix
                            << "Ignoring CNAME results since we were cancelled";
        return;
    }
    else if (was_success)
    {
        GlobalLogger.debug() << LogPrefix << "CNAME resolution succeeded after "
                             << recursion_count << " recursions";
        finalize_resolve(was_success, recursion_count);
    }
    else
    {
        GlobalLogger.info() << LogPrefix << "CNAME resolution failed after "
                            << recursion_count << " recursions";
        // no use to schedule retry in this case since cname resolver must have
        // failed several times and we can only re-start the same procedure with
        // the same information. But can re-try later
        handle_unavailable(recursion_count);
    }
}


/**
 * @brief always called at end of resolving process
 *
 * runs callbacks, resets timers and checks state consistency; only thing that
 * is "left alive" is the long-term timer that might cause a re-start of
 * resolution after a while
 *
 * @param was_success: indicates whether resolution was successful
 * @param recursion_count number of recursions or (if not successful) negative
 *                        value indicating who called this function
 */
void DnsResolver::finalize_resolve(const bool was_success,
                                   const int recursion_count)
{
    // some consistency checks; failure might indicate a situation I had not
    // anticipated during programming but might not be harmful yet
    if ( !IsResolving )
        GlobalLogger.warning() << LogPrefix << "Consistency check failed: "
                                            << "not resolving any more!";
    if ( OperationCancelled )
        GlobalLogger.warning() << LogPrefix << "Consistency check failed: "
                                            << "was cancelled!";
    if ( RequestId != 0 )
        GlobalLogger.warning() << LogPrefix << "Consistency check failed: "
                                            << "waiting for DNS reply!";

    // stop timers
    stop_trying(was_success);

    // schedule callbacks, clearing callback list
    ResolverBase::schedule_callbacks(was_success, recursion_count);

    // finalize
    GlobalLogger.info() << LogPrefix << "finalized resolve"
                        << " with success = " << was_success
                        << " and recursion_count = " << recursion_count;
    IsResolving = false;
}


/**
 * @brief cancel the short-term timers and reset RetryCount
 *
 * arg was_success determines if stop trying forever or just for the moment
 * --> determines if we cancel StaleDataLongtermTimer or not
 */
void DnsResolver::stop_trying(bool was_success)
{
    // cancel timers
    GlobalLogger.debug() << LogPrefix << "Cancelling timers";
    ResolveTimeoutTimer.cancel();
    PauseBeforeRetryTimer.cancel();

    if (was_success)
    {
        StaleDataLongtermTimer.cancel();
        LongtermTimerIsActive = false;
    }

    // clean up
    RetryCount = 0;
}


/**
 * return true if resolver is currently resolving
 *
 * Is true from call to async_resolve until callbacks
 * --> returns true if waiting for result or (short-term) retry
 *
 * However, does NOT tell you if the (long-term) stale timeout is active!
 *   That timer has no effect on result, need to check is_waiting_to_resolve
 *   for that
 */
bool DnsResolver::is_resolving() const
{
    return IsResolving;
}


/**
 * returns true if either is_resolving or the long-term timer is active
 *
 * is_resolving returns true if the short-term retry timer is active
 */
bool DnsResolver::is_waiting_to_resolve() const
{
    return IsResolving || LongtermTimerIsActive;
}


/**
 * cancel an earlier call to async_resolve
 *
 * callbacks will be called with was_success=false; all internal operations
 * will be cancelled and internal callbacks (timers, dns results) have no
 * effect any more; cancels also the long-term stale-data timer
 */
void DnsResolver::cancel_resolve()
{
    if ( !IsResolving && !LongtermTimerIsActive)
    {
        GlobalLogger.info() << LogPrefix << "Cancel called on non-resolving, "
                                         << "non-waiting resolver -- ignore";
        return;
    }
    else if (OperationCancelled)
    {
        GlobalLogger.info() << LogPrefix
               << "Cancel called on cancelled resolver -- ignore";
        return;
    }
    GlobalLogger.info() << LogPrefix << "Cancel resolver";

    // set before finalize_resolve so can check in finalize_resolve that ID is
    // always 0; ID is not used any more since handle_dns_result stops if
    // OperationCancelled is true
    RequestId = 0;

    if ( IsResolving )
    {
        bool was_success = false;
        int recursion_count = -1;
        finalize_resolve(was_success, recursion_count);
    }

    // also cancel the long-term timer
    StaleDataLongtermTimer.cancel();
    LongtermTimerIsActive = false;

    // set after finalize_resolve, so can check in finalize_resolve that
    // OperationCancelled is never true
    OperationCancelled = true;
}


/**
 * @brief handler for ResolveTimeoutTimer
 *
 * A cancelled resolver ignores the handler regardless of the error code;
 * otherwise both a timer error and a regular expiry request a retry.
 *
 * @param recursion_count value given to do_resolve
 * @param error           result of the timer wait
 */
void DnsResolver::handle_resolve_timeout(const int recursion_count,
                                         const boost::system::error_code &error)
{
    if ( error ==  boost::asio::error::operation_aborted )   // cancelled
    {
        GlobalLogger.debug() << LogPrefix
                             << "Resolve timeout timer was cancelled!";
        return;
    }
    else if ( OperationCancelled )
    {   // async_resolve was cancelled --> callbacks already called
        GlobalLogger.info() << LogPrefix
                            << "Ignoring DNS timeout since we were cancelled";
        return;
    }
    else if (error)
    {
        GlobalLogger.info() << LogPrefix
                            << "resolve timeout handler received error "
                            << error << " --> request retry";
        schedule_retry(recursion_count);
    }
    else
    {
        GlobalLogger.info() << LogPrefix << "DNS resolving timed out";
        schedule_retry(recursion_count);
    }
}


/**
 * @brief count the failed attempt and either schedule a retry or give up
 *
 * Gives up (handle_unavailable) once RetryCount exceeds the limit returned by
 * DnsMaster::get_max_address_resolution_attempts.
 *
 * @param recursion_count forwarded to the next handler
 */
void DnsResolver::schedule_retry(const int recursion_count)
{
    // cancel timers
    ResolveTimeoutTimer.cancel();
    PauseBeforeRetryTimer.cancel();

    // increment timer
    ++RetryCount;

    if ( RetryCount > DnsMaster::get_instance()
                      ->get_max_address_resolution_attempts() )
    {   // too many re-tries
        GlobalLogger.info() << LogPrefix << "Not scheduling a retry since "
                            << "RetryCount " << RetryCount << " too high";
        handle_unavailable(recursion_count);   // will call stop_trying
    }                                          // --> reset RetryCount
    else
    {   // schedule retry
        GlobalLogger.info() << LogPrefix << "Scheduling a retry (RetryCount="
                            << RetryCount << ")";
        PauseBeforeRetryTimer.expires_from_now(
                seconds(Config::PauseBeforeRetrySeconds));
        PauseBeforeRetryTimer.async_wait(
                boost::bind( &DnsResolver::wait_timer_timeout_handler,
                             this, recursion_count,
                             boost::asio::placeholders::error) );
    }
}

/**
 * @brief handler for PauseBeforeRetryTimer and StaleDataLongtermTimer
 *
 * On regular expiry, re-starts resolving. A cancelled resolver ignores the
 * handler regardless of the error code.
 *
 * @param recursion_count value given when the timer was started
 * @param error           result of the timer wait
 */
void DnsResolver::wait_timer_timeout_handler( const int recursion_count,
                                         const boost::system::error_code &error)
{
    if ( error ==  boost::asio::error::operation_aborted )   // cancelled
    {   // assume that our code cancelled this timer, so callbacks will be
        // taken care of!
        GlobalLogger.debug() << LogPrefix
                             << "Resolve wait timer was cancelled! ";
    }
    else if ( OperationCancelled )
    {   // async_resolve was cancelled --> callbacks already called
        GlobalLogger.info() << LogPrefix
                          << "Ignoring waiting timeout since we were cancelled";
        return;
    }
    else if (error)
    {   // not sure what to do here, but callers waiting forever for a callback
        // is probably the worst thing to happen, so call finalize_resolve
        GlobalLogger.info() << LogPrefix
                            << "resolve wait handler received error "
                            << error << "! Try to finalize resolve";
        bool was_success = false;
        finalize_resolve(was_success, recursion_count);
    }
    else
    {
        GlobalLogger.info() << LogPrefix
                            << "Long-term timer expired --> re-try resolve";
        IsResolving = false;  // will be set to true immediately in do_resolve
        do_resolve(recursion_count);
    }
}


//==============================================================================
// RETRIEVAL
//==============================================================================

/**
 * @brief return the next cached IP, cycling through the cached addresses
 *
 * @param check_up_to_date if true, only return an address whose updated TTL
 *                         is above DnsMaster's resolved-ip TTL threshold
 * @returns next address; a default-constructed HostAddress if nothing is
 *          cached or no cached address passes the TTL check
 */
HostAddress DnsResolver::get_next_ip(bool check_up_to_date)
{
    // get cached data
    // (do not use arg check_up_to_date here in order to give NextIpIndex
    //  a chance to stay above number of outdated IPs)
    HostAddressVec cached_data = ResolverBase::get_cached_ips_recursively();

    // if no results cached, return default-constructed HostAddress (0.0.0.0)
    HostAddress return_candidate;
    if ( cached_data.empty() )
    {
        GlobalLogger.debug() << LogPrefix << "Get next IP: nothing cached";
        return return_candidate;
    }

    std::size_t n_iter = 0;
    std::size_t n_ips = cached_data.size();
    uint32_t ttl_thresh = static_cast<uint32_t>( DnsMaster::get_instance()
                                        ->get_resolved_ip_ttl_threshold() );

    GlobalLogger.info() << LogPrefix << "Get next IP from cached result of "
        << n_ips << " IPs; first index to consider is " << NextIpIndex
        << "; TTL thresh=" << ttl_thresh << "s is used: " << check_up_to_date;

    // loop until we have found a cached result (that is up to date)
    //   or until we have tried all cached IPs
    while (true)
    {
        // check index since cache size may have changed since last call
        if (NextIpIndex >= n_ips)
        {
            GlobalLogger.debug() << LogPrefix << "Reset NextIpIndex";
            NextIpIndex = 0;
        }
        else if ( n_iter >= n_ips)
        {
            GlobalLogger.debug() << LogPrefix << "No IP found";
            return HostAddress();   // have checked all candidates
        }
        else
        {   // there are candidates left to consider
            GlobalLogger.debug() << LogPrefix << "Check IP candidate at index "
                                 << NextIpIndex;
            return_candidate = cached_data[NextIpIndex++];
            if (!check_up_to_date)
            {
                GlobalLogger.debug() << LogPrefix << "not checking ttl, accept";
                return return_candidate;
            }
            else if (return_candidate.get_ttl().get_updated_value()
                     > ttl_thresh)
            {
                GlobalLogger.debug() << LogPrefix << "is up to date, accept";
                return return_candidate;
            }
            else
            {
                GlobalLogger.debug() << LogPrefix << "is out of date ("
                    << return_candidate.get_ttl().get_updated_value()
                    << "s <= " << ttl_thresh << "s), continue";
                ++n_iter;
            }
        }
    }
}

/**
 * @brief true if at least one cached IP passes the up-to-date check
 */
bool DnsResolver::have_up_to_date_ip()
{
    return get_resolved_ip_count(true) > 0;
}

/**
 * @brief number of cached IPs for this resolver's host name
 *
 * @param check_up_to_date forwarded to get_cached_ips_recursively
 */
int DnsResolver::get_resolved_ip_count(const bool check_up_to_date)
{
    // run with empty hostname --> uses internal var Hostname
    return static_cast<int>(
      ResolverBase::get_cached_ips_recursively("",check_up_to_date).size() );
}