2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
20 Christian Herdtweck, Intra2net AG 2015
22 with code copied from boost::net::dns::resolve.hpp
23 by Andreas Haberstroh (andreas at ibusy dot com)
24 from https://github.com/softwareace/Boost.DNS
27 #include "dns/dnsresolver.h"
32 #include <boost/foreach.hpp>
33 #include <boost/bind.hpp>
34 #include <boost/function.hpp>
35 #include <boost/net/dns.hpp>
36 #include <boost/date_time/posix_time/posix_time.hpp>
37 #include <boost/uuid/uuid.hpp>
38 #include <boost/uuid/uuid_io.hpp>
40 #include <logfunc.hpp>
42 using I2n::Logger::GlobalLogger;
43 using boost::posix_time::seconds;
// Timing and port settings used by DnsResolver (referenced below as Config::...).
// Seconds ResolveTimeoutTimer waits for a DNS reply (armed in do_resolve).
47 const int ResolveTimeoutSeconds = 2;
// Seconds PauseBeforeRetryTimer waits before the next attempt (see schedule_retry).
48 const int PauseBeforeRetrySeconds = 1;
// Seconds StaleDataLongtermTimer waits after giving up (see handle_unavailable).
49 const int StaleDataLongtermSeconds = 5*60;
// Port combined with the name server address to form the NameServer endpoint.
50 const int DNS_PORT = 53;
// Construct a resolver for a single host name.
//
// io_serv:     I/O service; dereferenced to create the UDP socket and the
//              three timers, and also forwarded to ResolverBase
// hostname:    host name to resolve; forwarded to ResolverBase
// protocol:    stored in Protocol; used when building the DNS request and
//              when filtering A/AAAA records
// cache:       forwarded to ResolverBase
// name_server: address paired with Config::DNS_PORT to form NameServer
53 DnsResolver::DnsResolver(IoServiceItem &io_serv,
54 const std::string &hostname,
55 const DnsIpProtocol &protocol,
56 const DnsCacheItem cache,
57 const boost::asio::ip::address &name_server)
58 : ResolverBase( io_serv, hostname, cache )
59 , Socket( *io_serv, ip::udp::endpoint(ip::udp::v4(), 0))
60 // just connect to anything, will specify sender/receiver later
63 , Protocol( protocol )
64 , NameServer( name_server, Config::DNS_PORT )
65 , ResolveTimeoutTimer( *io_serv )
66 , PauseBeforeRetryTimer( *io_serv )
67 , StaleDataLongtermTimer( *io_serv )
70 , IsResolving( false )
// placeholder prefix; replaced below by one that names the host
71 , LogPrefix( "DnsResolver" )
74 , OperationCancelled( false )
75 , LongtermTimerIsActive( false )
// build the per-host log prefix "Dns(<hostname>): "
77 std::stringstream temp;
78 temp << "Dns(" << ResolverBase::Hostname << "): ";
79 LogPrefix = temp.str();
// Destructor; logs a warning mentioning an error "when closing socket for DNS".
83 DnsResolver::~DnsResolver()
// error code that is reported in the warning below
85 boost::system::error_code error;
// socket shutdown is left disabled: per the notes below it always reported
// an error (EBADF in IcmpPinger, ENOTCONN here)
86 //Socket.shutdown(boost::asio::ip::udp::socket::shutdown_both, error);
88 // GlobalLogger.warning() << LogPrefix << "Received error " << error
89 // << " when shutting down socket for DNS";
90 // in IcmpPinger always gave an error system:9 (EBADF: Bad file descriptor)
91 // Here gives error system:107 ENOTCONN: Transport endpoint is not connected
95 GlobalLogger.warning() << LogPrefix << "Received error " << error
96 << " when closing socket for DNS";
101 //==============================================================================
103 //==============================================================================
106 * copied here code from boost::net::dns::resolve.hpp, since want async
107 * operation and that is used only internally, there
// Start one resolve attempt: cancel all timers, build a recursive DNS query
// for ResolverBase::Hostname, register the asynchronous receive and the reply
// timeout, then send the request to NameServer over the UDP socket.
109 void DnsResolver::do_resolve()
111 // check if resolving already
114 GlobalLogger.info() << LogPrefix
115 << "Call to do_resolve ignored since resolving already";
// a fresh attempt clears any earlier cancellation
119 OperationCancelled = false;
121 GlobalLogger.info() << LogPrefix << "start resolving for IPs of type "
122 << to_string(Protocol) << " using name server " << NameServer;
124 // just to be sure: cancel timers
125 ResolveTimeoutTimer.cancel();
126 PauseBeforeRetryTimer.cancel();
127 StaleDataLongtermTimer.cancel();
128 LongtermTimerIsActive = false;
130 // create DNS request
131 boost::net::dns::message dns_message( ResolverBase::Hostname, Protocol );
132 dns_message.recursive(true);
133 dns_message.action(boost::net::dns::message::query);
134 dns_message.opcode(boost::net::dns::message::squery);
136 // create random ID for message
// only the leading sizeof(RequestId) bytes of the generated UUID are used
137 boost::uuids::uuid message_id = RandomIdGenerator();
138 memcpy( &RequestId, message_id.data, sizeof(RequestId) );
139 dns_message.id( RequestId );
140 GlobalLogger.debug() << LogPrefix << "Request has ID "
141 << std::showbase << std::hex << dns_message.id();
143 // setup receipt of reply
// the receive is registered before the request is sent; the reply is
// delivered to handle_dns_result
144 Socket.async_receive_from(
145 boost::asio::buffer(ReceiveBuffer.get_array()),
147 boost::bind( &DnsResolver::handle_dns_result, this,
148 boost::asio::placeholders::error,
149 boost::asio::placeholders::bytes_transferred)
// arm the reply timeout; handle_resolve_timeout is its completion handler
153 (void) ResolveTimeoutTimer.expires_from_now(
154 seconds(Config::ResolveTimeoutSeconds));
155 ResolveTimeoutTimer.async_wait( boost::bind(
156 &DnsResolver::handle_resolve_timeout,
157 this, boost::asio::placeholders::error) );
// serialize the request into RequestBuffer and send it synchronously
160 dns_message.encode(RequestBuffer);
164 bytes_sent = Socket.send_to(
165 boost::asio::buffer(RequestBuffer.get_array()),
// a send failure is reported as a warning
168 catch (boost::system::system_error &err)
170 GlobalLogger.warning() << LogPrefix
171 << "Sending of DNS request message failed: "
// sending zero bytes is also reported as a warning
177 if ( bytes_sent == 0 )
179 GlobalLogger.warning() << LogPrefix << "Empty DNS request sent!";
// Completion handler for the async receive registered in do_resolve.
// Decodes the reply, checks its ID against RequestId, collects IPs, CNAMEs and
// name servers from the ANSWERS section, caches the CNAMEs and then hands over
// to handle_ips or handle_cname.
//
// error:             result of the receive operation
// bytes_transferred: number of bytes received into ReceiveBuffer
186 void DnsResolver::handle_dns_result(const boost::system::error_code &error,
187 const std::size_t bytes_transferred)
191 GlobalLogger.info() << LogPrefix << "DNS resolve resulted in error "
192 << error << " --> request retry";
196 else if ( OperationCancelled )
197 { // async_resolve was cancelled --> callbacks already called
198 GlobalLogger.info() << LogPrefix
199 << "Ignoring DNS results since we were cancelled";
203 GlobalLogger.debug() << LogPrefix << "Handling DNS result ("
204 << bytes_transferred << " bytes transferred)";
206 // next 3(+1) lines copied from boost/net/dns/resolver.hpp:
207 // clamp the recvBuffer with the number of bytes transferred or decode buffr
208 ReceiveBuffer.length(bytes_transferred);
209 boost::net::dns::message result_message;
210 result_message.decode( ReceiveBuffer );
// compare the reply's ID with the one stored when the request was built
213 if (RequestId != result_message.id())
214 GlobalLogger.warning() << LogPrefix << "Received answer for request ID "
215 << std::showbase << std::hex << result_message.id()
216 << " but expected ID " << RequestId;
218 GlobalLogger.debug() << LogPrefix << "Result has correct ID "
219 << std::showbase << std::hex << RequestId;
222 // loop over answers, remembering ips and cnames
223 // work with a regular pointer to list of answers since result_message is
224 // owner of data and that exists until end of function
225 // Items in answers list are shared_ptr to resource_base_t
226 std::vector<host_addr_pair> result_ips;
227 std::vector<src_cname_pair> result_cnames;
228 std::vector<string_pair> result_name_servers;
230 GlobalLogger.debug() << LogPrefix <<"Checking ANSWERS section of dns reply";
231 gather_results(result_message.answers(), &result_ips, &result_cnames,
232 &result_name_servers);
233 // results should have the logical order
234 // Hostname [ --> cname1 --> cname2 --> ... --> cnameN ] [ --> ips ]
235 // for cname count to be correct; rest of code should not be affected if not
237 // remember cname list (if there were any)
238 BOOST_FOREACH( const src_cname_pair &host_and_cname, result_cnames )
239 ResolverBase::update_cache(host_and_cname.first, host_and_cname.second);
// dispatch: IPs take precedence over CNAMEs; result_name_servers is collected
// but not used in the lines shown here
241 if ( !result_ips.empty() )
242 handle_ips( result_ips );
243 else if ( !result_cnames.empty() )
244 // no IPs but at least one cname --> find the "last" cname and
245 // re-start resolving with that
246 handle_cname(result_cnames);
248 { // no answers --> cannot proceed
249 GlobalLogger.warning() << LogPrefix << "No IP nor CNAME received! "
250 << "--> request retry";
256 * gather IPs, CNAMEs and name servers from list of resource records;
258 * can be run on answers(), authorities() and additional() sections of dns reply
261 * @param rr_list: input list of resource records
262 * @param result_ips: output vector of ips
263 * @param result_cnames: output vector of cnames
264 * @param result_name_servers: output vector of name servers
// Sort the resource records of one reply section into the three output
// vectors. A/AAAA records append to result_ips, CNAME records to result_cnames
// and NS records to result_name_servers; all other record types are only
// logged at debug level.
266 void DnsResolver::gather_results(const boost::net::dns::rr_list_t *rr_list,
267 std::vector<host_addr_pair> *result_ips,
268 std::vector<src_cname_pair> *result_cnames,
269 std::vector<string_pair> *result_name_servers)
272 using boost::net::dns::resource_base_t;
// reference time for the expiry strings printed in the log messages
273 boost::posix_time::ptime now =boost::posix_time::second_clock::local_time();
274 BOOST_FOREACH( boost::shared_ptr<resource_base_t> rr_item, *rr_list )
// fields common to every record type
276 boost::net::dns::type_t rr_type = rr_item->rtype();
277 uint32_t ttl = rr_item->ttl();
278 std::string domain = rr_item->domain();
// human-readable expiry time (now + TTL), used only for logging
280 boost::posix_time::to_simple_string(now + seconds(ttl));
282 if (rr_type == boost::net::dns::type_a)
283 { // 'A' resource records carry IPv4 addresses
284 if (Protocol == DNS_IPv6)
286 GlobalLogger.info() << LogPrefix << "Ignoring IPv4 address "
287 << "because resolver was configured to only use IPv6.";
// NOTE(review): the dynamic_cast results in this function do not appear to be
// null-checked before use -- confirm that rr_type guarantees the dynamic type
290 boost::asio::ip::address_v4 ip =
291 ( dynamic_cast<boost::net::dns::a_resource *> (rr_item.get()) )
293 result_ips->push_back(host_addr_pair(domain, HostAddress(ip, ttl)));
294 GlobalLogger.debug() << LogPrefix << domain << ": IPv4 " << ip
295 << " with TTL " << ttl << "s (until "
298 else if (rr_type == boost::net::dns::type_a6)
299 { // 'AAAA' resource records carry IPv6 addresses
300 if (Protocol == DNS_IPv4)
302 GlobalLogger.info() << LogPrefix << "Ignoring IPv6 address "
303 << "because resolver was configured to only use IPv4.";
306 boost::asio::ip::address_v6 ip =
307 ( dynamic_cast<boost::net::dns::a6_resource *> (rr_item.get()) )
309 result_ips->push_back(host_addr_pair(domain, HostAddress(ip, ttl)));
310 GlobalLogger.debug() << LogPrefix << domain << ": IPv6 " << ip
311 << " with TTL " << ttl << "s (until "
314 else if (rr_type == boost::net::dns::type_cname)
315 { // 'CNAME' resource records that carry aliases
317 (dynamic_cast<boost::net::dns::cname_resource *>(rr_item.get()))
// stored as (source domain, Cname(target, ttl))
319 result_cnames->push_back( src_cname_pair(domain,
320 Cname(cname, ttl)) );
321 GlobalLogger.debug() << LogPrefix << domain << ": CNAME to "
322 << cname << " with TTL " << ttl << "s (until "
325 else if (rr_type == boost::net::dns::type_ns)
326 { // NS (name_server) resource records
327 std::string name_server =
328 (dynamic_cast<boost::net::dns::ns_resource *>(rr_item.get()))
// stored as (domain, name server); the TTL is logged but not stored
330 result_name_servers->push_back( string_pair(domain, name_server) );
331 GlobalLogger.debug() << LogPrefix << "NameServer " << name_server
332 << " for " << domain << " with TTL " << ttl
333 << "s (until " << expiry << ")";
// remaining record types: log only, nothing is stored
335 else if (rr_type == boost::net::dns::type_soa)
336 GlobalLogger.debug() << LogPrefix << "SOA resource";
337 else if (rr_type == boost::net::dns::type_ptr)
338 GlobalLogger.debug() << LogPrefix << "ptr resource";
339 else if (rr_type == boost::net::dns::type_hinfo)
340 GlobalLogger.debug() << LogPrefix << "hinfo resource";
341 else if (rr_type == boost::net::dns::type_mx)
342 GlobalLogger.debug() << LogPrefix << "mx resource";
343 else if (rr_type == boost::net::dns::type_txt)
344 GlobalLogger.debug() << LogPrefix << "txt resource";
345 else if (rr_type == boost::net::dns::type_srv)
346 GlobalLogger.debug() << LogPrefix << "srv resource";
347 else if (rr_type == boost::net::dns::type_axfr)
348 GlobalLogger.debug() << LogPrefix << "axfr resource";
350 GlobalLogger.debug() << LogPrefix << "unknown resource type: "
351 << std::showbase << std::hex
352 << static_cast<unsigned>(rr_item->rtype());
// Give up for now: arm the long-term timer for a later attempt and report
// failure through finalize_resolve.
357 void DnsResolver::handle_unavailable()
359 // schedule new attempt in quite a while
360 StaleDataLongtermTimer.expires_from_now(
361 seconds(Config::StaleDataLongtermSeconds));
// wait_timer_timeout_handler is the completion handler for this timer
362 StaleDataLongtermTimer.async_wait(
363 boost::bind( &DnsResolver::wait_timer_timeout_handler,
364 this, boost::asio::placeholders::error
// flag that is_waiting_to_resolve() and cancel_resolve() consult
367 LongtermTimerIsActive = true;
369 // for now, admit failure
370 bool was_success = false;
371 finalize_resolve(was_success);
// Store the received IPs in the cache and finish the resolve successfully.
//
// result_ips: (host, address) pairs from the reply; must not be empty because
//             result_ips[0] is read unconditionally (handle_dns_result checks
//             for emptiness before calling)
375 void DnsResolver::handle_ips(const std::vector<host_addr_pair> &result_ips)
377 // received at least one IP which could be for the queried host name
378 // or the cname at the "end" of the cname list;
379 // but all IPs should be for the same
380 HostAddressVec addr_list;
// the host of the first entry is the reference all other entries are compared to
381 std::string only_host_for_ips = result_ips[0].first;
382 BOOST_FOREACH( const host_addr_pair &host_and_addr, result_ips)
384 if ( host_and_addr.first != only_host_for_ips )
385 GlobalLogger.warning() << LogPrefix
386 << "Received IPs for different hosts " << only_host_for_ips
387 << " and " << host_and_addr.first << " in one DNS result! "
388 << "--> ignore second";
391 GlobalLogger.notice() << LogPrefix << "Found IP "
392 << host_and_addr.second.get_ip() << " with TTL "
393 << host_and_addr.second.get_ttl().get_value() << "s";
394 addr_list.push_back(host_and_addr.second);
// cache the collected addresses under the reference host name
397 ResolverBase::update_cache( only_host_for_ips, addr_list );
400 bool was_success = true;
401 finalize_resolve(was_success);
// Continue resolving via the last CNAME of the received chain: look for a
// target that is not itself the source of another entry, then use cached IPs
// for it or start a resolver for it.
//
// result_cnames: (source, Cname) pairs gathered from the reply
405 void DnsResolver::handle_cname(const std::vector<src_cname_pair> &result_cnames)
407 // find the "last" cname in the list
408 // Hostname --> cname1 --> cname2 --> ... --> cnameN
409 // We assume here that this list might not be in order but that all cnames
410 // form a single list (form one connected list and not several isolated)
412 std::string last_cname = "";
// pairwise comparison: an entry whose target is the source of another entry
// cannot be the last one
414 BOOST_REVERSE_FOREACH( const src_cname_pair &host_and_cname, result_cnames )
416 could_be_last = true;
417 BOOST_REVERSE_FOREACH( const src_cname_pair &other, result_cnames )
419 if (other.first == host_and_cname.second.Host)
420 { // found cname for current cname
421 could_be_last = false;
427 last_cname = host_and_cname.second.Host;
// no candidate found: log the whole CNAME list and give up for now
432 if (last_cname.empty())
434 GlobalLogger.error() << LogPrefix
435 << "Could not identify \"last\" CNAME to handle -- "
436 << "maybe we encountered a CNAME loop? Anyway, cannot proceed!";
437 GlobalLogger.info() << LogPrefix << "Result CNAMEs were:";
438 BOOST_FOREACH( const src_cname_pair &host_and_cname, result_cnames )
439 GlobalLogger.info() << LogPrefix << host_and_cname.first << " --> "
440 << host_and_cname.second.Host;
441 handle_unavailable();
444 { // check cache for IP for this cname
445 bool check_up_to_date = true;
446 HostAddressVec cached_data = Cache->get_ips_recursive(last_cname,
// cache hit: finish successfully without another DNS request
448 if ( !cached_data.empty() )
450 bool was_success = true;
451 int cname_count = 1; // define cache access as only 1
452 finalize_resolve(was_success, cname_count);
455 { // get resolver for canonical name
456 ResolverItem resolver = DnsMaster::get_instance()
457 ->get_resolver_for(last_cname, Protocol);
// cname_resolve_callback is invoked with the outcome of that resolver
458 callback_type callback = boost::bind(
459 &DnsResolver::cname_resolve_callback,
461 resolver->async_resolve( callback );
463 // treat a CNAME as a partial result: not enough to run callbacks
464 // from finalize_resolve, but enough to stop timers and reset
465 // RetryCount --> name resolution can take longer
// Callback handed to the CNAME resolver started in handle_cname.
//
// was_success: outcome reported by that resolver
// cname_count: CNAME count reported by that resolver; passed on incremented
//              by one on success
472 void DnsResolver::cname_resolve_callback(const bool was_success,
473 const int cname_count)
475 if ( OperationCancelled )
476 { // async_resolve was cancelled --> callbacks already called
477 GlobalLogger.info() << LogPrefix
478 << "Ignoring CNAME results since we were cancelled";
481 else if (was_success)
483 GlobalLogger.debug() << LogPrefix << "CNAME resolution succeeded";
// +1 accounts for the CNAME hop handled by this resolver
484 finalize_resolve(was_success, cname_count+1);
488 GlobalLogger.info() << LogPrefix << "CNAME resolution failed";
489 // no use to schedule retry in this case since cname resolver must have
490 // failed several times and we can only re-start the same procedure with
491 // the same information. But can re-try later
492 handle_unavailable();
// Finish the current resolve: log consistency warnings, stop the timers via
// stop_trying, schedule the registered callbacks and log the outcome.
//
// was_success: result passed on to stop_trying and to the callbacks
// cname_count: CNAME count passed on to the callbacks
497 void DnsResolver::finalize_resolve(const bool was_success,
498 const int cname_count)
500 // some consistency checks; failure might indicate a situation I had not
501 // anticipated during programming but might not be harmful yet
// the checks below only log warnings
503 GlobalLogger.warning() << LogPrefix << "Consistency check failed: "
504 << "not resolving any more!";
505 if ( OperationCancelled )
506 GlobalLogger.warning() << LogPrefix << "Consistency check failed: "
507 << " was cancelled!";
508 if ( ResolverBase::CallbackList.empty() )
509 GlobalLogger.warning() << LogPrefix << "Consistency check failed: "
510 
511 if ( RequestId != 0 )
512 GlobalLogger.warning() << LogPrefix << "Consistency check failed: "
513 << "waiting for DNS reply!";
// timers are stopped before the callbacks are scheduled
516 stop_trying(was_success);
518 // schedule callbacks, clearing callback list
519 ResolverBase::schedule_callbacks(was_success, cname_count);
522 GlobalLogger.notice() << LogPrefix << "finalized resolve"
523 << " with success = " << was_success
524 << " and cname_count = " << cname_count;
530 * arg was_success determines if stop trying forever or just for the moment
531 * --> determines if we cancel StaleDataLongtermTimer or not
// Cancel the resolve-timeout and retry timers.
// NOTE(review): per the description above, the long-term timer is presumably
// cancelled only when was_success is true -- confirm.
533 void DnsResolver::stop_trying(bool was_success)
536 GlobalLogger.debug() << LogPrefix << "Cancelling timers";
537 ResolveTimeoutTimer.cancel();
538 PauseBeforeRetryTimer.cancel();
// long-term timer and its flag are reset together
542 StaleDataLongtermTimer.cancel();
543 LongtermTimerIsActive = false;
552 * return true if resolver is currently resolving
554 * Is true from call to async_resolve until callbacks
555 * --> returns true if waiting for result or (short-term) retry
557 * However, does NOT tell you if the (long-term) stale timeout is active!
558 * That timer has no effect on result, need to check is_waiting_to_resolve
// Const query for the short-term resolving state described above.
// NOTE(review): presumably returns IsResolving, the flag that
// is_waiting_to_resolve() combines with LongtermTimerIsActive -- confirm.
561 bool DnsResolver::is_resolving() const
568 * returns true if either is_resolving or the long-term timer is active
570 * is_resolving returns true if the short-term retry timer is active
// True while a resolve is in progress or the long-term timer is flagged active.
572 bool DnsResolver::is_waiting_to_resolve() const
574 return IsResolving || LongtermTimerIsActive;
579 * cancel an earlier call to async_resolve
581 * callbacks will be called with was_success=false; all internal operations
582 * will be cancelled and internal callbacks (timers, dns results) have no
583 * effect any more; cancels also the long-term stale-data timer
// Cancel a running or pending resolve: finalize with was_success=false, cancel
// the long-term timer and mark the resolver as cancelled. Calls on an idle or
// already cancelled resolver are logged as ignored.
585 void DnsResolver::cancel_resolve()
// neither resolving nor waiting on the long-term timer
587 if ( !IsResolving && !LongtermTimerIsActive)
589 GlobalLogger.info() << LogPrefix << "Cancel called on non-resolving, "
590 << "non-waiting resolver -- ignore";
593 else if (OperationCancelled)
595 GlobalLogger.info() << LogPrefix
596 << "Cancel called on cancelled resolver -- ignore";
599 GlobalLogger.info() << LogPrefix << "Cancel resolver";
601 // set before finalize_resolve so can check in finalize_resolve that ID is
602 // always 0; ID is not used any more since handle_dns_result stops if
603 // OperationCancelled is true
// callbacks are informed of the failure through finalize_resolve
608 bool was_success = false;
610 finalize_resolve(was_success, cname_count);
613 // also cancel the long-term timer
614 StaleDataLongtermTimer.cancel();
615 LongtermTimerIsActive = false;
617 // set after finalize_resolve, so can check in finalize_resolve that
618 // OperationCancelled is never true
619 OperationCancelled = true;
// Completion handler of ResolveTimeoutTimer (armed in do_resolve).
// Distinguishes a cancelled timer, any other timer error, a cancelled resolve
// and a genuine timeout.
//
// error: status delivered by the timer
624 void DnsResolver::handle_resolve_timeout(const boost::system::error_code &error)
626 if ( error == boost::asio::error::operation_aborted ) // cancelled
628 GlobalLogger.debug() << LogPrefix
629 << "Resolve timeout timer was cancelled!";
// any other error reported by the timer
634 GlobalLogger.warning() << LogPrefix
635 << "resolve timeout handler received error "
636 << error << " --> request retry";
639 else if ( OperationCancelled )
640 { // async_resolve was cancelled --> callbacks already called
641 GlobalLogger.info() << LogPrefix
642 << "Ignoring DNS timeout since we were cancelled";
// the timer expired without being cancelled: genuine timeout
647 GlobalLogger.notice() << LogPrefix << "DNS resolving timed out";
// Schedule another attempt after a short pause, or give up through
// handle_unavailable once RetryCount exceeds the configured maximum.
653 void DnsResolver::schedule_retry()
// stop both short-term timers before deciding how to continue
656 ResolveTimeoutTimer.cancel();
657 PauseBeforeRetryTimer.cancel();
// the limit is read from the DnsMaster singleton
662 if ( RetryCount > DnsMaster::get_instance()
663 ->get_max_address_resolution_attempts() )
664 { // too many re-tries
665 GlobalLogger.info() << LogPrefix << "Not scheduling a retry since "
666 << "RetryCount " << RetryCount << " too high";
667 handle_unavailable(); // will call stop_trying i.e. reset RetryCount
671 GlobalLogger.info() << LogPrefix << "Scheduling a retry (RetryCount="
672 << RetryCount << ")";
// wait_timer_timeout_handler is the completion handler for the pause timer
673 PauseBeforeRetryTimer.expires_from_now(
674 seconds(Config::PauseBeforeRetrySeconds));
675 PauseBeforeRetryTimer.async_wait(
676 boost::bind( &DnsResolver::wait_timer_timeout_handler,
677 this, boost::asio::placeholders::error) );
// Completion handler shared by PauseBeforeRetryTimer (schedule_retry) and
// StaleDataLongtermTimer (handle_unavailable).
//
// error: status delivered by the timer
681 void DnsResolver::wait_timer_timeout_handler(
682 const boost::system::error_code &error)
684 if ( error == boost::asio::error::operation_aborted ) // cancelled
685 { // assume that our code cancelled this timer, so callbacks will be
687 GlobalLogger.debug() << LogPrefix
688 << "Resolve wait timer was cancelled! ";
691 { // not sure what to do here, but callers waiting forever for a callback
692 // is probably the worst thing to happen, so call finalize_resolve
693 GlobalLogger.warning() << LogPrefix
694 << "resolve wait handler received error "
695 << error << "! Try to finalize resolve";
696 bool was_success = false;
697 finalize_resolve(was_success);
699 else if ( OperationCancelled )
700 { // async_resolve was cancelled --> callbacks already called
701 GlobalLogger.info() << LogPrefix
702 << "Ignoring waiting timeout since we were cancelled";
// the wait finished without being cancelled: start the next attempt
707 GlobalLogger.info() << LogPrefix << "Done waiting --> re-try resolve";
708 IsResolving = false; // will be set to true immediately in do_resolve
714 //==============================================================================
716 //==============================================================================
// Return the next cached IP, advancing NextIpIndex across calls.
//
// check_up_to_date: if false, the candidate at NextIpIndex is accepted without
//                   looking at its TTL; if true, candidates are checked against
//                   the TTL threshold obtained from DnsMaster
// Returns a default-constructed HostAddress if nothing is cached or all
// candidates have been checked without success.
718 HostAddress DnsResolver::get_next_ip(bool check_up_to_date)
721 // (do not use arg check_up_to_date here in order to give NextIpIndex
722 // a chance to stay above number of outdated IPs)
723 HostAddressVec cached_data = ResolverBase::get_cached_ips_recursively();
725 // if no results cached, return default-constructed HostAddress (0.0.0.0)
726 HostAddress return_candidate;
727 if ( cached_data.empty() )
729 GlobalLogger.debug() << LogPrefix << "Get next IP: nothing cached";
730 return return_candidate;
// n_iter is compared against n_ips below to stop after all candidates were tried
733 std::size_t n_iter = 0;
734 std::size_t n_ips = cached_data.size();
735 uint32_t ttl_thresh = static_cast<uint32_t>( DnsMaster::get_instance()
736 ->get_resolved_ip_ttl_threshold() );
738 GlobalLogger.info() << LogPrefix << "Get next IP from cached result of "
739 << n_ips << " IPs; first index to consider is " << NextIpIndex
740 << "; TTL thresh=" << ttl_thresh << "s is used: " << check_up_to_date;
742 // loop until we have found a cached result (that is up to date)
743 // or until we have tried all cached IPs
746 // check index since cache size may have changed since last call
747 if (NextIpIndex >= n_ips)
749 GlobalLogger.debug() << LogPrefix << "Reset NextIpIndex";
752 else if ( n_iter >= n_ips)
754 GlobalLogger.debug() << LogPrefix << "No IP found";
755 return HostAddress(); // have checked all candidates
758 { // there are candidates left to consider
759 GlobalLogger.debug() << LogPrefix << "Check IP candidate at index "
// take the candidate and advance the index for the next call/iteration
761 return_candidate = cached_data[NextIpIndex++];
762 if (!check_up_to_date)
764 GlobalLogger.debug() << LogPrefix << "not checking ttl, accept";
765 return return_candidate;
// otherwise the candidate's updated TTL value decides
767 else if (return_candidate.get_ttl().get_updated_value()
770 GlobalLogger.debug() << LogPrefix << "is up to date, accept";
771 return return_candidate;
775 GlobalLogger.debug() << LogPrefix << "is out of date ("
776 << return_candidate.get_ttl().get_updated_value()
777 << "s <= " << ttl_thresh << "s), continue";
// True if get_resolved_ip_count reports at least one IP with the up-to-date
// check enabled.
784 bool DnsResolver::have_up_to_date_ip()
786 return get_resolved_ip_count(true) > 0;
// Number of cached IPs returned by ResolverBase::get_cached_ips_recursively.
//
// check_up_to_date: forwarded unchanged to the cache lookup
// The container size is converted to int on return.
789 int DnsResolver::get_resolved_ip_count(const bool check_up_to_date)
791 // run with empty hostname --> uses internal var Hostname
792 return ResolverBase::get_cached_ips_recursively("",check_up_to_date).size();