resolver.go 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665
  1. /*
  2. * Copyright (c) 2022, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package resolver implements a DNS stub resolver, or DNS client, which
  20. // resolves domain names.
  21. //
  22. // The resolver is Psiphon-specific and oriented towards blocking resistance.
  23. // See ResolveIP for more details.
  24. package resolver
  25. import (
  26. "context"
  27. "encoding/hex"
  28. "fmt"
  29. "net"
  30. "sync"
  31. "sync/atomic"
  32. "syscall"
  33. "time"
  34. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  35. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  36. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
  37. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  38. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
  39. lrucache "github.com/cognusion/go-cache-lru"
  40. "github.com/miekg/dns"
  41. )
  42. const (
  43. resolverCacheDefaultTTL = 1 * time.Minute
  44. resolverCacheReapFrequency = 1 * time.Minute
  45. resolverCacheMaxEntries = 10000
  46. resolverServersUpdateTTL = 5 * time.Second
  47. resolverDefaultAttemptsPerServer = 2
  48. resolverDefaultRequestTimeout = 5 * time.Second
  49. resolverDefaultAwaitTimeout = 10 * time.Millisecond
  50. resolverDefaultAnswerTTL = 1 * time.Minute
  51. resolverDNSPort = "53"
  52. udpPacketBufferSize = 1232
  53. )
  54. // NetworkConfig specifies network-level configuration for a Resolver.
  55. type NetworkConfig struct {
  56. // GetDNSServers returns a list of system DNS server addresses (IP:port, or
  57. // IP only with port 53 assumed), as determined via OS APIs, in priority
  58. // order. GetDNSServers may be nil.
  59. GetDNSServers func() []string
  60. // BindToDevice should ensure the input file descriptor, a UDP socket, is
  61. // excluded from VPN routing. BindToDevice may be nil.
  62. BindToDevice func(fd int) (string, error)
  63. // AllowDefaultResolverWithBindToDevice indicates that it's safe to use
  64. // the default resolver when BindToDevice is configured, as the host OS
  65. // will automatically exclude DNS requests from the VPN.
  66. AllowDefaultResolverWithBindToDevice bool
  67. // IPv6Synthesize should apply NAT64 synthesis to the input IPv4 address,
  68. // returning a synthesized IPv6 address that will route to the same
  69. // endpoint. IPv6Synthesize may be nil.
  70. IPv6Synthesize func(IPv4 string) string
  71. // HasIPv6Route should return true when the host has an IPv6 route.
  72. // Resolver has an internal implementation, hasRoutableIPv6Interface, to
  73. // determine this, but it can fail on some platforms ("route ip+net:
  74. // netlinkrib: permission denied" on Android, for example; see Go issue
  75. // 40569). When HasIPv6Route is nil, the internal implementation is used.
  76. HasIPv6Route func() bool
  77. // LogWarning is an optional callback which is used to log warnings and
  78. // transient errors which would otherwise not be recorded or returned.
  79. LogWarning func(error)
  80. // LogHostnames indicates whether to log hostname in errors or not.
  81. LogHostnames bool
  82. // CacheExtensionInitialTTL specifies a minimum TTL to use when caching
  83. // domain resolution results. This minimum will override any TTL in the
  84. // DNS response. CacheExtensionInitialTTL is off when 0.
  85. CacheExtensionInitialTTL time.Duration
  86. // CacheExtensionVerifiedTTL specifies the minimum TTL to set for a cached
  87. // domain resolution result after the result has been verified.
  88. // CacheExtensionVerifiedTTL is off when 0.
  89. //
  90. // DNS cache extension is a workaround to partially mitigate issues with
  91. // obtaining underlying system DNS server IPs on platforms such as iOS
  92. // once a VPN is running and after network changes, such as changing from
  93. // Wi-Fi to mobile. While ResolveParameters.AlternateDNSServer can be
  94. // used to specify a known public DNS server, it may be the case that
  95. // public DNS servers are blocked or always falling back to a public DNS
  96. // server creates unusual traffic. And while it may be possible to use
  97. // the default system resolver, it lacks certain circumvention
  98. // capabilities.
  99. //
  100. // Extending the TTL for cached responses allows Psiphon to redial domains
  101. // using recently successful IPs.
  102. //
  103. // CacheExtensionInitialTTL allows for a greater initial minimum TTL, so
  104. // that the response entry remains in the cache long enough for a dial to
  105. // fully complete and verify the endpoint. Psiphon will call
  106. // Resolver.VerifyExtendCacheTTL once a dial has authenticated, for
  107. // example, the destination Psiphon server. VerifyCacheExtension will
  108. // further extend the corresponding TTL to CacheExtensionVerifiedTTL, a
  109. // longer TTL. CacheExtensionInitialTTL is intended to be on the order of
  110. // minutes and CacheExtensionVerifiedTTL may be on the order of hours.
  111. //
  112. // When CacheExtensionVerifiedTTL is on, the DNS cache is not flushed on
  113. // network changes, to allow for the previously cached entries to remain
  114. // available in the problematic scenario. Like adjusting TTLs, this is an
  115. // explicit trade-off which doesn't adhere to standard best practise, but
  116. // is expected to be more blocking resistent; this approach also assumes
  117. // that endpoints such as CDN IPs are typically available on any network.
  118. CacheExtensionVerifiedTTL time.Duration
  119. }
  120. func (c *NetworkConfig) allowDefaultResolver() bool {
  121. // When BindToDevice is configured, the standard library resolver is not
  122. // used, as the system resolver may not route outside of the VPN.
  123. return c.BindToDevice == nil || c.AllowDefaultResolverWithBindToDevice
  124. }
  125. func (c *NetworkConfig) logWarning(err error) {
  126. if c.LogWarning != nil {
  127. c.LogWarning(err)
  128. }
  129. }
  130. // ResolveParameters specifies the configuration and behavior of a single
  131. // ResolveIP call, a single domain name resolution.
  132. //
  133. // New ResolveParameters may be generated by calling MakeResolveParameters,
  134. // which takes tactics parameters as an input.
  135. //
  136. // ResolveParameters may be persisted for replay.
  137. type ResolveParameters struct {
  138. // AttemptsPerServer specifies how many requests to send to each DNS
  139. // server before trying the next server. IPv4 and IPv6 requests are sent
  140. // concurrently and count as one attempt.
  141. AttemptsPerServer int
  142. // AttemptsPerPreferredServer is AttemptsPerServer for a preferred
  143. // alternate DNS server.
  144. AttemptsPerPreferredServer int
  145. // RequestTimeout specifies how long to wait for a valid response before
  146. // moving on to the next attempt.
  147. RequestTimeout time.Duration
  148. // AwaitTimeout specifies how long to await an additional response after
  149. // the first response is received. This additional wait time applies only
  150. // when there is either no IPv4 or IPv6 response.
  151. AwaitTimeout time.Duration
  152. // PreresolvedIPAddress specifies an IP address result to be used in place
  153. // of making a request.
  154. PreresolvedIPAddress string
  155. // AlternateDNSServer specifies an alterate DNS server (IP:port, or IP
  156. // only with port 53 assumed) to be used when either no system DNS
  157. // servers are available or when PreferAlternateDNSServer is set.
  158. AlternateDNSServer string
  159. // PreferAlternateDNSServer indicates whether to prioritize using the
  160. // AlternateDNSServer. When set, the AlternateDNSServer is attempted
  161. // before any system DNS servers.
  162. PreferAlternateDNSServer bool
  163. // ProtocolTransformName specifies the name associated with
  164. // ProtocolTransformSpec and is used for metrics.
  165. ProtocolTransformName string
  166. // ProtocolTransformSpec specifies a transform to apply to the DNS request packet.
  167. // See: "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms".
  168. //
  169. // As transforms operate on strings and DNS requests are binary,
  170. // transforms should be expressed using hex characters.
  171. //
  172. // DNS transforms include strategies discovered by the Geneva team,
  173. // https://geneva.cs.umd.edu.
  174. ProtocolTransformSpec transforms.Spec
  175. // ProtocolTransformSeed specifies the seed to use for generating random
  176. // data in the ProtocolTransformSpec transform. To replay a transform,
  177. // specify the same seed.
  178. ProtocolTransformSeed *prng.Seed
  179. // IncludeEDNS0 indicates whether to include the EDNS(0) UDP maximum
  180. // response size extension in DNS requests. The resolver can handle
  181. // responses larger than 512 bytes (RFC 1035 maximum) regardless of
  182. // whether the extension is included; the extension may be included as
  183. // part of appearing similar to other DNS traffic.
  184. IncludeEDNS0 bool
  185. firstAttemptWithAnswer int32
  186. }
  187. // GetFirstAttemptWithAnswer returns the index of the first request attempt
  188. // that received a valid response, for the most recent ResolveIP call using
  189. // this ResolveParameters. This information is used for logging metrics. The
  190. // first attempt has index 1. GetFirstAttemptWithAnswer return 0 when no
  191. // request attempt has reported a valid response.
  192. //
  193. // The caller is responsible for synchronizing use of a ResolveParameters
  194. // instance (e.g, use a distinct ResolveParameters per ResolveIP to ensure
  195. // GetFirstAttemptWithAnswer refers to a specific ResolveIP).
  196. func (r *ResolveParameters) GetFirstAttemptWithAnswer() int {
  197. return int(atomic.LoadInt32(&r.firstAttemptWithAnswer))
  198. }
  199. func (r *ResolveParameters) setFirstAttemptWithAnswer(attempt int) {
  200. atomic.StoreInt32(&r.firstAttemptWithAnswer, int32(attempt))
  201. }
  202. // Implementation note: Go's standard net.Resolver supports specifying a
  203. // custom Dial function. This could be used to implement at least a large
  204. // subset of the Resolver functionality on top of Go's standard library
  205. // resolver. However, net.Resolver is limited to using the CGO resolver on
  206. // Android, https://github.com/golang/go/issues/8877, in which case the
  207. // custom Dial function is not used. Furthermore, the the pure Go resolver in
  208. // net/dnsclient_unix.go appears to not be used on Windows at this time.
  209. //
  210. // Go also provides golang.org/x/net/dns/dnsmessage, a DNS message marshaller,
  211. // which could potentially be used in place of github.com/miekg/dns.
  212. // Resolver is a DNS stub resolver, or DNS client, which resolves domain
  213. // names. A Resolver instance maintains a cache, a network state snapshot,
  214. // and metrics. All ResolveIP calls will share the same cache and state.
  215. // Multiple concurrent ResolveIP calls are supported.
  216. type Resolver struct {
  217. networkConfig *NetworkConfig
  218. mutex sync.Mutex
  219. networkID string
  220. hasIPv6Route bool
  221. systemServers []string
  222. lastServersUpdate time.Time
  223. cache *lrucache.Cache
  224. metrics resolverMetrics
  225. }
  226. type resolverMetrics struct {
  227. resolves int
  228. cacheHits int
  229. verifiedCacheExtensions int
  230. requestsIPv4 int
  231. requestsIPv6 int
  232. responsesIPv4 int
  233. responsesIPv6 int
  234. defaultResolves int
  235. defaultSuccesses int
  236. peakInFlight int64
  237. minRTT time.Duration
  238. maxRTT time.Duration
  239. }
  240. func newResolverMetrics() resolverMetrics {
  241. return resolverMetrics{minRTT: -1}
  242. }
  243. // NewResolver creates a new Resolver instance.
  244. func NewResolver(networkConfig *NetworkConfig, networkID string) *Resolver {
  245. r := &Resolver{
  246. networkConfig: networkConfig,
  247. metrics: newResolverMetrics(),
  248. }
  249. // updateNetworkState will initialize the cache and network state,
  250. // including system DNS servers.
  251. r.updateNetworkState(networkID)
  252. return r
  253. }
  254. // Stop clears the Resolver cache and resets metrics. Stop must be called only
  255. // after ceasing all in-flight ResolveIP goroutines, or else the cache or
  256. // metrics may repopulate. A Resolver may be resumed after calling Stop, but
  257. // Update must be called first.
  258. func (r *Resolver) Stop() {
  259. r.mutex.Lock()
  260. defer r.mutex.Unlock()
  261. // r.networkConfig is not set to nil to avoid possible nil pointer
  262. // dereferences by concurrent ResolveIP calls.
  263. r.networkID = ""
  264. r.hasIPv6Route = false
  265. r.systemServers = nil
  266. r.cache.Flush()
  267. r.metrics = newResolverMetrics()
  268. }
  269. // MakeResolveParameters generates ResolveParameters using the input tactics
  270. // parameters and optional frontingProviderID context.
  271. func (r *Resolver) MakeResolveParameters(
  272. p parameters.ParametersAccessor,
  273. frontingProviderID string) (*ResolveParameters, error) {
  274. params := &ResolveParameters{
  275. AttemptsPerServer: p.Int(parameters.DNSResolverAttemptsPerServer),
  276. AttemptsPerPreferredServer: p.Int(parameters.DNSResolverAttemptsPerPreferredServer),
  277. RequestTimeout: p.Duration(parameters.DNSResolverRequestTimeout),
  278. AwaitTimeout: p.Duration(parameters.DNSResolverAwaitTimeout),
  279. }
  280. // When a frontingProviderID is specified, generate a pre-resolved IP
  281. // address, based on tactics configuration.
  282. if frontingProviderID != "" {
  283. if p.WeightedCoinFlip(parameters.DNSResolverPreresolvedIPAddressProbability) {
  284. CIDRs := p.LabeledCIDRs(parameters.DNSResolverPreresolvedIPAddressCIDRs, frontingProviderID)
  285. if len(CIDRs) > 0 {
  286. CIDR := CIDRs[prng.Intn(len(CIDRs))]
  287. IP, err := generateIPAddressFromCIDR(CIDR)
  288. if err != nil {
  289. return nil, errors.Trace(err)
  290. }
  291. params.PreresolvedIPAddress = IP.String()
  292. }
  293. }
  294. }
  295. // When PreresolvedIPAddress is set, there's no DNS request and the
  296. // following params can be skipped.
  297. if params.PreresolvedIPAddress != "" {
  298. return params, nil
  299. }
  300. // When preferring an alternate DNS server, select the alternate from
  301. // DNSResolverPreferredAlternateServers. This list is for circumvention
  302. // operations, such as using a public DNS server with a protocol
  303. // transform. Otherwise, select from DNSResolverAlternateServers, which
  304. // is a fallback list of DNS servers to be used when the system DNS
  305. // servers cannot be obtained.
  306. preferredServers := p.Strings(parameters.DNSResolverPreferredAlternateServers)
  307. preferAlternateDNSServer := len(preferredServers) > 0 && p.WeightedCoinFlip(
  308. parameters.DNSResolverPreferAlternateServerProbability)
  309. alternateServers := preferredServers
  310. if !preferAlternateDNSServer {
  311. alternateServers = p.Strings(parameters.DNSResolverAlternateServers)
  312. }
  313. // Select an alternate DNS server, typically a public DNS server. Ensure
  314. // tactics is configured with an empty DNSResolverAlternateServers list
  315. // in cases where attempts to public DNS server are unwanted.
  316. if len(alternateServers) > 0 {
  317. alternateServer := alternateServers[prng.Intn(len(alternateServers))]
  318. // Check that the alternateServer has a well-formed IP address; and add
  319. // a default port if none it present.
  320. host, _, err := net.SplitHostPort(alternateServer)
  321. if err != nil {
  322. // Assume the SplitHostPort error is due to missing port.
  323. host = alternateServer
  324. alternateServer = net.JoinHostPort(alternateServer, resolverDNSPort)
  325. }
  326. if net.ParseIP(host) == nil {
  327. // Log warning and proceed without this DNS server.
  328. r.networkConfig.logWarning(
  329. errors.TraceNew("invalid alternate DNS server IP address"))
  330. } else {
  331. params.AlternateDNSServer = alternateServer
  332. params.PreferAlternateDNSServer = preferAlternateDNSServer
  333. }
  334. }
  335. // Select a DNS transform. DNS request transforms are "scoped" by
  336. // alternate DNS server (IP address without port); that is, when an
  337. // alternate DNS server is certain to be attempted first, a transform
  338. // associated with and known to work with that DNS server will be
  339. // selected. Otherwise, a transform from the default scope
  340. // (transforms.SCOPE_ANY == "") is selected.
  341. //
  342. // In any case, ResolveIP will only apply a transform on the first request
  343. // attempt.
  344. if p.WeightedCoinFlip(parameters.DNSResolverProtocolTransformProbability) {
  345. specs := p.ProtocolTransformSpecs(
  346. parameters.DNSResolverProtocolTransformSpecs)
  347. scopedSpecNames := p.ProtocolTransformScopedSpecNames(
  348. parameters.DNSResolverProtocolTransformScopedSpecNames)
  349. // The alternate DNS server will be the first attempt if
  350. // PreferAlternateDNSServer or the list of system DNS servers is empty.
  351. //
  352. // Limitation: the system DNS server list may change, due to a later
  353. // Resolver.update call when ResolveIP is called with these
  354. // ResolveParameters.
  355. _, systemServers := r.getNetworkState()
  356. scope := transforms.SCOPE_ANY
  357. if params.AlternateDNSServer != "" &&
  358. (params.PreferAlternateDNSServer || len(systemServers) == 0) {
  359. // Remove the port number, as the scope key is an IP address only.
  360. //
  361. // TODO: when we only just added the default port above, which is
  362. // the common case, we could avoid this extra split.
  363. host, _, err := net.SplitHostPort(params.AlternateDNSServer)
  364. if err != nil {
  365. return nil, errors.Trace(err)
  366. }
  367. scope = host
  368. }
  369. name, spec := specs.Select(scope, scopedSpecNames)
  370. if spec != nil {
  371. params.ProtocolTransformName = name
  372. params.ProtocolTransformSpec = spec
  373. var err error
  374. params.ProtocolTransformSeed, err = prng.NewSeed()
  375. if err != nil {
  376. return nil, errors.Trace(err)
  377. }
  378. }
  379. }
  380. if p.WeightedCoinFlip(parameters.DNSResolverIncludeEDNS0Probability) {
  381. params.IncludeEDNS0 = true
  382. }
  383. return params, nil
  384. }
  385. // ResolveAddress splits the input host:port address, calls ResolveIP to
  386. // resolve the IP address of the host, selects an IP if there are multiple,
  387. // and returns a rejoined IP:port.
  388. func (r *Resolver) ResolveAddress(
  389. ctx context.Context,
  390. networkID string,
  391. params *ResolveParameters,
  392. address string) (string, error) {
  393. hostname, port, err := net.SplitHostPort(address)
  394. if err != nil {
  395. return "", errors.Trace(err)
  396. }
  397. IPs, err := r.ResolveIP(ctx, networkID, params, hostname)
  398. if err != nil {
  399. return "", errors.Trace(err)
  400. }
  401. return net.JoinHostPort(IPs[prng.Intn(len(IPs))].String(), port), nil
  402. }
  403. // ResolveIP resolves a domain name.
  404. //
  405. // The input params may be nil, in which case default timeouts are used.
  406. //
  407. // ResolveIP performs concurrent A and AAAA lookups, returns any valid
  408. // response IPs, and caches results. An error is returned when there are
  409. // no valid response IPs.
  410. //
  411. // ResolveIP is not a general purpose resolver and is Psiphon-specific. For
  412. // example, resolved domains are expected to exist; ResolveIP does not
  413. // fallback to TCP; does not consult any "hosts" file; does not perform RFC
  414. // 3484 sorting logic (see Go issue 18518); only implements a subset of
  415. // Go/glibc/resolv.conf(5) resolver parameters (attempts and timeouts, but
  416. // not rotate, single-request etc.) ResolveIP does not implement singleflight
  417. // logic, as the Go resolver does, and allows multiple concurrent requests for
  418. // the same domain -- Psiphon won't often resolve the exact same domain
  419. // multiple times concurrently, and, when it does, there's a circumvention
  420. // benefit to attempting different DNS servers and protocol transforms.
  421. //
  422. // ResolveIP does not currently support DoT, DoH, or TCP; those protocols are
  423. // often blocked or less common. Instead, ResolveIP makes a best effort to
  424. // evade plaintext UDP DNS interference by ignoring invalid responses and by
  425. // optionally applying protocol transforms that may evade blocking.
  426. func (r *Resolver) ResolveIP(
  427. ctx context.Context,
  428. networkID string,
  429. params *ResolveParameters,
  430. hostname string) ([]net.IP, error) {
  431. // ResolveIP does _not_ lock r.mutex for the lifetime of the function, to
  432. // ensure many ResolveIP calls can run concurrently.
  433. // If the hostname is already an IP address, just return that. For
  434. // metrics, this does not count as a resolve, as the caller may invoke
  435. // ResolveIP for all dials.
  436. IP := net.ParseIP(hostname)
  437. if IP != nil {
  438. return []net.IP{IP}, nil
  439. }
  440. // Count all resolves of an actual domain, including cached and
  441. // pre-resolved cases.
  442. r.updateMetricResolves()
  443. // Call updateNetworkState immediately before resolving, as a best effort
  444. // to ensure that system DNS servers and IPv6 routing network state
  445. // reflects the current network. updateNetworkState locks the Resolver
  446. // mutex for its duration, and so concurrent ResolveIP calls may block at
  447. // this point. However, all updateNetworkState operations are local to
  448. // the host or device; and, if the networkID is unchanged since the last
  449. // call, updateNetworkState may not perform any operations; and after the
  450. // updateNetworkState call, ResolveIP proceeds without holding the mutex
  451. // lock. As a result, this step should not prevent ResolveIP concurrency.
  452. r.updateNetworkState(networkID)
  453. if params == nil {
  454. // Supply default ResolveParameters
  455. params = &ResolveParameters{
  456. AttemptsPerServer: resolverDefaultAttemptsPerServer,
  457. AttemptsPerPreferredServer: resolverDefaultAttemptsPerServer,
  458. RequestTimeout: resolverDefaultRequestTimeout,
  459. AwaitTimeout: resolverDefaultAwaitTimeout,
  460. }
  461. }
  462. // When PreresolvedIPAddress is set, tactics parameters determined the IP address
  463. // in this case.
  464. if params.PreresolvedIPAddress != "" {
  465. IP := net.ParseIP(params.PreresolvedIPAddress)
  466. if IP == nil {
  467. // Unexpected case, as MakeResolveParameters selects the IP address.
  468. return nil, errors.TraceNew("invalid IP address")
  469. }
  470. return []net.IP{IP}, nil
  471. }
  472. // Use a snapshot of the current network state, including IPv6 routing and
  473. // system DNS servers.
  474. //
  475. // Limitation: these values are used even if the network changes in the
  476. // middle of a ResolveIP call; ResolveIP is not interrupted if the
  477. // network changes.
  478. hasIPv6Route, systemServers := r.getNetworkState()
  479. // Use the standard library resolver when there's no GetDNSServers, or the
  480. // system server list is otherwise empty, and no alternate DNS server is
  481. // configured.
  482. //
  483. // Note that in the case where there are no system DNS servers and there
  484. // is an AlternateDNSServer, if the AlternateDNSServer attempt fails,
  485. // control does not flow back to defaultResolverLookupIP. On platforms
  486. // without GetDNSServers, the caller must arrange for distinct attempts
  487. // that try an AlternateDNSServer, or just use the standard library
  488. // resolver.
  489. //
  490. // ResolveIP should always be called, even when defaultResolverLookupIP is
  491. // expected to be used, to ensure correct metrics counts and ensure a
  492. // consistent error message log stack for all DNS-related failures.
  493. //
  494. if len(systemServers) == 0 &&
  495. params.AlternateDNSServer == "" &&
  496. r.networkConfig.allowDefaultResolver() {
  497. IPs, err := defaultResolverLookupIP(ctx, hostname, r.networkConfig.LogHostnames)
  498. r.updateMetricDefaultResolver(err == nil)
  499. if err != nil {
  500. return nil, errors.Trace(err)
  501. }
  502. return IPs, err
  503. }
  504. // Consult the cache before making queries. This comes after the standard
  505. // library case, to allow the standard library to provide its own caching
  506. // logic.
  507. IPs := r.getCache(hostname)
  508. if IPs != nil {
  509. return IPs, nil
  510. }
  511. // Set the list of DNS servers to attempt. AlternateDNSServer is used
  512. // first when PreferAlternateDNSServer is set; otherwise
  513. // AlternateDNSServer is used only when there is no system DNS server.
  514. var servers []string
  515. if params.AlternateDNSServer != "" &&
  516. (len(systemServers) == 0 || params.PreferAlternateDNSServer) {
  517. servers = []string{params.AlternateDNSServer}
  518. }
  519. servers = append(servers, systemServers...)
  520. if len(servers) == 0 {
  521. return nil, errors.TraceNew("no DNS servers")
  522. }
  523. // Set the request timeout and set up a reusable timer for handling
  524. // request and await timeouts.
  525. //
  526. // We expect to always have a request timeout. Handle the unexpected no
  527. // timeout, 0, case by setting the longest timeout possible, ~290 years;
  528. // always having a non-zero timeout makes the following code marginally
  529. // simpler.
  530. requestTimeout := params.RequestTimeout
  531. if requestTimeout == 0 {
  532. requestTimeout = 1<<63 - 1
  533. }
  534. var timer *time.Timer
  535. timerDrained := true
  536. resetTimer := func(timeout time.Duration) {
  537. if timer == nil {
  538. timer = time.NewTimer(timeout)
  539. } else {
  540. if !timerDrained && !timer.Stop() {
  541. <-timer.C
  542. }
  543. timer.Reset(timeout)
  544. }
  545. timerDrained = false
  546. }
  547. // Orchestrate the DNS requests
  548. resolveCtx, cancelFunc := context.WithCancel(ctx)
  549. defer cancelFunc()
  550. waitGroup := new(sync.WaitGroup)
  551. conns := common.NewConns()
  552. type answer struct {
  553. attempt int
  554. IPs []net.IP
  555. TTLs []time.Duration
  556. }
  557. var maxAttempts int
  558. if params.PreferAlternateDNSServer {
  559. maxAttempts = params.AttemptsPerPreferredServer
  560. maxAttempts += (len(servers) - 1) * params.AttemptsPerServer
  561. } else {
  562. maxAttempts = len(servers) * params.AttemptsPerServer
  563. }
  564. answerChan := make(chan *answer, maxAttempts*2)
  565. inFlight := int64(0)
  566. awaitA := int32(1)
  567. awaitAAAA := int32(1)
  568. if !hasIPv6Route {
  569. awaitAAAA = 0
  570. }
  571. var result *answer
  572. var lastErr atomic.Value
  573. stop := false
  574. for i := 0; !stop && i < maxAttempts; i++ {
  575. var index int
  576. if params.PreferAlternateDNSServer {
  577. if i < params.AttemptsPerPreferredServer {
  578. index = 0
  579. } else {
  580. index = 1 + ((i - params.AttemptsPerPreferredServer) / params.AttemptsPerServer)
  581. }
  582. } else {
  583. index = i / params.AttemptsPerServer
  584. }
  585. server := servers[index]
  586. // Only the first attempt pair tries transforms, as it's not certain
  587. // the transforms will be compatible with DNS servers.
  588. useProtocolTransform := (i == 0 && params.ProtocolTransformSpec != nil)
  589. // Send A and AAAA requests concurrently.
  590. questionTypes := []resolverQuestionType{resolverQuestionTypeA, resolverQuestionTypeAAAA}
  591. if !hasIPv6Route {
  592. questionTypes = questionTypes[0:1]
  593. }
  594. for _, questionType := range questionTypes {
  595. waitGroup.Add(1)
  596. // For metrics, track peak concurrent in-flight requests for
  597. // a _single_ ResolveIP. inFlight for this ResolveIP is also used
  598. // to determine whether to await additional responses once the
  599. // first, valid response is received. For that logic to be
  600. // correct, we must increment inFlight in this outer goroutine to
  601. // ensure the await logic sees either inFlight > 0 or an answer
  602. // in the channel.
  603. r.updateMetricPeakInFlight(atomic.AddInt64(&inFlight, 1))
  604. go func(attempt int, questionType resolverQuestionType, useProtocolTransform bool) {
  605. defer waitGroup.Done()
  606. // We must decrement inFlight only after sending an answer and
  607. // setting awaitA or awaitAAAA to ensure that the await logic
  608. // in the outer goroutine will see inFlight 0 only once those
  609. // operations are complete.
  610. //
  611. // We cannot wait and decrement inFlight when the outer
  612. // goroutine receives answers, as no answer is sent in some
  613. // cases, such as when the resolve fails due to NXDOMAIN.
  614. defer atomic.AddInt64(&inFlight, -1)
  615. // The request count metric counts the _intention_ to send
  616. // requests, as there's a possibility that newResolverConn or
  617. // performDNSQuery fail locally before sending a request packet.
  618. switch questionType {
  619. case resolverQuestionTypeA:
  620. r.updateMetricRequestsIPv4()
  621. case resolverQuestionTypeAAAA:
  622. r.updateMetricRequestsIPv6()
  623. }
  624. // While it's possible, and potentially more optimal, to use
  625. // the same UDP socket for both the A and AAAA request, we
  626. // use a distinct socket per request, as common DNS clients do.
  627. conn, err := r.newResolverConn(r.networkConfig.logWarning, server)
  628. if err != nil {
  629. lastErr.Store(errors.Trace(err))
  630. return
  631. }
  632. defer conn.Close()
  633. // There's no context.Context support in the underlying API
  634. // used by performDNSQuery, so instead collect all the
  635. // request conns so that they can be closed, and any blocking
  636. // network I/O interrupted, below, if resolveCtx is done.
  637. if !conns.Add(conn) {
  638. // Add fails when conns is already closed.
  639. return
  640. }
  641. // performDNSQuery will send the request and read a response.
  642. // performDNSQuery will continue reading responses until it
  643. // receives a valid response, which can mitigate a subset of
  644. // DNS injection attacks (to the limited extent possible for
  645. // plaintext DNS).
  646. //
  647. // For IPv4, NXDOMAIN or a response with no IPs is not
  648. // expected for domains resolved by Psiphon, so
  649. // performDNSQuery treats such a response as invalid. For
  650. // IPv6, a response with no IPs may be valid (even though the
  651. // response could be forged); the resolver will continue its
  652. // attempts loop if it has no other IPs.
  653. //
  654. // Each performDNSQuery has no timeout and runs
  655. // until it has read a valid response or the requestCtx is
  656. // done. This allows for slow arriving, valid responses to
  657. // eventually succeed, even if the read time exceeds
  658. // requestTimeout, as long as the read time is less than the
  659. // requestCtx timeout.
  660. //
  661. // With this approach, the overall ResolveIP call may have
  662. // more than 2 performDNSQuery requests in-flight at a time,
  663. // as requestTimeout is used to schedule sending the next
  664. // attempt but not cancel the current attempt. For
  665. // connectionless UDP, the resulting network traffic should
  666. // be similar to common DNS clients which do cancel request
  667. // before beginning the next attempt.
  668. IPs, TTLs, RTT, err := performDNSQuery(
  669. resolveCtx,
  670. r.networkConfig.logWarning,
  671. params,
  672. useProtocolTransform,
  673. conn,
  674. questionType,
  675. hostname)
  676. // Update the min/max RTT metric when reported (>=0) even if
  677. // the result is an error; i.e., even if there was an
  678. // invalid response.
  679. //
  680. // Limitation: since individual requests aren't cancelled
  681. // after requestTimeout, RTT metrics won't reflect
  682. // no-response cases, although request and response count
  683. // disparities will still show up in the metrics.
  684. if RTT >= 0 {
  685. r.updateMetricRTT(RTT)
  686. }
  687. if err != nil {
  688. lastErr.Store(errors.Trace(err))
  689. return
  690. }
  691. if len(IPs) > 0 {
  692. select {
  693. case answerChan <- &answer{attempt: attempt, IPs: IPs, TTLs: TTLs}:
  694. default:
  695. }
  696. }
  697. // Mark no longer awaiting A or AAAA as long as there is a
  698. // valid response, even if there are no IPs in the IPv6 case.
  699. switch questionType {
  700. case resolverQuestionTypeA:
  701. r.updateMetricResponsesIPv4()
  702. atomic.StoreInt32(&awaitA, 0)
  703. case resolverQuestionTypeAAAA:
  704. r.updateMetricResponsesIPv6()
  705. atomic.StoreInt32(&awaitAAAA, 0)
  706. default:
  707. }
  708. }(i+1, questionType, useProtocolTransform)
  709. }
  710. resetTimer(requestTimeout)
  711. select {
  712. case result = <-answerChan:
  713. // When the first answer, a response with valid IPs, arrives, exit
  714. // the attempts loop. The following await branch may collect
  715. // additional answers.
  716. params.setFirstAttemptWithAnswer(result.attempt)
  717. stop = true
  718. case <-timer.C:
  719. // When requestTimeout arrives, loop around and launch the next
  720. // attempt; leave the existing requests running in case they
  721. // eventually respond.
  722. timerDrained = true
  723. case <-resolveCtx.Done():
  724. // When resolveCtx is done, exit the attempts loop.
  725. //
  726. // Append the existing lastErr, which may convey useful
  727. // information to be reported in a failed_tunnel error message.
  728. lastErr.Store(errors.Tracef("%v (lastErr: %v)", ctx.Err(), lastErr.Load()))
  729. stop = true
  730. }
  731. }
  732. // Receive any additional answers, now present in the channel, which
  733. // arrived concurrent with the first answer. This receive avoids a race
  734. // condition where inFlight may now be 0, with additional answers
  735. // enqueued, in which case the following await branch is not taken.
  736. //
  737. // It's possible for the attempts loop to exit with no received answer due
  738. // to timeouts or cancellation while, concurrently, an answer is sent to
  739. // the channel. In this case, when result == nil, we ignore the answers
  740. // and leave this as a failed resolve.
  741. if result != nil {
  742. for loop := true; loop; {
  743. select {
  744. case nextAnswer := <-answerChan:
  745. result.IPs = append(result.IPs, nextAnswer.IPs...)
  746. result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
  747. default:
  748. loop = false
  749. }
  750. }
  751. }
  752. // When we have an answer, await -- for a short time,
  753. // params.AwaitTimeout -- extra answers from any remaining in-flight
  754. // requests. Only await if the request isn't cancelled and we don't
  755. // already have at least one IPv4 and one IPv6 response; only await AAAA
  756. // if it was sent; note that a valid AAAA response may include no IPs.
  757. // lastErr is not set in timeout/cancelled cases here, since we already
  758. // have an answer.
  759. if result != nil &&
  760. resolveCtx.Err() == nil &&
  761. atomic.LoadInt64(&inFlight) > 0 &&
  762. (atomic.LoadInt32(&awaitA) != 0 || atomic.LoadInt32(&awaitAAAA) != 0) &&
  763. params.AwaitTimeout > 0 {
  764. resetTimer(params.AwaitTimeout)
  765. for {
  766. stop := false
  767. select {
  768. case nextAnswer := <-answerChan:
  769. result.IPs = append(result.IPs, nextAnswer.IPs...)
  770. result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
  771. case <-timer.C:
  772. timerDrained = true
  773. stop = true
  774. case <-resolveCtx.Done():
  775. stop = true
  776. }
  777. if stop ||
  778. atomic.LoadInt64(&inFlight) == 0 ||
  779. (atomic.LoadInt32(&awaitA) == 0 && atomic.LoadInt32(&awaitAAAA) == 0) {
  780. break
  781. }
  782. }
  783. }
  784. if timer != nil {
  785. timer.Stop()
  786. }
  787. // Interrupt all workers.
  788. cancelFunc()
  789. conns.CloseAll()
  790. waitGroup.Wait()
  791. // When there's no answer, return the last error.
  792. if result == nil {
  793. err := lastErr.Load()
  794. if err == nil {
  795. err = errors.TraceNew("unexpected missing error")
  796. }
  797. if r.networkConfig.LogHostnames {
  798. err = fmt.Errorf("resolve %s : %w", hostname, err.(error))
  799. }
  800. return nil, errors.Trace(err.(error))
  801. }
  802. if len(result.IPs) == 0 {
  803. // Unexpected, since a len(IPs) > 0 check precedes sending to answerChan.
  804. return nil, errors.TraceNew("unexpected no IPs")
  805. }
  806. // Update the cache now, after all results are gathered.
  807. r.setCache(hostname, result.IPs, result.TTLs)
  808. return result.IPs, nil
  809. }
  810. // VerifyCacheExtension extends the TTL for any cached result for the
  811. // specified hostname to at least NetworkConfig.CacheExtensionVerifiedTTL.
  812. func (r *Resolver) VerifyCacheExtension(hostname string) {
  813. r.mutex.Lock()
  814. defer r.mutex.Unlock()
  815. if r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  816. return
  817. }
  818. if net.ParseIP(hostname) != nil {
  819. return
  820. }
  821. entry, expires, ok := r.cache.GetWithExpiration(hostname)
  822. if !ok {
  823. return
  824. }
  825. // Change the TTL only if the entry expires and the existing TTL isn't
  826. // longer than the extension.
  827. neverExpires := time.Time{}
  828. if expires == neverExpires ||
  829. expires.After(time.Now().Add(r.networkConfig.CacheExtensionVerifiedTTL)) {
  830. return
  831. }
  832. r.cache.Set(hostname, entry, r.networkConfig.CacheExtensionVerifiedTTL)
  833. r.metrics.verifiedCacheExtensions += 1
  834. }
  835. // GetMetrics returns a summary of DNS metrics.
  836. func (r *Resolver) GetMetrics() string {
  837. r.mutex.Lock()
  838. defer r.mutex.Unlock()
  839. // When r.metrics.minRTT < 0, min/maxRTT is unset.
  840. minRTT := "n/a"
  841. maxRTT := minRTT
  842. if r.metrics.minRTT >= 0 {
  843. minRTT = fmt.Sprintf("%d", r.metrics.minRTT/time.Millisecond)
  844. maxRTT = fmt.Sprintf("%d", r.metrics.maxRTT/time.Millisecond)
  845. }
  846. extend := ""
  847. if r.networkConfig.CacheExtensionVerifiedTTL > 0 {
  848. extend = fmt.Sprintf("| extend %d ", r.metrics.verifiedCacheExtensions)
  849. }
  850. defaultResolves := ""
  851. if r.networkConfig.allowDefaultResolver() {
  852. defaultResolves = fmt.Sprintf(
  853. " | def %d/%d", r.metrics.defaultResolves, r.metrics.defaultSuccesses)
  854. }
  855. // Note that the number of system resolvers is a point-in-time value,
  856. // while the others are cumulative.
  857. return fmt.Sprintf("resolves %d | hit %d %s| req v4/v6 %d/%d | resp %d/%d | peak %d | rtt %s - %s ms. | sys %d%s",
  858. r.metrics.resolves,
  859. r.metrics.cacheHits,
  860. extend,
  861. r.metrics.requestsIPv4,
  862. r.metrics.requestsIPv6,
  863. r.metrics.responsesIPv4,
  864. r.metrics.responsesIPv6,
  865. r.metrics.peakInFlight,
  866. minRTT,
  867. maxRTT,
  868. len(r.systemServers),
  869. defaultResolves)
  870. }
  871. // updateNetworkState updates the system DNS server list, IPv6 state, and the
  872. // cache.
  873. //
  874. // Any errors that occur while querying network state are logged; in error
  875. // conditions the functionality of the resolver may be reduced, but the
  876. // resolver remains operational.
  877. func (r *Resolver) updateNetworkState(networkID string) {
  878. r.mutex.Lock()
  879. defer r.mutex.Unlock()
  880. // Only perform blocking/expensive update operations when necessary.
  881. updateAll := false
  882. updateIPv6Route := false
  883. updateServers := false
  884. flushCache := false
  885. // If r.cache is nil, this is the first update call in NewResolver. Create
  886. // the cache and perform all updates.
  887. if r.cache == nil {
  888. r.cache = lrucache.NewWithLRU(
  889. resolverCacheDefaultTTL,
  890. resolverCacheReapFrequency,
  891. resolverCacheMaxEntries)
  892. updateAll = true
  893. }
  894. // Perform all updates when the networkID has changed, which indicates a
  895. // different network.
  896. if r.networkID != networkID {
  897. updateAll = true
  898. }
  899. if updateAll {
  900. updateIPv6Route = true
  901. updateServers = true
  902. flushCache = true
  903. }
  904. // Even when the networkID has not changed, update DNS servers
  905. // periodically. This is similar to how other DNS clients
  906. // poll /etc/resolv.conf, including the period of 5s.
  907. if time.Since(r.lastServersUpdate) > resolverServersUpdateTTL {
  908. updateServers = true
  909. }
  910. // Update hasIPv6Route, which indicates whether the current network has an
  911. // IPv6 route and so if DNS requests for AAAA records will be sent.
  912. // There's no use for AAAA records on IPv4-only networks; and other
  913. // common DNS clients omit AAAA requests on IPv4-only records, so these
  914. // requests would otherwise be unusual.
  915. //
  916. // There's no hasIPv4Route as we always need to resolve A records,
  917. // particularly for IPv4-only endpoints; for IPv6-only networks,
  918. // NetworkConfig.IPv6Synthesize should be used to accomodate IPv4 DNS
  919. // server addresses, and dials performed outside the Resolver will
  920. // similarly use NAT 64 (on iOS; on Android, 464XLAT will handle this
  921. // transparently).
  922. if updateIPv6Route {
  923. if r.networkConfig.HasIPv6Route != nil {
  924. r.hasIPv6Route = r.networkConfig.HasIPv6Route()
  925. } else {
  926. hasIPv6Route, err := hasRoutableIPv6Interface()
  927. if err != nil {
  928. // Log warning and proceed without IPv6.
  929. r.networkConfig.logWarning(
  930. errors.Tracef("unable to determine IPv6 route: %v", err))
  931. hasIPv6Route = false
  932. }
  933. r.hasIPv6Route = hasIPv6Route
  934. }
  935. }
  936. // Update the list of system DNS servers. It's not an error condition here
  937. // if the list is empty: a subsequent ResolveIP may use
  938. // ResolveParameters which specifies an AlternateDNSServer.
  939. if updateServers && r.networkConfig.GetDNSServers != nil {
  940. systemServers := []string{}
  941. for _, systemServer := range r.networkConfig.GetDNSServers() {
  942. host, _, err := net.SplitHostPort(systemServer)
  943. if err != nil {
  944. // Assume the SplitHostPort error is due to systemServer being
  945. // an IP only, and append the default port, 53. If
  946. // systemServer _isn't_ an IP, the following ParseIP will fail.
  947. host = systemServer
  948. systemServer = net.JoinHostPort(systemServer, resolverDNSPort)
  949. }
  950. if net.ParseIP(host) == nil {
  951. // Log warning and proceed without this DNS server.
  952. r.networkConfig.logWarning(
  953. errors.TraceNew("invalid DNS server IP address"))
  954. continue
  955. }
  956. systemServers = append(systemServers, systemServer)
  957. }
  958. // Check if the list of servers has changed, including order. If
  959. // changed, flush the cache even if the networkID has not changed.
  960. // Cached results are only considered valid as long as the system DNS
  961. // configuration remains the same.
  962. equal := len(r.systemServers) == len(systemServers)
  963. if equal {
  964. for i := 0; i < len(r.systemServers); i++ {
  965. if r.systemServers[i] != systemServers[i] {
  966. equal = false
  967. break
  968. }
  969. }
  970. }
  971. flushCache = flushCache || !equal
  972. // Concurrency note: once the r.systemServers slice is set, the
  973. // contents of the backing array must not be modified due to
  974. // concurrent ResolveIP calls.
  975. r.systemServers = systemServers
  976. r.lastServersUpdate = time.Now()
  977. }
  978. // Skip cache flushes when the extended DNS caching mechanism is enabled.
  979. // TODO: retain only verified cache entries?
  980. if flushCache && r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  981. r.cache.Flush()
  982. }
  983. // Set r.networkID only after all operations complete without errors; if
  984. // r.networkID were set earlier, a subsequent
  985. // ResolveIP/updateNetworkState call might proceed as if the network
  986. // state were updated for the specified network ID.
  987. r.networkID = networkID
  988. }
  989. func (r *Resolver) getNetworkState() (bool, []string) {
  990. r.mutex.Lock()
  991. defer r.mutex.Unlock()
  992. return r.hasIPv6Route, r.systemServers
  993. }
  994. func (r *Resolver) setCache(hostname string, IPs []net.IP, TTLs []time.Duration) {
  995. r.mutex.Lock()
  996. defer r.mutex.Unlock()
  997. // The shortest TTL is used. In some cases, a DNS server may omit the TTL
  998. // or set a 0 TTL, in which case the default is used.
  999. TTL := resolverDefaultAnswerTTL
  1000. for _, answerTTL := range TTLs {
  1001. if answerTTL > 0 && answerTTL < TTL {
  1002. TTL = answerTTL
  1003. }
  1004. }
  1005. // When NetworkConfig.CacheExtensionInitialTTL configured, ensure the TTL
  1006. // is no shorter than CacheExtensionInitialTTL.
  1007. if r.networkConfig.CacheExtensionInitialTTL != 0 &&
  1008. TTL < r.networkConfig.CacheExtensionInitialTTL {
  1009. TTL = r.networkConfig.CacheExtensionInitialTTL
  1010. }
  1011. // Limitation: with concurrent ResolveIPs for the same domain, the last
  1012. // setCache call determines the cache value. The results are not merged.
  1013. r.cache.Set(hostname, IPs, TTL)
  1014. }
  1015. func (r *Resolver) getCache(hostname string) []net.IP {
  1016. r.mutex.Lock()
  1017. defer r.mutex.Unlock()
  1018. entry, ok := r.cache.Get(hostname)
  1019. if !ok {
  1020. return nil
  1021. }
  1022. r.metrics.cacheHits += 1
  1023. return entry.([]net.IP)
  1024. }
  1025. // newResolverConn creates a UDP socket that will send packets to serverAddr.
  1026. // serverAddr is an IP:port, which allows specifying the port for testing or
  1027. // in rare cases where the port isn't 53.
  1028. func (r *Resolver) newResolverConn(
  1029. logWarning func(error),
  1030. serverAddr string) (retConn net.Conn, retErr error) {
  1031. defer func() {
  1032. if retErr != nil {
  1033. logWarning(retErr)
  1034. }
  1035. }()
  1036. // When configured, attempt to synthesize an IPv6 address from
  1037. // an IPv4 address for compatibility on DNS64/NAT64 networks.
  1038. // If synthesize fails, try the original address.
  1039. if r.networkConfig.IPv6Synthesize != nil {
  1040. serverIPStr, port, err := net.SplitHostPort(serverAddr)
  1041. if err != nil {
  1042. return nil, errors.Trace(err)
  1043. }
  1044. serverIP := net.ParseIP(serverIPStr)
  1045. if serverIP != nil && serverIP.To4() != nil {
  1046. synthesized := r.networkConfig.IPv6Synthesize(serverIPStr)
  1047. if synthesized != "" && net.ParseIP(synthesized) != nil {
  1048. serverAddr = net.JoinHostPort(synthesized, port)
  1049. }
  1050. }
  1051. }
  1052. dialer := &net.Dialer{}
  1053. if r.networkConfig.BindToDevice != nil {
  1054. dialer.Control = func(_, _ string, c syscall.RawConn) error {
  1055. var controlErr error
  1056. err := c.Control(func(fd uintptr) {
  1057. _, err := r.networkConfig.BindToDevice(int(fd))
  1058. if err != nil {
  1059. controlErr = errors.Tracef("BindToDevice failed: %v", err)
  1060. return
  1061. }
  1062. })
  1063. if controlErr != nil {
  1064. return errors.Trace(controlErr)
  1065. }
  1066. return errors.Trace(err)
  1067. }
  1068. }
  1069. // context.Background is ok in this case as the UDP dial is just a local
  1070. // syscall to create the socket.
  1071. conn, err := dialer.DialContext(context.Background(), "udp", serverAddr)
  1072. if err != nil {
  1073. return nil, errors.Trace(err)
  1074. }
  1075. return conn, nil
  1076. }
  1077. func (r *Resolver) updateMetricResolves() {
  1078. r.mutex.Lock()
  1079. defer r.mutex.Unlock()
  1080. r.metrics.resolves += 1
  1081. }
  1082. func (r *Resolver) updateMetricRequestsIPv4() {
  1083. r.mutex.Lock()
  1084. defer r.mutex.Unlock()
  1085. r.metrics.requestsIPv4 += 1
  1086. }
  1087. func (r *Resolver) updateMetricRequestsIPv6() {
  1088. r.mutex.Lock()
  1089. defer r.mutex.Unlock()
  1090. r.metrics.requestsIPv6 += 1
  1091. }
  1092. func (r *Resolver) updateMetricResponsesIPv4() {
  1093. r.mutex.Lock()
  1094. defer r.mutex.Unlock()
  1095. r.metrics.responsesIPv4 += 1
  1096. }
  1097. func (r *Resolver) updateMetricResponsesIPv6() {
  1098. r.mutex.Lock()
  1099. defer r.mutex.Unlock()
  1100. r.metrics.responsesIPv6 += 1
  1101. }
  1102. func (r *Resolver) updateMetricDefaultResolver(success bool) {
  1103. r.mutex.Lock()
  1104. defer r.mutex.Unlock()
  1105. r.metrics.defaultResolves += 1
  1106. if success {
  1107. r.metrics.defaultSuccesses += 1
  1108. }
  1109. }
  1110. func (r *Resolver) updateMetricPeakInFlight(inFlight int64) {
  1111. r.mutex.Lock()
  1112. defer r.mutex.Unlock()
  1113. if inFlight > r.metrics.peakInFlight {
  1114. r.metrics.peakInFlight = inFlight
  1115. }
  1116. }
  1117. func (r *Resolver) updateMetricRTT(rtt time.Duration) {
  1118. r.mutex.Lock()
  1119. defer r.mutex.Unlock()
  1120. if rtt < 0 {
  1121. // Ignore invalid input.
  1122. return
  1123. }
  1124. // When r.metrics.minRTT < 0, min/maxRTT is unset.
  1125. if r.metrics.minRTT < 0 || rtt < r.metrics.minRTT {
  1126. r.metrics.minRTT = rtt
  1127. }
  1128. if rtt > r.metrics.maxRTT {
  1129. r.metrics.maxRTT = rtt
  1130. }
  1131. }
  1132. func hasRoutableIPv6Interface() (bool, error) {
  1133. interfaces, err := net.Interfaces()
  1134. if err != nil {
  1135. return false, errors.Trace(err)
  1136. }
  1137. for _, in := range interfaces {
  1138. if (in.Flags&net.FlagUp == 0) ||
  1139. (in.Flags&(net.FlagLoopback|net.FlagPointToPoint)) != 0 {
  1140. continue
  1141. }
  1142. addrs, err := in.Addrs()
  1143. if err != nil {
  1144. return false, errors.Trace(err)
  1145. }
  1146. for _, addr := range addrs {
  1147. if IPNet, ok := addr.(*net.IPNet); ok &&
  1148. IPNet.IP.To4() == nil &&
  1149. !IPNet.IP.IsLinkLocalUnicast() {
  1150. return true, nil
  1151. }
  1152. }
  1153. }
  1154. return false, nil
  1155. }
  1156. func generateIPAddressFromCIDR(CIDR string) (net.IP, error) {
  1157. _, IPNet, err := net.ParseCIDR(CIDR)
  1158. if err != nil {
  1159. return nil, errors.Trace(err)
  1160. }
  1161. // A retry is required, since a CIDR may include broadcast IPs (a.b.c.0) or
  1162. // other invalid values. The number of retries is limited to ensure we
  1163. // don't hang in the case of a misconfiguration.
  1164. for i := 0; i < 10; i++ {
  1165. randBytes := prng.Bytes(len(IPNet.IP))
  1166. IP := make(net.IP, len(IPNet.IP))
  1167. // The 1 bits in the mask must apply to the IP in the CIDR and the 0
  1168. // bits in the mask are available to randomize.
  1169. for i := 0; i < len(IP); i++ {
  1170. IP[i] = (IPNet.IP[i] & IPNet.Mask[i]) | (randBytes[i] & ^IPNet.Mask[i])
  1171. }
  1172. if IP.IsGlobalUnicast() && !common.IsBogon(IP) {
  1173. return IP, nil
  1174. }
  1175. }
  1176. return nil, errors.TraceNew("failed to generate random IP")
  1177. }
  1178. type resolverQuestionType int
  1179. const (
  1180. resolverQuestionTypeA = 0
  1181. resolverQuestionTypeAAAA = 1
  1182. )
  1183. func performDNSQuery(
  1184. resolveCtx context.Context,
  1185. logWarning func(error),
  1186. params *ResolveParameters,
  1187. useProtocolTransform bool,
  1188. conn net.Conn,
  1189. questionType resolverQuestionType,
  1190. hostname string) ([]net.IP, []time.Duration, time.Duration, error) {
  1191. if useProtocolTransform {
  1192. if params.ProtocolTransformSpec == nil ||
  1193. params.ProtocolTransformSeed == nil {
  1194. return nil, nil, -1, errors.TraceNew("invalid protocol transform configuration")
  1195. }
  1196. // miekg/dns expects conn to be a net.PacketConn or else it writes the
  1197. // TCP length prefix
  1198. udpConn, ok := conn.(*net.UDPConn)
  1199. if !ok {
  1200. return nil, nil, -1, errors.TraceNew("conn is not a *net.UDPConn")
  1201. }
  1202. conn = &transformDNSPacketConn{
  1203. UDPConn: udpConn,
  1204. transform: params.ProtocolTransformSpec,
  1205. seed: params.ProtocolTransformSeed,
  1206. }
  1207. }
  1208. // UDPSize sets the receive buffer to > 512, even when we don't include
  1209. // EDNS(0), which will mitigate issues with RFC 1035 non-compliant
  1210. // servers. See Go issue 51127.
  1211. dnsConn := &dns.Conn{
  1212. Conn: conn,
  1213. UDPSize: udpPacketBufferSize,
  1214. }
  1215. defer dnsConn.Close()
  1216. // SetQuestion initializes request.MsgHdr.Id to a random value
  1217. request := &dns.Msg{MsgHdr: dns.MsgHdr{RecursionDesired: true}}
  1218. switch questionType {
  1219. case resolverQuestionTypeA:
  1220. request.SetQuestion(dns.Fqdn(hostname), dns.TypeA)
  1221. case resolverQuestionTypeAAAA:
  1222. request.SetQuestion(dns.Fqdn(hostname), dns.TypeAAAA)
  1223. default:
  1224. return nil, nil, -1, errors.TraceNew("unknown DNS request question type")
  1225. }
  1226. if params.IncludeEDNS0 {
  1227. // miekg/dns: "RFC 6891, Section 6.1.1 allows the OPT record to appear
  1228. // anywhere in the additional record section, but it's usually at the
  1229. // end..."
  1230. request.SetEdns0(udpPacketBufferSize, false)
  1231. }
  1232. startTime := time.Now()
  1233. // Send the DNS request
  1234. dnsConn.WriteMsg(request)
  1235. // Read and process the DNS response
  1236. var IPs []net.IP
  1237. var TTLs []time.Duration
  1238. var lastErr error
  1239. RTT := time.Duration(-1)
  1240. for {
  1241. // Stop when resolveCtx is done; the caller, ResolveIP, will also
  1242. // close conn, which will interrupt a blocking dnsConn.ReadMsg.
  1243. if resolveCtx.Err() != nil {
  1244. // ResolveIP, which calls performDNSQuery, already records the
  1245. // context error (e.g., context timeout), so instead report
  1246. // lastErr, when present, as it may contain more useful
  1247. // information about why a response was rejected.
  1248. err := lastErr
  1249. if err == nil {
  1250. err = errors.Trace(resolveCtx.Err())
  1251. }
  1252. return nil, nil, RTT, err
  1253. }
  1254. // Read a response. RTT is the elapsed time between sending the
  1255. // request and reading the last received response.
  1256. response, err := dnsConn.ReadMsg()
  1257. RTT = time.Since(startTime)
  1258. if err == nil && response.MsgHdr.Id != request.MsgHdr.Id {
  1259. err = dns.ErrId
  1260. }
  1261. if err != nil {
  1262. // Try reading again, in case the first response packet failed to
  1263. // unmarshal or had an invalid ID. The Go resolver also does this;
  1264. // see Go issue 13281.
  1265. if resolveCtx.Err() == nil {
  1266. // Only log if resolveCtx is not done; otherwise the error could
  1267. // be due to conn being closed by ResolveIP.
  1268. lastErr = errors.Tracef("invalid response: %v", err)
  1269. logWarning(lastErr)
  1270. }
  1271. continue
  1272. }
  1273. // Check the RCode.
  1274. //
  1275. // For IPv4, we expect RCodeSuccess as Psiphon will typically only
  1276. // resolve domains that exist and have a valid IP (when this isn't
  1277. // the case, and we retry, the overall ResolveIP and its parent dial
  1278. // will still abort after resolveCtx is done, or RequestTimeout
  1279. // expires for maxAttempts).
  1280. //
  1281. // For IPv6, we should also expect RCodeSuccess even if there is no
  1282. // AAAA record, as long as the domain exists and has an A record.
  1283. // However, per RFC 6147 section 5.1.2, we may receive
  1284. // NXDOMAIN: "...some servers respond with RCODE=3 to a AAAA query
  1285. // even if there is an A record available for that owner name. Those
  1286. // servers are in clear violation of the meaning of RCODE 3...". In
  1287. // this case, we coalesce NXDOMAIN into success to treat the response
  1288. // the same as success with no AAAA record.
  1289. //
  1290. // All other RCodes, which are unexpected, lead to a read retry.
  1291. if response.MsgHdr.Rcode != dns.RcodeSuccess &&
  1292. !(questionType == resolverQuestionTypeAAAA && response.MsgHdr.Rcode == dns.RcodeNameError) {
  1293. errMsg, ok := dns.RcodeToString[response.MsgHdr.Rcode]
  1294. if !ok {
  1295. errMsg = fmt.Sprintf("Rcode: %d", response.MsgHdr.Rcode)
  1296. }
  1297. lastErr = errors.Tracef("unexpected RCode: %v", errMsg)
  1298. logWarning(lastErr)
  1299. continue
  1300. }
  1301. // Extract all IP answers, along with corresponding TTLs for caching.
  1302. // Perform additional validation, which may lead to another read
  1303. // retry. However, if _any_ valid IP is found, stop reading and
  1304. // return that result. Again, the validation is only best effort.
  1305. checkFailed := false
  1306. for _, answer := range response.Answer {
  1307. haveAnswer := false
  1308. var IP net.IP
  1309. var TTLSec uint32
  1310. switch questionType {
  1311. case resolverQuestionTypeA:
  1312. if a, ok := answer.(*dns.A); ok {
  1313. IP = a.A
  1314. TTLSec = a.Hdr.Ttl
  1315. haveAnswer = true
  1316. }
  1317. case resolverQuestionTypeAAAA:
  1318. if aaaa, ok := answer.(*dns.AAAA); ok {
  1319. IP = aaaa.AAAA
  1320. TTLSec = aaaa.Hdr.Ttl
  1321. haveAnswer = true
  1322. }
  1323. }
  1324. if !haveAnswer {
  1325. continue
  1326. }
  1327. err := checkDNSAnswerIP(IP)
  1328. if err != nil {
  1329. checkFailed = true
  1330. lastErr = errors.Tracef("invalid IP: %v", err)
  1331. logWarning(lastErr)
  1332. // Check the next answer
  1333. continue
  1334. }
  1335. IPs = append(IPs, IP)
  1336. TTLs = append(TTLs, time.Duration(TTLSec)*time.Second)
  1337. }
  1338. // For IPv4, an IP is expected, as noted in the comment above.
  1339. //
  1340. // In potential cases where we resolve a domain that has only an IPv6
  1341. // address, the concurrent AAAA request will deliver its result to
  1342. // ResolveIP, and that answer will be selected, so only the "await"
  1343. // logic will delay the parent dial in that case.
  1344. if questionType == resolverQuestionTypeA && len(IPs) == 0 && !checkFailed {
  1345. checkFailed = true
  1346. lastErr = errors.TraceNew("unexpected empty A response")
  1347. logWarning(lastErr)
  1348. }
  1349. // Retry if there are no valid IPs and any error; if no error, this
  1350. // may be a valid AAAA response with no IPs, in which case return the
  1351. // result.
  1352. if len(IPs) == 0 && checkFailed {
  1353. continue
  1354. }
  1355. return IPs, TTLs, RTT, nil
  1356. }
  1357. }
  1358. func checkDNSAnswerIP(IP net.IP) error {
  1359. if IP == nil {
  1360. return errors.TraceNew("IP is nil")
  1361. }
  1362. // Limitation: this could still be a phony/injected response, it's not
  1363. // possible to verify with plaintext DNS, but a "bogon" IP is clearly
  1364. // invalid.
  1365. if common.IsBogon(IP) {
  1366. return errors.TraceNew("IP is bogon")
  1367. }
  1368. // Create a temporary socket bound to the destination IP. This checks
  1369. // thats the local host has a route to this IP. If not, we'll reject the
  1370. // IP. This prevents selecting an IP which is guaranteed to fail to dial.
  1371. // Use UDP as this results in no network traffic; the destination port is
  1372. // arbitrary. The Go resolver performs a similar operation.
  1373. //
  1374. // Limitations:
  1375. // - We may cache the IP and reuse it without checking routability again;
  1376. // the cache should be flushed when network state changes.
  1377. // - Given that the AAAA is requested only when the host has an IPv6
  1378. // route, we don't expect this to often fail with a _valid_ response.
  1379. // However, this remains a possibility and in this case,
  1380. // performDNSQuery will keep awaiting a response which can trigger
  1381. // the "await" logic.
  1382. conn, err := net.DialUDP("udp", nil, &net.UDPAddr{IP: IP, Port: 443})
  1383. if err != nil {
  1384. return errors.Trace(err)
  1385. }
  1386. conn.Close()
  1387. return nil
  1388. }
  1389. func defaultResolverLookupIP(
  1390. ctx context.Context, hostname string, logHostnames bool) ([]net.IP, error) {
  1391. addrs, err := net.DefaultResolver.LookupIPAddr(ctx, hostname)
  1392. if err != nil && !logHostnames {
  1393. // Remove domain names from "net" error messages.
  1394. err = common.RedactNetError(err)
  1395. }
  1396. if err != nil {
  1397. return nil, errors.Trace(err)
  1398. }
  1399. ips := make([]net.IP, len(addrs))
  1400. for i, addr := range addrs {
  1401. ips[i] = addr.IP
  1402. }
  1403. return ips, nil
  1404. }
// transformDNSPacketConn wraps a *net.UDPConn, intercepting Write calls and
// applying the specified protocol transform.
//
// As transforms operate on strings and DNS requests are binary, the transform
// should be expressed using hex characters. The DNS packet to be written
// (the input to Write) is converted to hex, transformed, converted back to
// binary, and then actually written to the UDP socket.
type transformDNSPacketConn struct {
	// The embedded conn is the socket that Write sends the transformed
	// packet to.
	*net.UDPConn
	// transform is the spec that Write applies to the hex-encoded packet.
	transform transforms.Spec
	// seed is passed to transform.Apply on every Write.
	seed *prng.Seed
}
  1417. func (conn *transformDNSPacketConn) Write(b []byte) (int, error) {
  1418. // Limitation: there is no check that a transformed packet remains within
  1419. // the network packet MTU.
  1420. input := hex.EncodeToString(b)
  1421. output, err := conn.transform.Apply(conn.seed, input)
  1422. if err != nil {
  1423. return 0, errors.Trace(err)
  1424. }
  1425. packet, err := hex.DecodeString(output)
  1426. if err != nil {
  1427. return 0, errors.Trace(err)
  1428. }
  1429. _, err = conn.UDPConn.Write(packet)
  1430. if err != nil {
  1431. // In the error case, don't report bytes written as the number could
  1432. // exceed the pre-transform length.
  1433. return 0, errors.Trace(err)
  1434. }
  1435. // Report the pre-transform length as bytes written, as the caller may check
  1436. // that the requested len(b) bytes were written.
  1437. return len(b), nil
  1438. }