resolver.go 55 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668
  1. /*
  2. * Copyright (c) 2022, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package resolver implements a DNS stub resolver, or DNS client, which
  20. // resolves domain names.
  21. //
  22. // The resolver is Psiphon-specific and oriented towards blocking resistance.
  23. // See ResolveIP for more details.
  24. package resolver
  25. import (
  26. "context"
  27. "encoding/hex"
  28. "fmt"
  29. "net"
  30. "sync"
  31. "sync/atomic"
  32. "syscall"
  33. "time"
  34. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  35. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  36. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
  37. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  38. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
  39. lrucache "github.com/cognusion/go-cache-lru"
  40. "github.com/miekg/dns"
  41. )
  42. const (
  43. resolverCacheDefaultTTL = 1 * time.Minute
  44. resolverCacheReapFrequency = 1 * time.Minute
  45. resolverCacheMaxEntries = 10000
  46. resolverServersUpdateTTL = 5 * time.Second
  47. resolverDefaultAttemptsPerServer = 2
  48. resolverDefaultRequestTimeout = 5 * time.Second
  49. resolverDefaultAwaitTimeout = 10 * time.Millisecond
  50. resolverDefaultAnswerTTL = 1 * time.Minute
  51. resolverDNSPort = "53"
  52. udpPacketBufferSize = 1232
  53. )
  54. // NetworkConfig specifies network-level configuration for a Resolver.
  55. type NetworkConfig struct {
  56. // GetDNSServers returns a list of system DNS server addresses (IP:port, or
  57. // IP only with port 53 assumed), as determined via OS APIs, in priority
  58. // order. GetDNSServers may be nil.
  59. GetDNSServers func() []string
  60. // BindToDevice should ensure the input file descriptor, a UDP socket, is
  61. // excluded from VPN routing. BindToDevice may be nil.
  62. BindToDevice func(fd int) (string, error)
  63. // AllowDefaultResolverWithBindToDevice indicates that it's safe to use
  64. // the default resolver when BindToDevice is configured, as the host OS
  65. // will automatically exclude DNS requests from the VPN.
  66. AllowDefaultResolverWithBindToDevice bool
  67. // IPv6Synthesize should apply NAT64 synthesis to the input IPv4 address,
  68. // returning a synthesized IPv6 address that will route to the same
  69. // endpoint. IPv6Synthesize may be nil.
  70. IPv6Synthesize func(IPv4 string) string
  71. // HasIPv6Route should return true when the host has an IPv6 route.
  72. // Resolver has an internal implementation, hasRoutableIPv6Interface, to
  73. // determine this, but it can fail on some platforms ("route ip+net:
  74. // netlinkrib: permission denied" on Android, for example; see Go issue
  75. // 40569). When HasIPv6Route is nil, the internal implementation is used.
  76. HasIPv6Route func() bool
  77. // LogWarning is an optional callback which is used to log warnings and
  78. // transient errors which would otherwise not be recorded or returned.
  79. LogWarning func(error)
  80. // LogHostnames indicates whether to log hostname in errors or not.
  81. LogHostnames bool
  82. // CacheExtensionInitialTTL specifies a minimum TTL to use when caching
  83. // domain resolution results. This minimum will override any TTL in the
  84. // DNS response. CacheExtensionInitialTTL is off when 0.
  85. CacheExtensionInitialTTL time.Duration
  86. // CacheExtensionVerifiedTTL specifies the minimum TTL to set for a cached
  87. // domain resolution result after the result has been verified.
  88. // CacheExtensionVerifiedTTL is off when 0.
  89. //
  90. // DNS cache extension is a workaround to partially mitigate issues with
  91. // obtaining underlying system DNS server IPs on platforms such as iOS
  92. // once a VPN is running and after network changes, such as changing from
  93. // Wi-Fi to mobile. While ResolveParameters.AlternateDNSServer can be
  94. // used to specify a known public DNS server, it may be the case that
  95. // public DNS servers are blocked or always falling back to a public DNS
  96. // server creates unusual traffic. And while it may be possible to use
  97. // the default system resolver, it lacks certain circumvention
  98. // capabilities.
  99. //
  100. // Extending the TTL for cached responses allows Psiphon to redial domains
  101. // using recently successful IPs.
  102. //
  103. // CacheExtensionInitialTTL allows for a greater initial minimum TTL, so
  104. // that the response entry remains in the cache long enough for a dial to
  105. // fully complete and verify the endpoint. Psiphon will call
  106. // Resolver.VerifyExtendCacheTTL once a dial has authenticated, for
  107. // example, the destination Psiphon server. VerifyCacheExtension will
  108. // further extend the corresponding TTL to CacheExtensionVerifiedTTL, a
  109. // longer TTL. CacheExtensionInitialTTL is intended to be on the order of
  110. // minutes and CacheExtensionVerifiedTTL may be on the order of hours.
  111. //
  112. // When CacheExtensionVerifiedTTL is on, the DNS cache is not flushed on
  113. // network changes, to allow for the previously cached entries to remain
  114. // available in the problematic scenario. Like adjusting TTLs, this is an
  115. // explicit trade-off which doesn't adhere to standard best practise, but
  116. // is expected to be more blocking resistent; this approach also assumes
  117. // that endpoints such as CDN IPs are typically available on any network.
  118. CacheExtensionVerifiedTTL time.Duration
  119. }
  120. func (c *NetworkConfig) allowDefaultResolver() bool {
  121. // When BindToDevice is configured, the standard library resolver is not
  122. // used, as the system resolver may not route outside of the VPN.
  123. return c.BindToDevice == nil || c.AllowDefaultResolverWithBindToDevice
  124. }
  125. func (c *NetworkConfig) logWarning(err error) {
  126. if c.LogWarning != nil {
  127. c.LogWarning(err)
  128. }
  129. }
  130. // ResolveParameters specifies the configuration and behavior of a single
  131. // ResolveIP call, a single domain name resolution.
  132. //
  133. // New ResolveParameters may be generated by calling MakeResolveParameters,
  134. // which takes tactics parameters as an input.
  135. //
  136. // ResolveParameters may be persisted for replay.
  137. type ResolveParameters struct {
  138. // AttemptsPerServer specifies how many requests to send to each DNS
  139. // server before trying the next server. IPv4 and IPv6 requests are sent
  140. // concurrently and count as one attempt.
  141. AttemptsPerServer int
  142. // AttemptsPerPreferredServer is AttemptsPerServer for a preferred
  143. // alternate DNS server.
  144. AttemptsPerPreferredServer int
  145. // RequestTimeout specifies how long to wait for a valid response before
  146. // moving on to the next attempt.
  147. RequestTimeout time.Duration
  148. // AwaitTimeout specifies how long to await an additional response after
  149. // the first response is received. This additional wait time applies only
  150. // when there is either no IPv4 or IPv6 response.
  151. AwaitTimeout time.Duration
  152. // PreresolvedIPAddress specifies an IP address result to be used in place
  153. // of making a request.
  154. PreresolvedIPAddress string
  155. // PreresolvedDomain is the domain for which PreresolvedIPAddress is to be
  156. // used.
  157. PreresolvedDomain string
  158. // AlternateDNSServer specifies an alterate DNS server (IP:port, or IP
  159. // only with port 53 assumed) to be used when either no system DNS
  160. // servers are available or when PreferAlternateDNSServer is set.
  161. AlternateDNSServer string
  162. // PreferAlternateDNSServer indicates whether to prioritize using the
  163. // AlternateDNSServer. When set, the AlternateDNSServer is attempted
  164. // before any system DNS servers.
  165. PreferAlternateDNSServer bool
  166. // ProtocolTransformName specifies the name associated with
  167. // ProtocolTransformSpec and is used for metrics.
  168. ProtocolTransformName string
  169. // ProtocolTransformSpec specifies a transform to apply to the DNS request packet.
  170. // See: "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms".
  171. //
  172. // As transforms operate on strings and DNS requests are binary,
  173. // transforms should be expressed using hex characters.
  174. //
  175. // DNS transforms include strategies discovered by the Geneva team,
  176. // https://geneva.cs.umd.edu.
  177. ProtocolTransformSpec transforms.Spec
  178. // ProtocolTransformSeed specifies the seed to use for generating random
  179. // data in the ProtocolTransformSpec transform. To replay a transform,
  180. // specify the same seed.
  181. ProtocolTransformSeed *prng.Seed
  182. // IncludeEDNS0 indicates whether to include the EDNS(0) UDP maximum
  183. // response size extension in DNS requests. The resolver can handle
  184. // responses larger than 512 bytes (RFC 1035 maximum) regardless of
  185. // whether the extension is included; the extension may be included as
  186. // part of appearing similar to other DNS traffic.
  187. IncludeEDNS0 bool
  188. firstAttemptWithAnswer int32
  189. }
  190. // GetFirstAttemptWithAnswer returns the index of the first request attempt
  191. // that received a valid response, for the most recent ResolveIP call using
  192. // this ResolveParameters. This information is used for logging metrics. The
  193. // first attempt has index 1. GetFirstAttemptWithAnswer return 0 when no
  194. // request attempt has reported a valid response.
  195. //
  196. // The caller is responsible for synchronizing use of a ResolveParameters
  197. // instance (e.g, use a distinct ResolveParameters per ResolveIP to ensure
  198. // GetFirstAttemptWithAnswer refers to a specific ResolveIP).
  199. func (r *ResolveParameters) GetFirstAttemptWithAnswer() int {
  200. return int(atomic.LoadInt32(&r.firstAttemptWithAnswer))
  201. }
  202. func (r *ResolveParameters) setFirstAttemptWithAnswer(attempt int) {
  203. atomic.StoreInt32(&r.firstAttemptWithAnswer, int32(attempt))
  204. }
  205. // Implementation note: Go's standard net.Resolver supports specifying a
  206. // custom Dial function. This could be used to implement at least a large
  207. // subset of the Resolver functionality on top of Go's standard library
  208. // resolver. However, net.Resolver is limited to using the CGO resolver on
  209. // Android, https://github.com/golang/go/issues/8877, in which case the
  210. // custom Dial function is not used. Furthermore, the the pure Go resolver in
  211. // net/dnsclient_unix.go appears to not be used on Windows at this time.
  212. //
  213. // Go also provides golang.org/x/net/dns/dnsmessage, a DNS message marshaller,
  214. // which could potentially be used in place of github.com/miekg/dns.
  215. // Resolver is a DNS stub resolver, or DNS client, which resolves domain
  216. // names. A Resolver instance maintains a cache, a network state snapshot,
  217. // and metrics. All ResolveIP calls will share the same cache and state.
  218. // Multiple concurrent ResolveIP calls are supported.
  219. type Resolver struct {
  220. networkConfig *NetworkConfig
  221. mutex sync.Mutex
  222. networkID string
  223. hasIPv6Route bool
  224. systemServers []string
  225. lastServersUpdate time.Time
  226. cache *lrucache.Cache
  227. metrics resolverMetrics
  228. }
  229. type resolverMetrics struct {
  230. resolves int
  231. cacheHits int
  232. verifiedCacheExtensions int
  233. requestsIPv4 int
  234. requestsIPv6 int
  235. responsesIPv4 int
  236. responsesIPv6 int
  237. defaultResolves int
  238. defaultSuccesses int
  239. peakInFlight int64
  240. minRTT time.Duration
  241. maxRTT time.Duration
  242. }
  243. func newResolverMetrics() resolverMetrics {
  244. return resolverMetrics{minRTT: -1}
  245. }
  246. // NewResolver creates a new Resolver instance.
  247. func NewResolver(networkConfig *NetworkConfig, networkID string) *Resolver {
  248. r := &Resolver{
  249. networkConfig: networkConfig,
  250. metrics: newResolverMetrics(),
  251. }
  252. // updateNetworkState will initialize the cache and network state,
  253. // including system DNS servers.
  254. r.updateNetworkState(networkID)
  255. return r
  256. }
  257. // Stop clears the Resolver cache and resets metrics. Stop must be called only
  258. // after ceasing all in-flight ResolveIP goroutines, or else the cache or
  259. // metrics may repopulate. A Resolver may be resumed after calling Stop, but
  260. // Update must be called first.
  261. func (r *Resolver) Stop() {
  262. r.mutex.Lock()
  263. defer r.mutex.Unlock()
  264. // r.networkConfig is not set to nil to avoid possible nil pointer
  265. // dereferences by concurrent ResolveIP calls.
  266. r.networkID = ""
  267. r.hasIPv6Route = false
  268. r.systemServers = nil
  269. r.cache.Flush()
  270. r.metrics = newResolverMetrics()
  271. }
  272. // MakeResolveParameters generates ResolveParameters using the input tactics
  273. // parameters and optional frontingProviderID context.
  274. func (r *Resolver) MakeResolveParameters(
  275. p parameters.ParametersAccessor,
  276. frontingProviderID string,
  277. frontingDialDomain string) (*ResolveParameters, error) {
  278. params := &ResolveParameters{
  279. AttemptsPerServer: p.Int(parameters.DNSResolverAttemptsPerServer),
  280. AttemptsPerPreferredServer: p.Int(parameters.DNSResolverAttemptsPerPreferredServer),
  281. RequestTimeout: p.Duration(parameters.DNSResolverRequestTimeout),
  282. AwaitTimeout: p.Duration(parameters.DNSResolverAwaitTimeout),
  283. }
  284. // When a frontingProviderID is specified, generate a pre-resolved IP
  285. // address, based on tactics configuration.
  286. if frontingProviderID != "" {
  287. if frontingDialDomain == "" {
  288. return nil, errors.TraceNew("missing fronting dial domain")
  289. }
  290. if p.WeightedCoinFlip(parameters.DNSResolverPreresolvedIPAddressProbability) {
  291. CIDRs := p.LabeledCIDRs(parameters.DNSResolverPreresolvedIPAddressCIDRs, frontingProviderID)
  292. if len(CIDRs) > 0 {
  293. CIDR := CIDRs[prng.Intn(len(CIDRs))]
  294. IP, err := generateIPAddressFromCIDR(CIDR)
  295. if err != nil {
  296. return nil, errors.Trace(err)
  297. }
  298. params.PreresolvedIPAddress = IP.String()
  299. params.PreresolvedDomain = frontingDialDomain
  300. }
  301. }
  302. }
  303. // When preferring an alternate DNS server, select the alternate from
  304. // DNSResolverPreferredAlternateServers. This list is for circumvention
  305. // operations, such as using a public DNS server with a protocol
  306. // transform. Otherwise, select from DNSResolverAlternateServers, which
  307. // is a fallback list of DNS servers to be used when the system DNS
  308. // servers cannot be obtained.
  309. preferredServers := p.Strings(parameters.DNSResolverPreferredAlternateServers)
  310. preferAlternateDNSServer := len(preferredServers) > 0 && p.WeightedCoinFlip(
  311. parameters.DNSResolverPreferAlternateServerProbability)
  312. alternateServers := preferredServers
  313. if !preferAlternateDNSServer {
  314. alternateServers = p.Strings(parameters.DNSResolverAlternateServers)
  315. }
  316. // Select an alternate DNS server, typically a public DNS server. Ensure
  317. // tactics is configured with an empty DNSResolverAlternateServers list
  318. // in cases where attempts to public DNS server are unwanted.
  319. if len(alternateServers) > 0 {
  320. alternateServer := alternateServers[prng.Intn(len(alternateServers))]
  321. // Check that the alternateServer has a well-formed IP address; and add
  322. // a default port if none it present.
  323. host, _, err := net.SplitHostPort(alternateServer)
  324. if err != nil {
  325. // Assume the SplitHostPort error is due to missing port.
  326. host = alternateServer
  327. alternateServer = net.JoinHostPort(alternateServer, resolverDNSPort)
  328. }
  329. if net.ParseIP(host) == nil {
  330. // Log warning and proceed without this DNS server.
  331. r.networkConfig.logWarning(
  332. errors.TraceNew("invalid alternate DNS server IP address"))
  333. } else {
  334. params.AlternateDNSServer = alternateServer
  335. params.PreferAlternateDNSServer = preferAlternateDNSServer
  336. }
  337. }
  338. // Select a DNS transform. DNS request transforms are "scoped" by
  339. // alternate DNS server (IP address without port); that is, when an
  340. // alternate DNS server is certain to be attempted first, a transform
  341. // associated with and known to work with that DNS server will be
  342. // selected. Otherwise, a transform from the default scope
  343. // (transforms.SCOPE_ANY == "") is selected.
  344. //
  345. // In any case, ResolveIP will only apply a transform on the first request
  346. // attempt.
  347. if p.WeightedCoinFlip(parameters.DNSResolverProtocolTransformProbability) {
  348. specs := p.ProtocolTransformSpecs(
  349. parameters.DNSResolverProtocolTransformSpecs)
  350. scopedSpecNames := p.ProtocolTransformScopedSpecNames(
  351. parameters.DNSResolverProtocolTransformScopedSpecNames)
  352. // The alternate DNS server will be the first attempt if
  353. // PreferAlternateDNSServer or the list of system DNS servers is empty.
  354. //
  355. // Limitation: the system DNS server list may change, due to a later
  356. // Resolver.update call when ResolveIP is called with these
  357. // ResolveParameters.
  358. _, systemServers := r.getNetworkState()
  359. scope := transforms.SCOPE_ANY
  360. if params.AlternateDNSServer != "" &&
  361. (params.PreferAlternateDNSServer || len(systemServers) == 0) {
  362. // Remove the port number, as the scope key is an IP address only.
  363. //
  364. // TODO: when we only just added the default port above, which is
  365. // the common case, we could avoid this extra split.
  366. host, _, err := net.SplitHostPort(params.AlternateDNSServer)
  367. if err != nil {
  368. return nil, errors.Trace(err)
  369. }
  370. scope = host
  371. }
  372. name, spec := specs.Select(scope, scopedSpecNames)
  373. if spec != nil {
  374. params.ProtocolTransformName = name
  375. params.ProtocolTransformSpec = spec
  376. var err error
  377. params.ProtocolTransformSeed, err = prng.NewSeed()
  378. if err != nil {
  379. return nil, errors.Trace(err)
  380. }
  381. }
  382. }
  383. if p.WeightedCoinFlip(parameters.DNSResolverIncludeEDNS0Probability) {
  384. params.IncludeEDNS0 = true
  385. }
  386. return params, nil
  387. }
  388. // ResolveAddress splits the input host:port address, calls ResolveIP to
  389. // resolve the IP address of the host, selects an IP if there are multiple,
  390. // and returns a rejoined IP:port.
  391. func (r *Resolver) ResolveAddress(
  392. ctx context.Context,
  393. networkID string,
  394. params *ResolveParameters,
  395. address string) (string, error) {
  396. hostname, port, err := net.SplitHostPort(address)
  397. if err != nil {
  398. return "", errors.Trace(err)
  399. }
  400. IPs, err := r.ResolveIP(ctx, networkID, params, hostname)
  401. if err != nil {
  402. return "", errors.Trace(err)
  403. }
  404. return net.JoinHostPort(IPs[prng.Intn(len(IPs))].String(), port), nil
  405. }
  406. // ResolveIP resolves a domain name.
  407. //
  408. // The input params may be nil, in which case default timeouts are used.
  409. //
  410. // ResolveIP performs concurrent A and AAAA lookups, returns any valid
  411. // response IPs, and caches results. An error is returned when there are
  412. // no valid response IPs.
  413. //
  414. // ResolveIP is not a general purpose resolver and is Psiphon-specific. For
  415. // example, resolved domains are expected to exist; ResolveIP does not
  416. // fallback to TCP; does not consult any "hosts" file; does not perform RFC
  417. // 3484 sorting logic (see Go issue 18518); only implements a subset of
  418. // Go/glibc/resolv.conf(5) resolver parameters (attempts and timeouts, but
  419. // not rotate, single-request etc.) ResolveIP does not implement singleflight
  420. // logic, as the Go resolver does, and allows multiple concurrent request for
  421. // the same domain -- Psiphon won't often resolve the exact same domain
  422. // multiple times concurrently, and, when it does, there's a circumvention
  423. // benefit to attempting different DNS servers and protocol transforms.
  424. //
  425. // ResolveIP does not currently support DoT, DoH, or TCP; those protocols are
  426. // often blocked or less common. Instead, ResolveIP makes a best effort to
  427. // evade plaintext UDP DNS interference by ignoring invalid responses and by
  428. // optionally applying protocol transforms that may evade blocking.
  429. func (r *Resolver) ResolveIP(
  430. ctx context.Context,
  431. networkID string,
  432. params *ResolveParameters,
  433. hostname string) (x []net.IP, y error) {
  434. // ResolveIP does _not_ lock r.mutex for the lifetime of the function, to
  435. // ensure many ResolveIP calls can run concurrently.
  436. // If the hostname is already an IP address, just return that. For
  437. // metrics, this does not count as a resolve, as the caller may invoke
  438. // ResolveIP for all dials.
  439. IP := net.ParseIP(hostname)
  440. if IP != nil {
  441. return []net.IP{IP}, nil
  442. }
  443. // Count all resolves of an actual domain, including cached and
  444. // pre-resolved cases.
  445. r.updateMetricResolves()
  446. // Call updateNetworkState immediately before resolving, as a best effort
  447. // to ensure that system DNS servers and IPv6 routing network state
  448. // reflects the current network. updateNetworkState locks the Resolver
  449. // mutex for its duration, and so concurrent ResolveIP calls may block at
  450. // this point. However, all updateNetworkState operations are local to
  451. // the host or device; and, if the networkID is unchanged since the last
  452. // call, updateNetworkState may not perform any operations; and after the
  453. // updateNetworkState call, ResolveIP proceeds without holding the mutex
  454. // lock. As a result, this step should not prevent ResolveIP concurrency.
  455. r.updateNetworkState(networkID)
  456. if params == nil {
  457. // Supply default ResolveParameters
  458. params = &ResolveParameters{
  459. AttemptsPerServer: resolverDefaultAttemptsPerServer,
  460. AttemptsPerPreferredServer: resolverDefaultAttemptsPerServer,
  461. RequestTimeout: resolverDefaultRequestTimeout,
  462. AwaitTimeout: resolverDefaultAwaitTimeout,
  463. }
  464. }
  465. // When PreresolvedIPAddress is set, tactics parameters determined the IP address
  466. // in this case.
  467. if params.PreresolvedIPAddress != "" && params.PreresolvedDomain == hostname {
  468. IP := net.ParseIP(params.PreresolvedIPAddress)
  469. if IP == nil {
  470. // Unexpected case, as MakeResolveParameters selects the IP address.
  471. return nil, errors.TraceNew("invalid IP address")
  472. }
  473. return []net.IP{IP}, nil
  474. }
  475. // Use a snapshot of the current network state, including IPv6 routing and
  476. // system DNS servers.
  477. //
  478. // Limitation: these values are used even if the network changes in the
  479. // middle of a ResolveIP call; ResolveIP is not interrupted if the
  480. // network changes.
  481. hasIPv6Route, systemServers := r.getNetworkState()
  482. // Use the standard library resolver when there's no GetDNSServers, or the
  483. // system server list is otherwise empty, and no alternate DNS server is
  484. // configured.
  485. //
  486. // Note that in the case where there are no system DNS servers and there
  487. // is an AlternateDNSServer, if the AlternateDNSServer attempt fails,
  488. // control does not flow back to defaultResolverLookupIP. On platforms
  489. // without GetDNSServers, the caller must arrange for distinct attempts
  490. // that try a AlternateDNSServer, or just use the standard library
  491. // resolver.
  492. //
  493. // ResolveIP should always be called, even when defaultResolverLookupIP is
  494. // expected to be used, to ensure correct metrics counts and ensure a
  495. // consistent error message log stack for all DNS-related failures.
  496. //
  497. if len(systemServers) == 0 &&
  498. params.AlternateDNSServer == "" &&
  499. r.networkConfig.allowDefaultResolver() {
  500. IPs, err := defaultResolverLookupIP(ctx, hostname, r.networkConfig.LogHostnames)
  501. r.updateMetricDefaultResolver(err == nil)
  502. if err != nil {
  503. return nil, errors.Trace(err)
  504. }
  505. return IPs, err
  506. }
  507. // Consult the cache before making queries. This comes after the standard
  508. // library case, to allow the standard library to provide its own caching
  509. // logic.
  510. IPs := r.getCache(hostname)
  511. if IPs != nil {
  512. return IPs, nil
  513. }
  514. // Set the list of DNS servers to attempt. AlternateDNSServer is used
  515. // first when PreferAlternateDNSServer is set; otherwise
  516. // AlternateDNSServer is used only when there is no system DNS server.
  517. var servers []string
  518. if params.AlternateDNSServer != "" &&
  519. (len(systemServers) == 0 || params.PreferAlternateDNSServer) {
  520. servers = []string{params.AlternateDNSServer}
  521. }
  522. servers = append(servers, systemServers...)
  523. if len(servers) == 0 {
  524. return nil, errors.TraceNew("no DNS servers")
  525. }
  526. // Set the request timeout and set up a reusable timer for handling
  527. // request and await timeouts.
  528. //
  529. // We expect to always have a request timeout. Handle the unexpected no
  530. // timeout, 0, case by setting the longest timeout possible, ~290 years;
  531. // always having a non-zero timeout makes the following code marginally
  532. // simpler.
  533. requestTimeout := params.RequestTimeout
  534. if requestTimeout == 0 {
  535. requestTimeout = 1<<63 - 1
  536. }
  537. var timer *time.Timer
  538. timerDrained := true
  539. resetTimer := func(timeout time.Duration) {
  540. if timer == nil {
  541. timer = time.NewTimer(timeout)
  542. } else {
  543. if !timerDrained && !timer.Stop() {
  544. <-timer.C
  545. }
  546. timer.Reset(timeout)
  547. }
  548. timerDrained = false
  549. }
  550. // Orchestrate the DNS requests
  551. resolveCtx, cancelFunc := context.WithCancel(ctx)
  552. defer cancelFunc()
  553. waitGroup := new(sync.WaitGroup)
  554. conns := common.NewConns()
  555. type answer struct {
  556. attempt int
  557. IPs []net.IP
  558. TTLs []time.Duration
  559. }
  560. var maxAttempts int
  561. if params.PreferAlternateDNSServer {
  562. maxAttempts = params.AttemptsPerPreferredServer
  563. maxAttempts += (len(servers) - 1) * params.AttemptsPerServer
  564. } else {
  565. maxAttempts = len(servers) * params.AttemptsPerServer
  566. }
  567. answerChan := make(chan *answer, maxAttempts*2)
  568. inFlight := int64(0)
  569. awaitA := int32(1)
  570. awaitAAAA := int32(1)
  571. if !hasIPv6Route {
  572. awaitAAAA = 0
  573. }
  574. var result *answer
  575. var lastErr atomic.Value
  576. stop := false
  577. for i := 0; !stop && i < maxAttempts; i++ {
  578. var index int
  579. if params.PreferAlternateDNSServer {
  580. if i < params.AttemptsPerPreferredServer {
  581. index = 0
  582. } else {
  583. index = 1 + ((i - params.AttemptsPerPreferredServer) / params.AttemptsPerServer)
  584. }
  585. } else {
  586. index = i / params.AttemptsPerServer
  587. }
  588. server := servers[index]
  589. // Only the first attempt pair tries transforms, as it's not certain
  590. // the transforms will be compatible with DNS servers.
  591. useProtocolTransform := (i == 0 && params.ProtocolTransformSpec != nil)
  592. // Send A and AAAA requests concurrently.
  593. questionTypes := []resolverQuestionType{resolverQuestionTypeA, resolverQuestionTypeAAAA}
  594. if !hasIPv6Route {
  595. questionTypes = questionTypes[0:1]
  596. }
  597. for _, questionType := range questionTypes {
  598. waitGroup.Add(1)
  599. // For metrics, track peak concurrent in-flight requests for
  600. // a _single_ ResolveIP. inFlight for this ResolveIP is also used
  601. // to determine whether to await additional responses once the
  602. // first, valid response is received. For that logic to be
  603. // correct, we must increment inFlight in this outer goroutine to
  604. // ensure the await logic sees either inFlight > 0 or an answer
  605. // in the channel.
  606. r.updateMetricPeakInFlight(atomic.AddInt64(&inFlight, 1))
  607. go func(attempt int, questionType resolverQuestionType, useProtocolTransform bool) {
  608. defer waitGroup.Done()
  609. // We must decrement inFlight only after sending an answer and
  610. // setting awaitA or awaitAAAA to ensure that the await logic
  611. // in the outer goroutine will see inFlight 0 only once those
  612. // operations are complete.
  613. //
  614. // We cannot wait and decrement inFlight when the outer
  615. // goroutine receives answers, as no answer is sent in some
  616. // cases, such as when the resolve fails due to NXDOMAIN.
  617. defer atomic.AddInt64(&inFlight, -1)
  618. // The request count metric counts the _intention_ to send
  619. // requests, as there's a possibility that newResolverConn or
  620. // performDNSQuery fail locally before sending a request packet.
  621. switch questionType {
  622. case resolverQuestionTypeA:
  623. r.updateMetricRequestsIPv4()
  624. case resolverQuestionTypeAAAA:
  625. r.updateMetricRequestsIPv6()
  626. }
  627. // While it's possible, and potentially more optimal, to use
  628. // the same UDP socket for both the A and AAAA request, we
  629. // use a distinct socket per request, as common DNS clients do.
  630. conn, err := r.newResolverConn(r.networkConfig.logWarning, server)
  631. if err != nil {
  632. lastErr.Store(errors.Trace(err))
  633. return
  634. }
  635. defer conn.Close()
  636. // There's no context.Context support in the underlying API
  637. // used by performDNSQuery, so instead collect all the
  638. // request conns so that they can be closed, and any blocking
  639. // network I/O interrupted, below, if resolveCtx is done.
  640. if !conns.Add(conn) {
  641. // Add fails when conns is already closed.
  642. return
  643. }
  644. // performDNSQuery will send the request and read a response.
  645. // performDNSQuery will continue reading responses until it
  646. // receives a valid response, which can mitigate a subset of
  647. // DNS injection attacks (to the limited extent possible for
  648. // plaintext DNS).
  649. //
  650. // For IPv4, NXDOMAIN or a response with no IPs is not
  651. // expected for domains resolved by Psiphon, so
  652. // performDNSQuery treats such a response as invalid. For
  653. // IPv6, a response with no IPs, may be valid(even though the
  654. // response could be forged); the resolver will continue its
  655. // attempts loop if it has no other IPs.
  656. //
  657. // Each performDNSQuery has no timeout and runs
  658. // until it has read a valid response or the requestCtx is
  659. // done. This allows for slow arriving, valid responses to
  660. // eventually succeed, even if the read time exceeds
  661. // requestTimeout, as long as the read time is less than the
  662. // requestCtx timeout.
  663. //
  664. // With this approach, the overall ResolveIP call may have
  665. // more than 2 performDNSQuery requests in-flight at a time,
  666. // as requestTimeout is used to schedule sending the next
  667. // attempt but not cancel the current attempt. For
  668. // connectionless UDP, the resulting network traffic should
  669. // be similar to common DNS clients which do cancel request
  670. // before beginning the next attempt.
  671. IPs, TTLs, RTT, err := performDNSQuery(
  672. resolveCtx,
  673. r.networkConfig.logWarning,
  674. params,
  675. useProtocolTransform,
  676. conn,
  677. questionType,
  678. hostname)
  679. // Update the min/max RTT metric when reported (>=0) even if
  680. // the result is an error; i.e., the even if there was an
  681. // invalid response.
  682. //
  683. // Limitation: since individual requests aren't cancelled
  684. // after requestTimeout, RTT metrics won't reflect
  685. // no-response cases, although request and response count
  686. // disparities will still show up in the metrics.
  687. if RTT >= 0 {
  688. r.updateMetricRTT(RTT)
  689. }
  690. if err != nil {
  691. lastErr.Store(errors.Trace(err))
  692. return
  693. }
  694. if len(IPs) > 0 {
  695. select {
  696. case answerChan <- &answer{attempt: attempt, IPs: IPs, TTLs: TTLs}:
  697. default:
  698. }
  699. }
  700. // Mark no longer awaiting A or AAAA as long as there is a
  701. // valid response, even if there are no IPs in the IPv6 case.
  702. switch questionType {
  703. case resolverQuestionTypeA:
  704. r.updateMetricResponsesIPv4()
  705. atomic.StoreInt32(&awaitA, 0)
  706. case resolverQuestionTypeAAAA:
  707. r.updateMetricResponsesIPv6()
  708. atomic.StoreInt32(&awaitAAAA, 0)
  709. default:
  710. }
  711. }(i+1, questionType, useProtocolTransform)
  712. }
  713. resetTimer(requestTimeout)
  714. select {
  715. case result = <-answerChan:
  716. // When the first answer, a response with valid IPs, arrives, exit
  717. // the attempts loop. The following await branch may collect
  718. // additional answers.
  719. params.setFirstAttemptWithAnswer(result.attempt)
  720. stop = true
  721. case <-timer.C:
  722. // When requestTimeout arrives, loop around and launch the next
  723. // attempt; leave the existing requests running in case they
  724. // eventually respond.
  725. timerDrained = true
  726. case <-resolveCtx.Done():
  727. // When resolveCtx is done, exit the attempts loop.
  728. //
  729. // Append the existing lastErr, which may convey useful
  730. // information to be reported in a failed_tunnel error message.
  731. lastErr.Store(errors.Tracef("%v (lastErr: %v)", ctx.Err(), lastErr.Load()))
  732. stop = true
  733. }
  734. }
  735. // Receive any additional answers, now present in the channel, which
  736. // arrived concurrent with the first answer. This receive avoids a race
  737. // condition where inFlight may now be 0, with additional answers
  738. // enqueued, in which case the following await branch is not taken.
  739. //
  740. // It's possible for the attempts loop to exit with no received answer due
  741. // to timeouts or cancellation while, concurrently, an answer is sent to
  742. // the channel. In this case, when result == nil, we ignore the answers
  743. // and leave this as a failed resolve.
  744. if result != nil {
  745. for loop := true; loop; {
  746. select {
  747. case nextAnswer := <-answerChan:
  748. result.IPs = append(result.IPs, nextAnswer.IPs...)
  749. result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
  750. default:
  751. loop = false
  752. }
  753. }
  754. }
  755. // When we have an answer, await -- for a short time,
  756. // params.AwaitTimeout -- extra answers from any remaining in-flight
  757. // requests. Only await if the request isn't cancelled and we don't
  758. // already have at least one IPv4 and one IPv6 response; only await AAAA
  759. // if it was sent; note that a valid AAAA response may include no IPs
  760. // lastErr is not set in timeout/cancelled cases here, since we already
  761. // have an answer.
  762. if result != nil &&
  763. resolveCtx.Err() == nil &&
  764. atomic.LoadInt64(&inFlight) > 0 &&
  765. (atomic.LoadInt32(&awaitA) != 0 || atomic.LoadInt32(&awaitAAAA) != 0) &&
  766. params.AwaitTimeout > 0 {
  767. resetTimer(params.AwaitTimeout)
  768. for {
  769. stop := false
  770. select {
  771. case nextAnswer := <-answerChan:
  772. result.IPs = append(result.IPs, nextAnswer.IPs...)
  773. result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
  774. case <-timer.C:
  775. timerDrained = true
  776. stop = true
  777. case <-resolveCtx.Done():
  778. stop = true
  779. }
  780. if stop ||
  781. atomic.LoadInt64(&inFlight) == 0 ||
  782. (atomic.LoadInt32(&awaitA) == 0 && atomic.LoadInt32(&awaitAAAA) == 0) {
  783. break
  784. }
  785. }
  786. }
  787. if timer != nil {
  788. timer.Stop()
  789. }
  790. // Interrupt all workers.
  791. cancelFunc()
  792. conns.CloseAll()
  793. waitGroup.Wait()
  794. // When there's no answer, return the last error.
  795. if result == nil {
  796. err := lastErr.Load()
  797. if err == nil {
  798. err = errors.TraceNew("unexpected missing error")
  799. }
  800. if r.networkConfig.LogHostnames {
  801. err = fmt.Errorf("resolve %s : %w", hostname, err.(error))
  802. }
  803. return nil, errors.Trace(err.(error))
  804. }
  805. if len(result.IPs) == 0 {
  806. // Unexpected, since a len(IPs) > 0 check precedes sending to answerChan.
  807. return nil, errors.TraceNew("unexpected no IPs")
  808. }
  809. // Update the cache now, after all results are gathered.
  810. r.setCache(hostname, result.IPs, result.TTLs)
  811. return result.IPs, nil
  812. }
  813. // VerifyCacheExtension extends the TTL for any cached result for the
  814. // specified hostname to at least NetworkConfig.CacheExtensionVerifiedTTL.
  815. func (r *Resolver) VerifyCacheExtension(hostname string) {
  816. r.mutex.Lock()
  817. defer r.mutex.Unlock()
  818. if r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  819. return
  820. }
  821. if net.ParseIP(hostname) != nil {
  822. return
  823. }
  824. entry, expires, ok := r.cache.GetWithExpiration(hostname)
  825. if !ok {
  826. return
  827. }
  828. // Change the TTL only if the entry expires and the existing TTL isn't
  829. // longer than the extension.
  830. neverExpires := time.Time{}
  831. if expires == neverExpires ||
  832. expires.After(time.Now().Add(r.networkConfig.CacheExtensionVerifiedTTL)) {
  833. return
  834. }
  835. r.cache.Set(hostname, entry, r.networkConfig.CacheExtensionVerifiedTTL)
  836. r.metrics.verifiedCacheExtensions += 1
  837. }
  838. // GetMetrics returns a summary of DNS metrics.
  839. func (r *Resolver) GetMetrics() string {
  840. r.mutex.Lock()
  841. defer r.mutex.Unlock()
  842. // When r.metrics.minRTT < 0, min/maxRTT is unset.
  843. minRTT := "n/a"
  844. maxRTT := minRTT
  845. if r.metrics.minRTT >= 0 {
  846. minRTT = fmt.Sprintf("%d", r.metrics.minRTT/time.Millisecond)
  847. maxRTT = fmt.Sprintf("%d", r.metrics.maxRTT/time.Millisecond)
  848. }
  849. extend := ""
  850. if r.networkConfig.CacheExtensionVerifiedTTL > 0 {
  851. extend = fmt.Sprintf("| extend %d ", r.metrics.verifiedCacheExtensions)
  852. }
  853. defaultResolves := ""
  854. if r.networkConfig.allowDefaultResolver() {
  855. defaultResolves = fmt.Sprintf(
  856. " | def %d/%d", r.metrics.defaultResolves, r.metrics.defaultSuccesses)
  857. }
  858. // Note that the number of system resolvers is a point-in-time value,
  859. // while the others are cumulative.
  860. return fmt.Sprintf("resolves %d | hit %d %s| req v4/v6 %d/%d | resp %d/%d | peak %d | rtt %s - %s ms. | sys %d%s",
  861. r.metrics.resolves,
  862. r.metrics.cacheHits,
  863. extend,
  864. r.metrics.requestsIPv4,
  865. r.metrics.requestsIPv6,
  866. r.metrics.responsesIPv4,
  867. r.metrics.responsesIPv6,
  868. r.metrics.peakInFlight,
  869. minRTT,
  870. maxRTT,
  871. len(r.systemServers),
  872. defaultResolves)
  873. }
  874. // updateNetworkState updates the system DNS server list, IPv6 state, and the
  875. // cache.
  876. //
  877. // Any errors that occur while querying network state are logged; in error
  878. // conditions the functionality of the resolver may be reduced, but the
  879. // resolver remains operational.
  880. func (r *Resolver) updateNetworkState(networkID string) {
  881. r.mutex.Lock()
  882. defer r.mutex.Unlock()
  883. // Only perform blocking/expensive update operations when necessary.
  884. updateAll := false
  885. updateIPv6Route := false
  886. updateServers := false
  887. flushCache := false
  888. // If r.cache is nil, this is the first update call in NewResolver. Create
  889. // the cache and perform all updates.
  890. if r.cache == nil {
  891. r.cache = lrucache.NewWithLRU(
  892. resolverCacheDefaultTTL,
  893. resolverCacheReapFrequency,
  894. resolverCacheMaxEntries)
  895. updateAll = true
  896. }
  897. // Perform all updates when the networkID has changed, which indicates a
  898. // different network.
  899. if r.networkID != networkID {
  900. updateAll = true
  901. }
  902. if updateAll {
  903. updateIPv6Route = true
  904. updateServers = true
  905. flushCache = true
  906. }
  907. // Even when the networkID has not changed, update DNS servers
  908. // periodically. This is similar to how other DNS clients
  909. // poll /etc/resolv.conf, including the period of 5s.
  910. if time.Since(r.lastServersUpdate) > resolverServersUpdateTTL {
  911. updateServers = true
  912. }
  913. // Update hasIPv6Route, which indicates whether the current network has an
  914. // IPv6 route and so if DNS requests for AAAA records will be sent.
  915. // There's no use for AAAA records on IPv4-only networks; and other
  916. // common DNS clients omit AAAA requests on IPv4-only records, so these
  917. // requests would otherwise be unusual.
  918. //
  919. // There's no hasIPv4Route as we always need to resolve A records,
  920. // particularly for IPv4-only endpoints; for IPv6-only networks,
  921. // NetworkConfig.IPv6Synthesize should be used to accomodate IPv4 DNS
  922. // server addresses, and dials performed outside the Resolver will
  923. // similarly use NAT 64 (on iOS; on Android, 464XLAT will handle this
  924. // transparently).
  925. if updateIPv6Route {
  926. if r.networkConfig.HasIPv6Route != nil {
  927. r.hasIPv6Route = r.networkConfig.HasIPv6Route()
  928. } else {
  929. hasIPv6Route, err := hasRoutableIPv6Interface()
  930. if err != nil {
  931. // Log warning and proceed without IPv6.
  932. r.networkConfig.logWarning(
  933. errors.Tracef("unable to determine IPv6 route: %v", err))
  934. hasIPv6Route = false
  935. }
  936. r.hasIPv6Route = hasIPv6Route
  937. }
  938. }
  939. // Update the list of system DNS servers. It's not an error condition here
  940. // if the list is empty: a subsequent ResolveIP may use
  941. // ResolveParameters which specifies an AlternateDNSServer.
  942. if updateServers && r.networkConfig.GetDNSServers != nil {
  943. systemServers := []string{}
  944. for _, systemServer := range r.networkConfig.GetDNSServers() {
  945. host, _, err := net.SplitHostPort(systemServer)
  946. if err != nil {
  947. // Assume the SplitHostPort error is due to systemServer being
  948. // an IP only, and append the default port, 53. If
  949. // systemServer _isn't_ an IP, the following ParseIP will fail.
  950. host = systemServer
  951. systemServer = net.JoinHostPort(systemServer, resolverDNSPort)
  952. }
  953. if net.ParseIP(host) == nil {
  954. // Log warning and proceed without this DNS server.
  955. r.networkConfig.logWarning(
  956. errors.TraceNew("invalid DNS server IP address"))
  957. continue
  958. }
  959. systemServers = append(systemServers, systemServer)
  960. }
  961. // Check if the list of servers has changed, including order. If
  962. // changed, flush the cache even if the networkID has not changed.
  963. // Cached results are only considered valid as long as the system DNS
  964. // configuration remains the same.
  965. equal := len(r.systemServers) == len(systemServers)
  966. if equal {
  967. for i := 0; i < len(r.systemServers); i++ {
  968. if r.systemServers[i] != systemServers[i] {
  969. equal = false
  970. break
  971. }
  972. }
  973. }
  974. flushCache = flushCache || !equal
  975. // Concurrency note: once the r.systemServers slice is set, the
  976. // contents of the backing array must not be modified due to
  977. // concurrent ResolveIP calls.
  978. r.systemServers = systemServers
  979. r.lastServersUpdate = time.Now()
  980. }
  981. // Skip cache flushes when the extended DNS caching mechanism is enabled.
  982. // TODO: retain only verified cache entries?
  983. if flushCache && r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  984. r.cache.Flush()
  985. }
  986. // Set r.networkID only after all operations complete without errors; if
  987. // r.networkID were set earlier, a subsequent
  988. // ResolveIP/updateNetworkState call might proceed as if the network
  989. // state were updated for the specified network ID.
  990. r.networkID = networkID
  991. }
  992. func (r *Resolver) getNetworkState() (bool, []string) {
  993. r.mutex.Lock()
  994. defer r.mutex.Unlock()
  995. return r.hasIPv6Route, r.systemServers
  996. }
  997. func (r *Resolver) setCache(hostname string, IPs []net.IP, TTLs []time.Duration) {
  998. r.mutex.Lock()
  999. defer r.mutex.Unlock()
  1000. // The shortest TTL is used. In some cases, a DNS server may omit the TTL
  1001. // or set a 0 TTL, in which case the default is used.
  1002. TTL := resolverDefaultAnswerTTL
  1003. for _, answerTTL := range TTLs {
  1004. if answerTTL > 0 && answerTTL < TTL {
  1005. TTL = answerTTL
  1006. }
  1007. }
  1008. // When NetworkConfig.CacheExtensionInitialTTL configured, ensure the TTL
  1009. // is no shorter than CacheExtensionInitialTTL.
  1010. if r.networkConfig.CacheExtensionInitialTTL != 0 &&
  1011. TTL < r.networkConfig.CacheExtensionInitialTTL {
  1012. TTL = r.networkConfig.CacheExtensionInitialTTL
  1013. }
  1014. // Limitation: with concurrent ResolveIPs for the same domain, the last
  1015. // setCache call determines the cache value. The results are not merged.
  1016. r.cache.Set(hostname, IPs, TTL)
  1017. }
  1018. func (r *Resolver) getCache(hostname string) []net.IP {
  1019. r.mutex.Lock()
  1020. defer r.mutex.Unlock()
  1021. entry, ok := r.cache.Get(hostname)
  1022. if !ok {
  1023. return nil
  1024. }
  1025. r.metrics.cacheHits += 1
  1026. return entry.([]net.IP)
  1027. }
  1028. // newResolverConn creates a UDP socket that will send packets to serverAddr.
  1029. // serverAddr is an IP:port, which allows specifying the port for testing or
  1030. // in rare cases where the port isn't 53.
  1031. func (r *Resolver) newResolverConn(
  1032. logWarning func(error),
  1033. serverAddr string) (retConn net.Conn, retErr error) {
  1034. defer func() {
  1035. if retErr != nil {
  1036. logWarning(retErr)
  1037. }
  1038. }()
  1039. // When configured, attempt to synthesize an IPv6 address from
  1040. // an IPv4 address for compatibility on DNS64/NAT64 networks.
  1041. // If synthesize fails, try the original address.
  1042. if r.networkConfig.IPv6Synthesize != nil {
  1043. serverIPStr, port, err := net.SplitHostPort(serverAddr)
  1044. if err != nil {
  1045. return nil, errors.Trace(err)
  1046. }
  1047. serverIP := net.ParseIP(serverIPStr)
  1048. if serverIP != nil && serverIP.To4() != nil {
  1049. synthesized := r.networkConfig.IPv6Synthesize(serverIPStr)
  1050. if synthesized != "" && net.ParseIP(synthesized) != nil {
  1051. serverAddr = net.JoinHostPort(synthesized, port)
  1052. }
  1053. }
  1054. }
  1055. dialer := &net.Dialer{}
  1056. if r.networkConfig.BindToDevice != nil {
  1057. dialer.Control = func(_, _ string, c syscall.RawConn) error {
  1058. var controlErr error
  1059. err := c.Control(func(fd uintptr) {
  1060. _, err := r.networkConfig.BindToDevice(int(fd))
  1061. if err != nil {
  1062. controlErr = errors.Tracef("BindToDevice failed: %v", err)
  1063. return
  1064. }
  1065. })
  1066. if controlErr != nil {
  1067. return errors.Trace(controlErr)
  1068. }
  1069. return errors.Trace(err)
  1070. }
  1071. }
  1072. // context.Background is ok in this case as the UDP dial is just a local
  1073. // syscall to create the socket.
  1074. conn, err := dialer.DialContext(context.Background(), "udp", serverAddr)
  1075. if err != nil {
  1076. return nil, errors.Trace(err)
  1077. }
  1078. return conn, nil
  1079. }
  1080. func (r *Resolver) updateMetricResolves() {
  1081. r.mutex.Lock()
  1082. defer r.mutex.Unlock()
  1083. r.metrics.resolves += 1
  1084. }
  1085. func (r *Resolver) updateMetricRequestsIPv4() {
  1086. r.mutex.Lock()
  1087. defer r.mutex.Unlock()
  1088. r.metrics.requestsIPv4 += 1
  1089. }
  1090. func (r *Resolver) updateMetricRequestsIPv6() {
  1091. r.mutex.Lock()
  1092. defer r.mutex.Unlock()
  1093. r.metrics.requestsIPv6 += 1
  1094. }
  1095. func (r *Resolver) updateMetricResponsesIPv4() {
  1096. r.mutex.Lock()
  1097. defer r.mutex.Unlock()
  1098. r.metrics.responsesIPv4 += 1
  1099. }
  1100. func (r *Resolver) updateMetricResponsesIPv6() {
  1101. r.mutex.Lock()
  1102. defer r.mutex.Unlock()
  1103. r.metrics.responsesIPv6 += 1
  1104. }
  1105. func (r *Resolver) updateMetricDefaultResolver(success bool) {
  1106. r.mutex.Lock()
  1107. defer r.mutex.Unlock()
  1108. r.metrics.defaultResolves += 1
  1109. if success {
  1110. r.metrics.defaultSuccesses += 1
  1111. }
  1112. }
  1113. func (r *Resolver) updateMetricPeakInFlight(inFlight int64) {
  1114. r.mutex.Lock()
  1115. defer r.mutex.Unlock()
  1116. if inFlight > r.metrics.peakInFlight {
  1117. r.metrics.peakInFlight = inFlight
  1118. }
  1119. }
  1120. func (r *Resolver) updateMetricRTT(rtt time.Duration) {
  1121. r.mutex.Lock()
  1122. defer r.mutex.Unlock()
  1123. if rtt < 0 {
  1124. // Ignore invalid input.
  1125. return
  1126. }
  1127. // When r.metrics.minRTT < 0, min/maxRTT is unset.
  1128. if r.metrics.minRTT < 0 || rtt < r.metrics.minRTT {
  1129. r.metrics.minRTT = rtt
  1130. }
  1131. if rtt > r.metrics.maxRTT {
  1132. r.metrics.maxRTT = rtt
  1133. }
  1134. }
  1135. func hasRoutableIPv6Interface() (bool, error) {
  1136. interfaces, err := net.Interfaces()
  1137. if err != nil {
  1138. return false, errors.Trace(err)
  1139. }
  1140. for _, in := range interfaces {
  1141. if (in.Flags&net.FlagUp == 0) ||
  1142. (in.Flags&(net.FlagLoopback|net.FlagPointToPoint)) != 0 {
  1143. continue
  1144. }
  1145. addrs, err := in.Addrs()
  1146. if err != nil {
  1147. return false, errors.Trace(err)
  1148. }
  1149. for _, addr := range addrs {
  1150. if IPNet, ok := addr.(*net.IPNet); ok &&
  1151. IPNet.IP.To4() == nil &&
  1152. !IPNet.IP.IsLinkLocalUnicast() {
  1153. return true, nil
  1154. }
  1155. }
  1156. }
  1157. return false, nil
  1158. }
  1159. func generateIPAddressFromCIDR(CIDR string) (net.IP, error) {
  1160. _, IPNet, err := net.ParseCIDR(CIDR)
  1161. if err != nil {
  1162. return nil, errors.Trace(err)
  1163. }
  1164. // A retry is required, since a CIDR may include broadcast IPs (a.b.c.0) or
  1165. // other invalid values. The number of retries is limited to ensure we
  1166. // don't hang in the case of a misconfiguration.
  1167. for i := 0; i < 10; i++ {
  1168. randBytes := prng.Bytes(len(IPNet.IP))
  1169. IP := make(net.IP, len(IPNet.IP))
  1170. // The 1 bits in the mask must apply to the IP in the CIDR and the 0
  1171. // bits in the mask are available to randomize.
  1172. for i := 0; i < len(IP); i++ {
  1173. IP[i] = (IPNet.IP[i] & IPNet.Mask[i]) | (randBytes[i] & ^IPNet.Mask[i])
  1174. }
  1175. if IP.IsGlobalUnicast() && !common.IsBogon(IP) {
  1176. return IP, nil
  1177. }
  1178. }
  1179. return nil, errors.TraceNew("failed to generate random IP")
  1180. }
  1181. type resolverQuestionType int
  1182. const (
  1183. resolverQuestionTypeA = 0
  1184. resolverQuestionTypeAAAA = 1
  1185. )
  1186. func performDNSQuery(
  1187. resolveCtx context.Context,
  1188. logWarning func(error),
  1189. params *ResolveParameters,
  1190. useProtocolTransform bool,
  1191. conn net.Conn,
  1192. questionType resolverQuestionType,
  1193. hostname string) ([]net.IP, []time.Duration, time.Duration, error) {
  1194. if useProtocolTransform {
  1195. if params.ProtocolTransformSpec == nil ||
  1196. params.ProtocolTransformSeed == nil {
  1197. return nil, nil, -1, errors.TraceNew("invalid protocol transform configuration")
  1198. }
  1199. // miekg/dns expects conn to be a net.PacketConn or else it writes the
  1200. // TCP length prefix
  1201. udpConn, ok := conn.(*net.UDPConn)
  1202. if !ok {
  1203. return nil, nil, -1, errors.TraceNew("conn is not a *net.UDPConn")
  1204. }
  1205. conn = &transformDNSPacketConn{
  1206. UDPConn: udpConn,
  1207. transform: params.ProtocolTransformSpec,
  1208. seed: params.ProtocolTransformSeed,
  1209. }
  1210. }
  1211. // UDPSize sets the receive buffer to > 512, even when we don't include
  1212. // EDNS(0), which will mitigate issues with RFC 1035 non-compliant
  1213. // servers. See Go issue 51127.
  1214. dnsConn := &dns.Conn{
  1215. Conn: conn,
  1216. UDPSize: udpPacketBufferSize,
  1217. }
  1218. defer dnsConn.Close()
  1219. // SetQuestion initializes request.MsgHdr.Id to a random value
  1220. request := &dns.Msg{MsgHdr: dns.MsgHdr{RecursionDesired: true}}
  1221. switch questionType {
  1222. case resolverQuestionTypeA:
  1223. request.SetQuestion(dns.Fqdn(hostname), dns.TypeA)
  1224. case resolverQuestionTypeAAAA:
  1225. request.SetQuestion(dns.Fqdn(hostname), dns.TypeAAAA)
  1226. default:
  1227. return nil, nil, -1, errors.TraceNew("unknown DNS request question type")
  1228. }
  1229. if params.IncludeEDNS0 {
  1230. // miekg/dns: "RFC 6891, Section 6.1.1 allows the OPT record to appear
  1231. // anywhere in the additional record section, but it's usually at the
  1232. // end..."
  1233. request.SetEdns0(udpPacketBufferSize, false)
  1234. }
  1235. startTime := time.Now()
  1236. // Send the DNS request
  1237. dnsConn.WriteMsg(request)
  1238. // Read and process the DNS response
  1239. var IPs []net.IP
  1240. var TTLs []time.Duration
  1241. var lastErr error
  1242. RTT := time.Duration(-1)
  1243. for {
  1244. // Stop when resolveCtx is done; the caller, ResolveIP, will also
  1245. // close conn, which will interrupt a blocking dnsConn.ReadMsg.
  1246. if resolveCtx.Err() != nil {
  1247. // ResolveIP, which calls performDNSQuery, already records the
  1248. // context error (e.g., context timeout), so instead report
  1249. // lastErr, when present, as it may contain more useful
  1250. // information about why a response was rejected.
  1251. err := lastErr
  1252. if err == nil {
  1253. err = errors.Trace(resolveCtx.Err())
  1254. }
  1255. return nil, nil, RTT, err
  1256. }
  1257. // Read a response. RTT is the elapsed time between sending the
  1258. // request and reading the last received response.
  1259. response, err := dnsConn.ReadMsg()
  1260. RTT = time.Since(startTime)
  1261. if err == nil && response.MsgHdr.Id != request.MsgHdr.Id {
  1262. err = dns.ErrId
  1263. }
  1264. if err != nil {
  1265. // Try reading again, in case the first response packet failed to
  1266. // unmarshal or had an invalid ID. The Go resolver also does this;
  1267. // see Go issue 13281.
  1268. if resolveCtx.Err() == nil {
  1269. // Only log if resolveCtx is not done; otherwise the error could
  1270. // be due to conn being closed by ResolveIP.
  1271. lastErr = errors.Tracef("invalid response: %v", err)
  1272. logWarning(lastErr)
  1273. }
  1274. continue
  1275. }
  1276. // Check the RCode.
  1277. //
  1278. // For IPv4, we expect RCodeSuccess as Psiphon will typically only
  1279. // resolve domains that exist and have a valid IP (when this isn't
  1280. // the case, and we retry, the overall ResolveIP and its parent dial
  1281. // will still abort after resolveCtx is done, or RequestTimeout
  1282. // expires for maxAttempts).
  1283. //
  1284. // For IPv6, we should also expect RCodeSuccess even if there is no
  1285. // AAAA record, as long as the domain exists and has an A record.
  1286. // However, per RFC 6147 section 5.1.2, we may receive
  1287. // NXDOMAIN: "...some servers respond with RCODE=3 to a AAAA query
  1288. // even if there is an A record available for that owner name. Those
  1289. // servers are in clear violation of the meaning of RCODE 3...". In
  1290. // this case, we coalesce NXDOMAIN into success to treat the response
  1291. // the same as success with no AAAA record.
  1292. //
  1293. // All other RCodes, which are unexpected, lead to a read retry.
  1294. if response.MsgHdr.Rcode != dns.RcodeSuccess &&
  1295. !(questionType == resolverQuestionTypeAAAA && response.MsgHdr.Rcode == dns.RcodeNameError) {
  1296. errMsg, ok := dns.RcodeToString[response.MsgHdr.Rcode]
  1297. if !ok {
  1298. errMsg = fmt.Sprintf("Rcode: %d", response.MsgHdr.Rcode)
  1299. }
  1300. lastErr = errors.Tracef("unexpected RCode: %v", errMsg)
  1301. logWarning(lastErr)
  1302. continue
  1303. }
  1304. // Extract all IP answers, along with corresponding TTLs for caching.
  1305. // Perform additional validation, which may lead to another read
  1306. // retry. However, if _any_ valid IP is found, stop reading and
  1307. // return that result. Again, the validation is only best effort.
  1308. checkFailed := false
  1309. for _, answer := range response.Answer {
  1310. haveAnswer := false
  1311. var IP net.IP
  1312. var TTLSec uint32
  1313. switch questionType {
  1314. case resolverQuestionTypeA:
  1315. if a, ok := answer.(*dns.A); ok {
  1316. IP = a.A
  1317. TTLSec = a.Hdr.Ttl
  1318. haveAnswer = true
  1319. }
  1320. case resolverQuestionTypeAAAA:
  1321. if aaaa, ok := answer.(*dns.AAAA); ok {
  1322. IP = aaaa.AAAA
  1323. TTLSec = aaaa.Hdr.Ttl
  1324. haveAnswer = true
  1325. }
  1326. }
  1327. if !haveAnswer {
  1328. continue
  1329. }
  1330. err := checkDNSAnswerIP(IP)
  1331. if err != nil {
  1332. checkFailed = true
  1333. lastErr = errors.Tracef("invalid IP: %v", err)
  1334. logWarning(lastErr)
  1335. // Check the next answer
  1336. continue
  1337. }
  1338. IPs = append(IPs, IP)
  1339. TTLs = append(TTLs, time.Duration(TTLSec)*time.Second)
  1340. }
  1341. // For IPv4, an IP is expected, as noted in the comment above.
  1342. //
  1343. // In potential cases where we resolve a domain that has only an IPv6
  1344. // address, the concurrent AAAA request will deliver its result to
  1345. // ResolveIP, and that answer will be selected, so only the "await"
  1346. // logic will delay the parent dial in that case.
  1347. if questionType == resolverQuestionTypeA && len(IPs) == 0 && !checkFailed {
  1348. checkFailed = true
  1349. lastErr = errors.TraceNew("unexpected empty A response")
  1350. logWarning(lastErr)
  1351. }
  1352. // Retry if there are no valid IPs and any error; if no error, this
  1353. // may be a valid AAAA response with no IPs, in which case return the
  1354. // result.
  1355. if len(IPs) == 0 && checkFailed {
  1356. continue
  1357. }
  1358. return IPs, TTLs, RTT, nil
  1359. }
  1360. }
  1361. func checkDNSAnswerIP(IP net.IP) error {
  1362. if IP == nil {
  1363. return errors.TraceNew("IP is nil")
  1364. }
  1365. // Limitation: this could still be a phony/injected response, it's not
  1366. // possible to verify with plaintext DNS, but a "bogon" IP is clearly
  1367. // invalid.
  1368. if common.IsBogon(IP) {
  1369. return errors.TraceNew("IP is bogon")
  1370. }
  1371. // Create a temporary socket bound to the destination IP. This checks
  1372. // thats the local host has a route to this IP. If not, we'll reject the
  1373. // IP. This prevents selecting an IP which is guaranteed to fail to dial.
  1374. // Use UDP as this results in no network traffic; the destination port is
  1375. // arbitrary. The Go resolver performs a similar operation.
  1376. //
  1377. // Limitations:
  1378. // - We may cache the IP and reuse it without checking routability again;
  1379. // the cache should be flushed when network state changes.
  1380. // - Given that the AAAA is requested only when the host has an IPv6
  1381. // route, we don't expect this to often fail with a _valid_ response.
  1382. // However, this remains a possibility and in this case,
  1383. // performDNSQuery will keep awaiting a response which can trigger
  1384. // the "await" logic.
  1385. conn, err := net.DialUDP("udp", nil, &net.UDPAddr{IP: IP, Port: 443})
  1386. if err != nil {
  1387. return errors.Trace(err)
  1388. }
  1389. conn.Close()
  1390. return nil
  1391. }
  1392. func defaultResolverLookupIP(
  1393. ctx context.Context, hostname string, logHostnames bool) ([]net.IP, error) {
  1394. addrs, err := net.DefaultResolver.LookupIPAddr(ctx, hostname)
  1395. if err != nil && !logHostnames {
  1396. // Remove domain names from "net" error messages.
  1397. err = common.RedactNetError(err)
  1398. }
  1399. if err != nil {
  1400. return nil, errors.Trace(err)
  1401. }
  1402. ips := make([]net.IP, len(addrs))
  1403. for i, addr := range addrs {
  1404. ips[i] = addr.IP
  1405. }
  1406. return ips, nil
  1407. }
  1408. // transformDNSPacketConn wraps a *net.UDPConn, intercepting Write calls and
  1409. // applying the specified protocol transform.
  1410. //
  1411. // As transforms operate on strings and DNS requests are binary, the transform
  1412. // should be expressed using hex characters. The DNS packet to be written
  1413. // (input the Write) is converted to hex, transformed, and converted back to
  1414. // binary and then actually written to the UDP socket.
  1415. type transformDNSPacketConn struct {
  1416. *net.UDPConn
  1417. transform transforms.Spec
  1418. seed *prng.Seed
  1419. }
  1420. func (conn *transformDNSPacketConn) Write(b []byte) (int, error) {
  1421. // Limitation: there is no check that a transformed packet remains within
  1422. // the network packet MTU.
  1423. input := hex.EncodeToString(b)
  1424. output, err := conn.transform.ApplyString(conn.seed, input)
  1425. if err != nil {
  1426. return 0, errors.Trace(err)
  1427. }
  1428. packet, err := hex.DecodeString(output)
  1429. if err != nil {
  1430. return 0, errors.Trace(err)
  1431. }
  1432. _, err = conn.UDPConn.Write(packet)
  1433. if err != nil {
  1434. // In the error case, don't report bytes written as the number could
  1435. // exceed the pre-transform length.
  1436. return 0, errors.Trace(err)
  1437. }
  1438. // Report the pre-transform length as bytes written, as the caller may check
  1439. // that the requested len(b) bytes were written.
  1440. return len(b), nil
  1441. }