resolver.go 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707
  1. /*
  2. * Copyright (c) 2022, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package resolver implements a DNS stub resolver, or DNS client, which
  20. // resolves domain names.
  21. //
  22. // The resolver is Psiphon-specific and oriented towards blocking resistance.
  23. // See ResolveIP for more details.
  24. package resolver
  25. import (
  26. "context"
  27. "encoding/hex"
  28. "fmt"
  29. "net"
  30. "sync"
  31. "sync/atomic"
  32. "syscall"
  33. "time"
  34. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  35. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  36. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
  37. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  38. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
  39. lrucache "github.com/cognusion/go-cache-lru"
  40. "github.com/miekg/dns"
  41. )
  // Resolver tuning constants.
  const (
  	// Cache expiry, reaping, and sizing values.
  	resolverCacheDefaultTTL    = time.Minute
  	resolverCacheReapFrequency = time.Minute
  	resolverCacheMaxEntries    = 10000

  	// NOTE(review): presumably the minimum interval between refreshes of
  	// the system DNS server list; the consumer is outside this excerpt.
  	resolverServersUpdateTTL = 5 * time.Second

  	// Defaults that ResolveIP applies when it is called with nil
  	// ResolveParameters.
  	resolverDefaultAttemptsPerServer = 2
  	resolverDefaultRequestTimeout    = 5 * time.Second
  	resolverDefaultAwaitTimeout      = 10 * time.Millisecond

  	resolverDefaultAnswerTTL = time.Minute

  	// resolverDNSPort is the port added to a DNS server address that was
  	// given without one.
  	resolverDNSPort = "53"

  	udpPacketBufferSize = 1232
  )
  // NetworkConfig carries the network-level hooks and settings used by a
  // Resolver. Every callback field is optional.
  type NetworkConfig struct {

  	// GetDNSServers reports the system DNS servers discovered through OS
  	// APIs, highest priority first. Each entry is either IP:port or a bare
  	// IP, in which case port 53 is assumed. May be nil.
  	GetDNSServers func() []string

  	// BindToDevice receives the file descriptor of a UDP socket and should
  	// make sure that socket is excluded from VPN routing. May be nil.
  	BindToDevice func(fd int) (string, error)

  	// AllowDefaultResolverWithBindToDevice declares that the default
  	// resolver remains safe to use even though BindToDevice is configured,
  	// because the host OS automatically keeps DNS requests out of the VPN.
  	AllowDefaultResolverWithBindToDevice bool

  	// IPv6Synthesize performs NAT64 synthesis: given an IPv4 address it
  	// returns a synthesized IPv6 address routing to the same endpoint. May
  	// be nil.
  	IPv6Synthesize func(IPv4 string) string

  	// HasIPv6Route reports whether the host has an IPv6 route. When nil,
  	// the Resolver falls back to its internal hasRoutableIPv6Interface
  	// check. That check can fail on some platforms (on Android, for
  	// example, with "route ip+net: netlinkrib: permission denied"; see Go
  	// issue 40569), which is why an override is offered.
  	HasIPv6Route func() bool

  	// LogWarning, when set, receives warnings and transient errors that
  	// would otherwise be neither recorded nor returned.
  	LogWarning func(error)

  	// LogHostnames selects whether hostnames are included in errors.
  	LogHostnames bool

  	// CacheExtensionInitialTTL is a minimum TTL applied when a resolution
  	// result is first cached, overriding any TTL carried in the DNS
  	// response. A value of 0 turns the feature off.
  	CacheExtensionInitialTTL time.Duration

  	// CacheExtensionVerifiedTTL is the minimum TTL given to a cached
  	// resolution result once that result has been verified. A value of 0
  	// turns the feature off.
  	//
  	// Background: DNS cache extension partially works around the difficulty
  	// of learning the underlying system DNS server IPs on platforms such as
  	// iOS while a VPN is up and after network changes (Wi-Fi to mobile, for
  	// instance). ResolveParameters.AlternateDNSServer can name a known
  	// public DNS server, but public servers may be blocked, and always
  	// falling back to one creates unusual traffic. The default system
  	// resolver may be usable, but it lacks certain circumvention
  	// capabilities.
  	//
  	// Stretching the TTL of cached responses lets Psiphon redial domains
  	// using IPs that recently worked.
  	//
  	// The initial TTL keeps an entry cached long enough for a dial to
  	// complete and verify its endpoint. Once a dial has authenticated (for
  	// example, the destination Psiphon server), Psiphon calls
  	// Resolver.VerifyExtendCacheTTL, which further extends the entry's TTL
  	// to CacheExtensionVerifiedTTL. The initial TTL is intended to be on
  	// the order of minutes; the verified TTL may be on the order of hours.
  	//
  	// While CacheExtensionVerifiedTTL is on, the DNS cache is not flushed on
  	// network changes, so earlier entries stay available in the problematic
  	// scenario. As with adjusting TTLs, this is a deliberate trade-off that
  	// departs from standard best practice but is expected to be more
  	// blocking resistant. It also assumes endpoints such as CDN IPs are
  	// typically reachable from any network.
  	CacheExtensionVerifiedTTL time.Duration
  }

  // allowDefaultResolver reports whether the standard library resolver may
  // be used. With BindToDevice configured the system resolver may not route
  // outside of the VPN, so it is permitted only when the configuration
  // explicitly allows it.
  func (c *NetworkConfig) allowDefaultResolver() bool {
  	if c.BindToDevice == nil {
  		return true
  	}
  	return c.AllowDefaultResolverWithBindToDevice
  }

  // logWarning passes err to the LogWarning callback; it does nothing when
  // no callback is configured.
  func (c *NetworkConfig) logWarning(err error) {
  	if c.LogWarning == nil {
  		return
  	}
  	c.LogWarning(err)
  }
  130. // ResolveParameters specifies the configuration and behavior of a single
  131. // ResolveIP call, a single domain name resolution.
  132. //
  133. // New ResolveParameters may be generated by calling MakeResolveParameters,
  134. // which takes tactics parameters as an input.
  135. //
  136. // ResolveParameters may be persisted for replay.
  137. type ResolveParameters struct {
  138. // AttemptsPerServer specifies how many requests to send to each DNS
  139. // server before trying the next server. IPv4 and IPv6 requests are sent
  140. // concurrently and count as one attempt.
  141. AttemptsPerServer int
  142. // AttemptsPerPreferredServer is AttemptsPerServer for a preferred
  143. // alternate DNS server.
  144. AttemptsPerPreferredServer int
  145. // RequestTimeout specifies how long to wait for a valid response before
  146. // moving on to the next attempt.
  147. RequestTimeout time.Duration
  148. // AwaitTimeout specifies how long to await an additional response after
  149. // the first response is received. This additional wait time applies only
  150. // when there is either no IPv4 or IPv6 response.
  151. AwaitTimeout time.Duration
  152. // PreresolvedIPAddress specifies an IP address result to be used in place
  153. // of making a request.
  154. PreresolvedIPAddress string
  155. // PreresolvedDomain is the domain for which PreresolvedIPAddress is to be
  156. // used.
  157. PreresolvedDomain string
  158. // AlternateDNSServer specifies an alterate DNS server (IP:port, or IP
  159. // only with port 53 assumed) to be used when either no system DNS
  160. // servers are available or when PreferAlternateDNSServer is set.
  161. AlternateDNSServer string
  162. // PreferAlternateDNSServer indicates whether to prioritize using the
  163. // AlternateDNSServer. When set, the AlternateDNSServer is attempted
  164. // before any system DNS servers.
  165. PreferAlternateDNSServer bool
  166. // ProtocolTransformName specifies the name associated with
  167. // ProtocolTransformSpec and is used for metrics.
  168. ProtocolTransformName string
  169. // ProtocolTransformSpec specifies a transform to apply to the DNS request packet.
  170. // See: "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms".
  171. //
  172. // As transforms operate on strings and DNS requests are binary,
  173. // transforms should be expressed using hex characters.
  174. //
  175. // DNS transforms include strategies discovered by the Geneva team,
  176. // https://geneva.cs.umd.edu.
  177. ProtocolTransformSpec transforms.Spec
  178. // ProtocolTransformSeed specifies the seed to use for generating random
  179. // data in the ProtocolTransformSpec transform. To replay a transform,
  180. // specify the same seed.
  181. ProtocolTransformSeed *prng.Seed
  182. // IncludeEDNS0 indicates whether to include the EDNS(0) UDP maximum
  183. // response size extension in DNS requests. The resolver can handle
  184. // responses larger than 512 bytes (RFC 1035 maximum) regardless of
  185. // whether the extension is included; the extension may be included as
  186. // part of appearing similar to other DNS traffic.
  187. IncludeEDNS0 bool
  188. firstAttemptWithAnswer int32
  189. }
  190. // GetFirstAttemptWithAnswer returns the index of the first request attempt
  191. // that received a valid response, for the most recent ResolveIP call using
  192. // this ResolveParameters. This information is used for logging metrics. The
  193. // first attempt has index 1. GetFirstAttemptWithAnswer return 0 when no
  194. // request attempt has reported a valid response.
  195. //
  196. // The caller is responsible for synchronizing use of a ResolveParameters
  197. // instance (e.g, use a distinct ResolveParameters per ResolveIP to ensure
  198. // GetFirstAttemptWithAnswer refers to a specific ResolveIP).
  199. func (r *ResolveParameters) GetFirstAttemptWithAnswer() int {
  200. return int(atomic.LoadInt32(&r.firstAttemptWithAnswer))
  201. }
  202. func (r *ResolveParameters) setFirstAttemptWithAnswer(attempt int) {
  203. atomic.StoreInt32(&r.firstAttemptWithAnswer, int32(attempt))
  204. }
  205. // Implementation note: Go's standard net.Resolver supports specifying a
  206. // custom Dial function. This could be used to implement at least a large
  207. // subset of the Resolver functionality on top of Go's standard library
  208. // resolver. However, net.Resolver is limited to using the CGO resolver on
  209. // Android, https://github.com/golang/go/issues/8877, in which case the
  210. // custom Dial function is not used. Furthermore, the pure Go resolver in
  211. // net/dnsclient_unix.go appears to not be used on Windows at this time.
  212. //
  213. // Go also provides golang.org/x/net/dns/dnsmessage, a DNS message marshaller,
  214. // which could potentially be used in place of github.com/miekg/dns.
  215. // Resolver is a DNS stub resolver, or DNS client, which resolves domain
  216. // names. A Resolver instance maintains a cache, a network state snapshot,
  217. // and metrics. All ResolveIP calls will share the same cache and state.
  218. // Multiple concurrent ResolveIP calls are supported.
  219. type Resolver struct {
  220. networkConfig *NetworkConfig
  221. mutex sync.Mutex
  222. networkID string
  223. hasIPv6Route bool
  224. systemServers []string
  225. lastServersUpdate time.Time
  226. cache *lrucache.Cache
  227. metrics resolverMetrics
  228. }
  // resolverMetrics holds the counters and round trip time extremes
  // maintained by a Resolver.
  type resolverMetrics struct {
  	resolves                int
  	cacheHits               int
  	verifiedCacheExtensions int
  	requestsIPv4            int
  	requestsIPv6            int
  	responsesIPv4           int
  	responsesIPv6           int
  	defaultResolves         int
  	defaultSuccesses        int
  	peakInFlight            int64

  	// minRTT starts at -1; see newResolverMetrics.
  	minRTT time.Duration
  	maxRTT time.Duration
  }

  // newResolverMetrics returns zeroed metrics with minRTT set to -1.
  // NOTE(review): -1 presumably marks "no RTT recorded yet"; confirm
  // against the code that updates minRTT.
  func newResolverMetrics() resolverMetrics {
  	var metrics resolverMetrics
  	metrics.minRTT = -1
  	return metrics
  }
  246. // NewResolver creates a new Resolver instance.
  247. func NewResolver(networkConfig *NetworkConfig, networkID string) *Resolver {
  248. r := &Resolver{
  249. networkConfig: networkConfig,
  250. metrics: newResolverMetrics(),
  251. }
  252. // updateNetworkState will initialize the cache and network state,
  253. // including system DNS servers.
  254. r.updateNetworkState(networkID)
  255. return r
  256. }
  257. // Stop clears the Resolver cache and resets metrics. Stop must be called only
  258. // after ceasing all in-flight ResolveIP goroutines, or else the cache or
  259. // metrics may repopulate. A Resolver may be resumed after calling Stop, but
  260. // Update must be called first.
  261. func (r *Resolver) Stop() {
  262. r.mutex.Lock()
  263. defer r.mutex.Unlock()
  264. // r.networkConfig is not set to nil to avoid possible nil pointer
  265. // dereferences by concurrent ResolveIP calls.
  266. r.networkID = ""
  267. r.hasIPv6Route = false
  268. r.systemServers = nil
  269. r.cache.Flush()
  270. r.metrics = newResolverMetrics()
  271. }
  272. // MakeResolveParameters generates ResolveParameters using the input tactics
  273. // parameters and optional frontingProviderID context.
  274. func (r *Resolver) MakeResolveParameters(
  275. p parameters.ParametersAccessor,
  276. frontingProviderID string,
  277. frontingDialDomain string) (*ResolveParameters, error) {
  278. params := &ResolveParameters{
  279. AttemptsPerServer: p.Int(parameters.DNSResolverAttemptsPerServer),
  280. AttemptsPerPreferredServer: p.Int(parameters.DNSResolverAttemptsPerPreferredServer),
  281. RequestTimeout: p.Duration(parameters.DNSResolverRequestTimeout),
  282. AwaitTimeout: p.Duration(parameters.DNSResolverAwaitTimeout),
  283. }
  284. // When a frontingProviderID is specified, generate a pre-resolved IP
  285. // address, based on tactics configuration.
  286. if frontingProviderID != "" {
  287. if frontingDialDomain == "" {
  288. return nil, errors.TraceNew("missing fronting dial domain")
  289. }
  290. if p.WeightedCoinFlip(parameters.DNSResolverPreresolvedIPAddressProbability) {
  291. CIDRs := p.LabeledCIDRs(parameters.DNSResolverPreresolvedIPAddressCIDRs, frontingProviderID)
  292. if len(CIDRs) > 0 {
  293. CIDR := CIDRs[prng.Intn(len(CIDRs))]
  294. IP, err := generateIPAddressFromCIDR(CIDR)
  295. if err != nil {
  296. return nil, errors.Trace(err)
  297. }
  298. params.PreresolvedIPAddress = IP.String()
  299. params.PreresolvedDomain = frontingDialDomain
  300. }
  301. }
  302. }
  303. // When preferring an alternate DNS server, select the alternate from
  304. // DNSResolverPreferredAlternateServers. This list is for circumvention
  305. // operations, such as using a public DNS server with a protocol
  306. // transform. Otherwise, select from DNSResolverAlternateServers, which
  307. // is a fallback list of DNS servers to be used when the system DNS
  308. // servers cannot be obtained.
  309. preferredServers := p.Strings(parameters.DNSResolverPreferredAlternateServers)
  310. preferAlternateDNSServer := len(preferredServers) > 0 && p.WeightedCoinFlip(
  311. parameters.DNSResolverPreferAlternateServerProbability)
  312. alternateServers := preferredServers
  313. if !preferAlternateDNSServer {
  314. alternateServers = p.Strings(parameters.DNSResolverAlternateServers)
  315. }
  316. // Select an alternate DNS server, typically a public DNS server. Ensure
  317. // tactics is configured with an empty DNSResolverAlternateServers list
  318. // in cases where attempts to public DNS server are unwanted.
  319. if len(alternateServers) > 0 {
  320. alternateServer := alternateServers[prng.Intn(len(alternateServers))]
  321. // Check that the alternateServer has a well-formed IP address; and add
  322. // a default port if none it present.
  323. host, _, err := net.SplitHostPort(alternateServer)
  324. if err != nil {
  325. // Assume the SplitHostPort error is due to missing port.
  326. host = alternateServer
  327. alternateServer = net.JoinHostPort(alternateServer, resolverDNSPort)
  328. }
  329. if net.ParseIP(host) == nil {
  330. // Log warning and proceed without this DNS server.
  331. r.networkConfig.logWarning(
  332. errors.TraceNew("invalid alternate DNS server IP address"))
  333. } else {
  334. params.AlternateDNSServer = alternateServer
  335. params.PreferAlternateDNSServer = preferAlternateDNSServer
  336. }
  337. }
  338. // Select a DNS transform. DNS request transforms are "scoped" by
  339. // alternate DNS server (IP address without port); that is, when an
  340. // alternate DNS server is certain to be attempted first, a transform
  341. // associated with and known to work with that DNS server will be
  342. // selected. Otherwise, a transform from the default scope
  343. // (transforms.SCOPE_ANY == "") is selected.
  344. //
  345. // In any case, ResolveIP will only apply a transform on the first request
  346. // attempt.
  347. if p.WeightedCoinFlip(parameters.DNSResolverProtocolTransformProbability) {
  348. specs := p.ProtocolTransformSpecs(
  349. parameters.DNSResolverProtocolTransformSpecs)
  350. scopedSpecNames := p.ProtocolTransformScopedSpecNames(
  351. parameters.DNSResolverProtocolTransformScopedSpecNames)
  352. // The alternate DNS server will be the first attempt if
  353. // PreferAlternateDNSServer or the list of system DNS servers is empty.
  354. //
  355. // Limitation: the system DNS server list may change, due to a later
  356. // Resolver.update call when ResolveIP is called with these
  357. // ResolveParameters.
  358. _, systemServers := r.getNetworkState()
  359. scope := transforms.SCOPE_ANY
  360. if params.AlternateDNSServer != "" &&
  361. (params.PreferAlternateDNSServer || len(systemServers) == 0) {
  362. // Remove the port number, as the scope key is an IP address only.
  363. //
  364. // TODO: when we only just added the default port above, which is
  365. // the common case, we could avoid this extra split.
  366. host, _, err := net.SplitHostPort(params.AlternateDNSServer)
  367. if err != nil {
  368. return nil, errors.Trace(err)
  369. }
  370. scope = host
  371. }
  372. name, spec := specs.Select(scope, scopedSpecNames)
  373. if spec != nil {
  374. params.ProtocolTransformName = name
  375. params.ProtocolTransformSpec = spec
  376. var err error
  377. params.ProtocolTransformSeed, err = prng.NewSeed()
  378. if err != nil {
  379. return nil, errors.Trace(err)
  380. }
  381. }
  382. }
  383. if p.WeightedCoinFlip(parameters.DNSResolverIncludeEDNS0Probability) {
  384. params.IncludeEDNS0 = true
  385. }
  386. return params, nil
  387. }
  388. // ResolveAddress splits the input host:port address, calls ResolveIP to
  389. // resolve the IP address of the host, selects an IP if there are multiple,
  390. // and returns a rejoined IP:port.
  391. //
  392. // IP address selection is random. When network input is set
  393. // to "ip4"/"tcp4"/"udp4" or "ip6"/"tcp6"/"udp6", selection is limited to
  394. // IPv4 or IPv6, respectively.
  395. func (r *Resolver) ResolveAddress(
  396. ctx context.Context,
  397. networkID string,
  398. params *ResolveParameters,
  399. network, address string) (string, error) {
  400. hostname, port, err := net.SplitHostPort(address)
  401. if err != nil {
  402. return "", errors.Trace(err)
  403. }
  404. IPs, err := r.ResolveIP(ctx, networkID, params, hostname)
  405. if err != nil {
  406. return "", errors.Trace(err)
  407. }
  408. copyIPs := append([]net.IP(nil), IPs...)
  409. prng.Shuffle(len(copyIPs), func(i, j int) {
  410. copyIPs[i], copyIPs[j] = copyIPs[j], copyIPs[i]
  411. })
  412. index := 0
  413. switch network {
  414. case "ip4", "tcp4", "udp4":
  415. index = -1
  416. for i, IP := range IPs {
  417. if IP.To4() != nil {
  418. index = i
  419. break
  420. }
  421. }
  422. case "ip6", "tcp6", "udp6":
  423. index = -1
  424. for i, IP := range IPs {
  425. if IP.To4() == nil {
  426. index = i
  427. break
  428. }
  429. }
  430. }
  431. if index == -1 {
  432. return "", errors.TraceNew("no IP for network")
  433. }
  434. return net.JoinHostPort(IPs[index].String(), port), nil
  435. }
  436. // ResolveIP resolves a domain name.
  437. //
  438. // The input params may be nil, in which case default timeouts are used.
  439. //
  440. // ResolveIP performs concurrent A and AAAA lookups, returns any valid
  441. // response IPs, and caches results. An error is returned when there are
  442. // no valid response IPs.
  443. //
  444. // ResolveIP is not a general purpose resolver and is Psiphon-specific. For
  445. // example, resolved domains are expected to exist; ResolveIP does not
  446. // fallback to TCP; does not consult any "hosts" file; does not perform RFC
  447. // 3484 sorting logic (see Go issue 18518); only implements a subset of
  448. // Go/glibc/resolv.conf(5) resolver parameters (attempts and timeouts, but
  449. // not rotate, single-request etc.) ResolveIP does not implement singleflight
  450. // logic, as the Go resolver does, and allows multiple concurrent request for
  451. // the same domain -- Psiphon won't often resolve the exact same domain
  452. // multiple times concurrently, and, when it does, there's a circumvention
  453. // benefit to attempting different DNS servers and protocol transforms.
  454. //
  455. // ResolveIP does not currently support DoT, DoH, or TCP; those protocols are
  456. // often blocked or less common. Instead, ResolveIP makes a best effort to
  457. // evade plaintext UDP DNS interference by ignoring invalid responses and by
  458. // optionally applying protocol transforms that may evade blocking.
  459. //
  460. // Due to internal caching, the caller must not mutate returned net.IP slice
  461. // or entries.
  462. func (r *Resolver) ResolveIP(
  463. ctx context.Context,
  464. networkID string,
  465. params *ResolveParameters,
  466. hostname string) ([]net.IP, error) {
  467. // ResolveIP does _not_ lock r.mutex for the lifetime of the function, to
  468. // ensure many ResolveIP calls can run concurrently.
  469. // If the hostname is already an IP address, just return that. For
  470. // metrics, this does not count as a resolve, as the caller may invoke
  471. // ResolveIP for all dials.
  472. IP := net.ParseIP(hostname)
  473. if IP != nil {
  474. return []net.IP{IP}, nil
  475. }
  476. // Count all resolves of an actual domain, including cached and
  477. // pre-resolved cases.
  478. r.updateMetricResolves()
  479. // Call updateNetworkState immediately before resolving, as a best effort
  480. // to ensure that system DNS servers and IPv6 routing network state
  481. // reflects the current network. updateNetworkState locks the Resolver
  482. // mutex for its duration, and so concurrent ResolveIP calls may block at
  483. // this point. However, all updateNetworkState operations are local to
  484. // the host or device; and, if the networkID is unchanged since the last
  485. // call, updateNetworkState may not perform any operations; and after the
  486. // updateNetworkState call, ResolveIP proceeds without holding the mutex
  487. // lock. As a result, this step should not prevent ResolveIP concurrency.
  488. r.updateNetworkState(networkID)
  489. if params == nil {
  490. // Supply default ResolveParameters
  491. params = &ResolveParameters{
  492. AttemptsPerServer: resolverDefaultAttemptsPerServer,
  493. AttemptsPerPreferredServer: resolverDefaultAttemptsPerServer,
  494. RequestTimeout: resolverDefaultRequestTimeout,
  495. AwaitTimeout: resolverDefaultAwaitTimeout,
  496. }
  497. }
  498. // When PreresolvedIPAddress is set, tactics parameters determined the IP address
  499. // in this case.
  500. if params.PreresolvedIPAddress != "" && params.PreresolvedDomain == hostname {
  501. IP := net.ParseIP(params.PreresolvedIPAddress)
  502. if IP == nil {
  503. // Unexpected case, as MakeResolveParameters selects the IP address.
  504. return nil, errors.TraceNew("invalid IP address")
  505. }
  506. return []net.IP{IP}, nil
  507. }
  508. // Use a snapshot of the current network state, including IPv6 routing and
  509. // system DNS servers.
  510. //
  511. // Limitation: these values are used even if the network changes in the
  512. // middle of a ResolveIP call; ResolveIP is not interrupted if the
  513. // network changes.
  514. hasIPv6Route, systemServers := r.getNetworkState()
  515. // Use the standard library resolver when there's no GetDNSServers, or the
  516. // system server list is otherwise empty, and no alternate DNS server is
  517. // configured.
  518. //
  519. // Note that in the case where there are no system DNS servers and there
  520. // is an AlternateDNSServer, if the AlternateDNSServer attempt fails,
  521. // control does not flow back to defaultResolverLookupIP. On platforms
  522. // without GetDNSServers, the caller must arrange for distinct attempts
  523. // that try a AlternateDNSServer, or just use the standard library
  524. // resolver.
  525. //
  526. // ResolveIP should always be called, even when defaultResolverLookupIP is
  527. // expected to be used, to ensure correct metrics counts and ensure a
  528. // consistent error message log stack for all DNS-related failures.
  529. //
  530. if len(systemServers) == 0 &&
  531. params.AlternateDNSServer == "" &&
  532. r.networkConfig.allowDefaultResolver() {
  533. IPs, err := defaultResolverLookupIP(ctx, hostname, r.networkConfig.LogHostnames)
  534. r.updateMetricDefaultResolver(err == nil)
  535. if err != nil {
  536. return nil, errors.Trace(err)
  537. }
  538. return IPs, err
  539. }
  540. // Consult the cache before making queries. This comes after the standard
  541. // library case, to allow the standard library to provide its own caching
  542. // logic.
  543. IPs := r.getCache(hostname)
  544. if IPs != nil {
  545. return IPs, nil
  546. }
  547. // Set the list of DNS servers to attempt. AlternateDNSServer is used
  548. // first when PreferAlternateDNSServer is set; otherwise
  549. // AlternateDNSServer is used only when there is no system DNS server.
  550. var servers []string
  551. if params.AlternateDNSServer != "" &&
  552. (len(systemServers) == 0 || params.PreferAlternateDNSServer) {
  553. servers = []string{params.AlternateDNSServer}
  554. }
  555. servers = append(servers, systemServers...)
  556. if len(servers) == 0 {
  557. return nil, errors.TraceNew("no DNS servers")
  558. }
  559. // Set the request timeout and set up a reusable timer for handling
  560. // request and await timeouts.
  561. //
  562. // We expect to always have a request timeout. Handle the unexpected no
  563. // timeout, 0, case by setting the longest timeout possible, ~290 years;
  564. // always having a non-zero timeout makes the following code marginally
  565. // simpler.
  566. requestTimeout := params.RequestTimeout
  567. if requestTimeout == 0 {
  568. requestTimeout = 1<<63 - 1
  569. }
  570. var timer *time.Timer
  571. timerDrained := true
  572. resetTimer := func(timeout time.Duration) {
  573. if timer == nil {
  574. timer = time.NewTimer(timeout)
  575. } else {
  576. if !timerDrained && !timer.Stop() {
  577. <-timer.C
  578. }
  579. timer.Reset(timeout)
  580. }
  581. timerDrained = false
  582. }
  583. // Orchestrate the DNS requests
  584. resolveCtx, cancelFunc := context.WithCancel(ctx)
  585. defer cancelFunc()
  586. waitGroup := new(sync.WaitGroup)
  587. conns := common.NewConns()
  588. type answer struct {
  589. attempt int
  590. IPs []net.IP
  591. TTLs []time.Duration
  592. }
  593. var maxAttempts int
  594. if params.PreferAlternateDNSServer {
  595. maxAttempts = params.AttemptsPerPreferredServer
  596. maxAttempts += (len(servers) - 1) * params.AttemptsPerServer
  597. } else {
  598. maxAttempts = len(servers) * params.AttemptsPerServer
  599. }
  600. answerChan := make(chan *answer, maxAttempts*2)
  601. inFlight := int64(0)
  602. awaitA := int32(1)
  603. awaitAAAA := int32(1)
  604. if !hasIPv6Route {
  605. awaitAAAA = 0
  606. }
  607. var result *answer
  608. var lastErr atomic.Value
  609. stop := false
  610. for i := 0; !stop && i < maxAttempts; i++ {
  611. var index int
  612. if params.PreferAlternateDNSServer {
  613. if i < params.AttemptsPerPreferredServer {
  614. index = 0
  615. } else {
  616. index = 1 + ((i - params.AttemptsPerPreferredServer) / params.AttemptsPerServer)
  617. }
  618. } else {
  619. index = i / params.AttemptsPerServer
  620. }
  621. server := servers[index]
  622. // Only the first attempt pair tries transforms, as it's not certain
  623. // the transforms will be compatible with DNS servers.
  624. useProtocolTransform := (i == 0 && params.ProtocolTransformSpec != nil)
  625. // Send A and AAAA requests concurrently.
  626. questionTypes := []resolverQuestionType{resolverQuestionTypeA, resolverQuestionTypeAAAA}
  627. if !hasIPv6Route {
  628. questionTypes = questionTypes[0:1]
  629. }
  630. for _, questionType := range questionTypes {
  631. waitGroup.Add(1)
  632. // For metrics, track peak concurrent in-flight requests for
  633. // a _single_ ResolveIP. inFlight for this ResolveIP is also used
  634. // to determine whether to await additional responses once the
  635. // first, valid response is received. For that logic to be
  636. // correct, we must increment inFlight in this outer goroutine to
  637. // ensure the await logic sees either inFlight > 0 or an answer
  638. // in the channel.
  639. r.updateMetricPeakInFlight(atomic.AddInt64(&inFlight, 1))
  640. go func(attempt int, questionType resolverQuestionType, useProtocolTransform bool) {
  641. defer waitGroup.Done()
  642. // We must decrement inFlight only after sending an answer and
  643. // setting awaitA or awaitAAAA to ensure that the await logic
  644. // in the outer goroutine will see inFlight 0 only once those
  645. // operations are complete.
  646. //
  647. // We cannot wait and decrement inFlight when the outer
  648. // goroutine receives answers, as no answer is sent in some
  649. // cases, such as when the resolve fails due to NXDOMAIN.
  650. defer atomic.AddInt64(&inFlight, -1)
  651. // The request count metric counts the _intention_ to send
  652. // requests, as there's a possibility that newResolverConn or
  653. // performDNSQuery fail locally before sending a request packet.
  654. switch questionType {
  655. case resolverQuestionTypeA:
  656. r.updateMetricRequestsIPv4()
  657. case resolverQuestionTypeAAAA:
  658. r.updateMetricRequestsIPv6()
  659. }
  660. // While it's possible, and potentially more optimal, to use
  661. // the same UDP socket for both the A and AAAA request, we
  662. // use a distinct socket per request, as common DNS clients do.
  663. conn, err := r.newResolverConn(r.networkConfig.logWarning, server)
  664. if err != nil {
  665. lastErr.Store(errors.Trace(err))
  666. return
  667. }
  668. defer conn.Close()
  669. // There's no context.Context support in the underlying API
  670. // used by performDNSQuery, so instead collect all the
  671. // request conns so that they can be closed, and any blocking
  672. // network I/O interrupted, below, if resolveCtx is done.
  673. if !conns.Add(conn) {
  674. // Add fails when conns is already closed.
  675. return
  676. }
  677. // performDNSQuery will send the request and read a response.
  678. // performDNSQuery will continue reading responses until it
  679. // receives a valid response, which can mitigate a subset of
  680. // DNS injection attacks (to the limited extent possible for
  681. // plaintext DNS).
  682. //
  683. // For IPv4, NXDOMAIN or a response with no IPs is not
  684. // expected for domains resolved by Psiphon, so
  685. // performDNSQuery treats such a response as invalid. For
  686. // IPv6, a response with no IPs may be valid (even though the
  687. // response could be forged); the resolver will continue its
  688. // attempts loop if it has no other IPs.
  689. //
  690. // Each performDNSQuery has no timeout and runs
  691. // until it has read a valid response or the requestCtx is
  692. // done. This allows for slow arriving, valid responses to
  693. // eventually succeed, even if the read time exceeds
  694. // requestTimeout, as long as the read time is less than the
  695. // requestCtx timeout.
  696. //
  697. // With this approach, the overall ResolveIP call may have
  698. // more than 2 performDNSQuery requests in-flight at a time,
  699. // as requestTimeout is used to schedule sending the next
  700. // attempt but not cancel the current attempt. For
  701. // connectionless UDP, the resulting network traffic should
  702. // be similar to common DNS clients which do cancel request
  703. // before beginning the next attempt.
  704. IPs, TTLs, RTT, err := performDNSQuery(
  705. resolveCtx,
  706. r.networkConfig.logWarning,
  707. params,
  708. useProtocolTransform,
  709. conn,
  710. questionType,
  711. hostname)
  712. // Update the min/max RTT metric when reported (>=0) even if
  713. // the result is an error; i.e., even if there was an
  714. // invalid response.
  715. //
  716. // Limitation: since individual requests aren't cancelled
  717. // after requestTimeout, RTT metrics won't reflect
  718. // no-response cases, although request and response count
  719. // disparities will still show up in the metrics.
  720. if RTT >= 0 {
  721. r.updateMetricRTT(RTT)
  722. }
  723. if err != nil {
  724. lastErr.Store(errors.Trace(err))
  725. return
  726. }
  727. if len(IPs) > 0 {
  728. select {
  729. case answerChan <- &answer{attempt: attempt, IPs: IPs, TTLs: TTLs}:
  730. default:
  731. }
  732. }
  733. // Mark no longer awaiting A or AAAA as long as there is a
  734. // valid response, even if there are no IPs in the IPv6 case.
  735. switch questionType {
  736. case resolverQuestionTypeA:
  737. r.updateMetricResponsesIPv4()
  738. atomic.StoreInt32(&awaitA, 0)
  739. case resolverQuestionTypeAAAA:
  740. r.updateMetricResponsesIPv6()
  741. atomic.StoreInt32(&awaitAAAA, 0)
  742. default:
  743. }
  744. }(i+1, questionType, useProtocolTransform)
  745. }
  746. resetTimer(requestTimeout)
  747. select {
  748. case result = <-answerChan:
  749. // When the first answer, a response with valid IPs, arrives, exit
  750. // the attempts loop. The following await branch may collect
  751. // additional answers.
  752. params.setFirstAttemptWithAnswer(result.attempt)
  753. stop = true
  754. case <-timer.C:
  755. // When requestTimeout arrives, loop around and launch the next
  756. // attempt; leave the existing requests running in case they
  757. // eventually respond.
  758. timerDrained = true
  759. case <-resolveCtx.Done():
  760. // When resolveCtx is done, exit the attempts loop.
  761. //
  762. // Append the existing lastErr, which may convey useful
  763. // information to be reported in a failed_tunnel error message.
  764. lastErr.Store(errors.Tracef("%v (lastErr: %v)", ctx.Err(), lastErr.Load()))
  765. stop = true
  766. }
  767. }
  768. // Receive any additional answers, now present in the channel, which
  769. // arrived concurrent with the first answer. This receive avoids a race
  770. // condition where inFlight may now be 0, with additional answers
  771. // enqueued, in which case the following await branch is not taken.
  772. //
  773. // It's possible for the attempts loop to exit with no received answer due
  774. // to timeouts or cancellation while, concurrently, an answer is sent to
  775. // the channel. In this case, when result == nil, we ignore the answers
  776. // and leave this as a failed resolve.
  777. if result != nil {
  778. for loop := true; loop; {
  779. select {
  780. case nextAnswer := <-answerChan:
  781. result.IPs = append(result.IPs, nextAnswer.IPs...)
  782. result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
  783. default:
  784. loop = false
  785. }
  786. }
  787. }
  788. // When we have an answer, await -- for a short time,
  789. // params.AwaitTimeout -- extra answers from any remaining in-flight
  790. // requests. Only await if the request isn't cancelled and we don't
  791. // already have at least one IPv4 and one IPv6 response; only await AAAA
  792. // if it was sent; note that a valid AAAA response may include no IPs.
  793. // lastErr is not set in timeout/cancelled cases here, since we already
  794. // have an answer.
  795. if result != nil &&
  796. resolveCtx.Err() == nil &&
  797. atomic.LoadInt64(&inFlight) > 0 &&
  798. (atomic.LoadInt32(&awaitA) != 0 || atomic.LoadInt32(&awaitAAAA) != 0) &&
  799. params.AwaitTimeout > 0 {
  800. resetTimer(params.AwaitTimeout)
  801. for {
  802. stop := false
  803. select {
  804. case nextAnswer := <-answerChan:
  805. result.IPs = append(result.IPs, nextAnswer.IPs...)
  806. result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
  807. case <-timer.C:
  808. timerDrained = true
  809. stop = true
  810. case <-resolveCtx.Done():
  811. stop = true
  812. }
  813. if stop ||
  814. atomic.LoadInt64(&inFlight) == 0 ||
  815. (atomic.LoadInt32(&awaitA) == 0 && atomic.LoadInt32(&awaitAAAA) == 0) {
  816. break
  817. }
  818. }
  819. }
  820. if timer != nil {
  821. timer.Stop()
  822. }
  823. // Interrupt all workers.
  824. cancelFunc()
  825. conns.CloseAll()
  826. waitGroup.Wait()
  827. // When there's no answer, return the last error.
  828. if result == nil {
  829. err := lastErr.Load()
  830. if err == nil {
  831. err = errors.TraceNew("unexpected missing error")
  832. }
  833. if r.networkConfig.LogHostnames {
  834. err = fmt.Errorf("resolve %s : %w", hostname, err.(error))
  835. }
  836. return nil, errors.Trace(err.(error))
  837. }
  838. if len(result.IPs) == 0 {
  839. // Unexpected, since a len(IPs) > 0 check precedes sending to answerChan.
  840. return nil, errors.TraceNew("unexpected no IPs")
  841. }
  842. // Update the cache now, after all results are gathered.
  843. r.setCache(hostname, result.IPs, result.TTLs)
  844. return result.IPs, nil
  845. }
  846. // VerifyCacheExtension extends the TTL for any cached result for the
  847. // specified hostname to at least NetworkConfig.CacheExtensionVerifiedTTL.
  848. func (r *Resolver) VerifyCacheExtension(hostname string) {
  849. r.mutex.Lock()
  850. defer r.mutex.Unlock()
  851. if r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  852. return
  853. }
  854. if net.ParseIP(hostname) != nil {
  855. return
  856. }
  857. entry, expires, ok := r.cache.GetWithExpiration(hostname)
  858. if !ok {
  859. return
  860. }
  861. // Change the TTL only if the entry expires and the existing TTL isn't
  862. // longer than the extension.
  863. neverExpires := time.Time{}
  864. if expires == neverExpires ||
  865. expires.After(time.Now().Add(r.networkConfig.CacheExtensionVerifiedTTL)) {
  866. return
  867. }
  868. r.cache.Set(hostname, entry, r.networkConfig.CacheExtensionVerifiedTTL)
  869. r.metrics.verifiedCacheExtensions += 1
  870. }
  871. // GetMetrics returns a summary of DNS metrics.
  872. func (r *Resolver) GetMetrics() string {
  873. r.mutex.Lock()
  874. defer r.mutex.Unlock()
  875. // When r.metrics.minRTT < 0, min/maxRTT is unset.
  876. minRTT := "n/a"
  877. maxRTT := minRTT
  878. if r.metrics.minRTT >= 0 {
  879. minRTT = fmt.Sprintf("%d", r.metrics.minRTT/time.Millisecond)
  880. maxRTT = fmt.Sprintf("%d", r.metrics.maxRTT/time.Millisecond)
  881. }
  882. extend := ""
  883. if r.networkConfig.CacheExtensionVerifiedTTL > 0 {
  884. extend = fmt.Sprintf("| extend %d ", r.metrics.verifiedCacheExtensions)
  885. }
  886. defaultResolves := ""
  887. if r.networkConfig.allowDefaultResolver() {
  888. defaultResolves = fmt.Sprintf(
  889. " | def %d/%d", r.metrics.defaultResolves, r.metrics.defaultSuccesses)
  890. }
  891. // Note that the number of system resolvers is a point-in-time value,
  892. // while the others are cumulative.
  893. return fmt.Sprintf("resolves %d | hit %d %s| req v4/v6 %d/%d | resp %d/%d | peak %d | rtt %s - %s ms. | sys %d%s",
  894. r.metrics.resolves,
  895. r.metrics.cacheHits,
  896. extend,
  897. r.metrics.requestsIPv4,
  898. r.metrics.requestsIPv6,
  899. r.metrics.responsesIPv4,
  900. r.metrics.responsesIPv6,
  901. r.metrics.peakInFlight,
  902. minRTT,
  903. maxRTT,
  904. len(r.systemServers),
  905. defaultResolves)
  906. }
  907. // updateNetworkState updates the system DNS server list, IPv6 state, and the
  908. // cache.
  909. //
  910. // Any errors that occur while querying network state are logged; in error
  911. // conditions the functionality of the resolver may be reduced, but the
  912. // resolver remains operational.
  913. func (r *Resolver) updateNetworkState(networkID string) {
  914. r.mutex.Lock()
  915. defer r.mutex.Unlock()
  916. // Only perform blocking/expensive update operations when necessary.
  917. updateAll := false
  918. updateIPv6Route := false
  919. updateServers := false
  920. flushCache := false
  921. // If r.cache is nil, this is the first update call in NewResolver. Create
  922. // the cache and perform all updates.
  923. if r.cache == nil {
  924. r.cache = lrucache.NewWithLRU(
  925. resolverCacheDefaultTTL,
  926. resolverCacheReapFrequency,
  927. resolverCacheMaxEntries)
  928. updateAll = true
  929. }
  930. // Perform all updates when the networkID has changed, which indicates a
  931. // different network.
  932. if r.networkID != networkID {
  933. updateAll = true
  934. }
  935. if updateAll {
  936. updateIPv6Route = true
  937. updateServers = true
  938. flushCache = true
  939. }
  940. // Even when the networkID has not changed, update DNS servers
  941. // periodically. This is similar to how other DNS clients
  942. // poll /etc/resolv.conf, including the period of 5s.
  943. if time.Since(r.lastServersUpdate) > resolverServersUpdateTTL {
  944. updateServers = true
  945. }
  946. // Update hasIPv6Route, which indicates whether the current network has an
  947. // IPv6 route and so if DNS requests for AAAA records will be sent.
  948. // There's no use for AAAA records on IPv4-only networks; and other
  949. // common DNS clients omit AAAA requests on IPv4-only records, so these
  950. // requests would otherwise be unusual.
  951. //
  952. // There's no hasIPv4Route as we always need to resolve A records,
  953. // particularly for IPv4-only endpoints; for IPv6-only networks,
  954. // NetworkConfig.IPv6Synthesize should be used to accomodate IPv4 DNS
  955. // server addresses, and dials performed outside the Resolver will
  956. // similarly use NAT 64 (on iOS; on Android, 464XLAT will handle this
  957. // transparently).
  958. if updateIPv6Route {
  959. if r.networkConfig.HasIPv6Route != nil {
  960. r.hasIPv6Route = r.networkConfig.HasIPv6Route()
  961. } else {
  962. hasIPv6Route, err := hasRoutableIPv6Interface()
  963. if err != nil {
  964. // Log warning and proceed without IPv6.
  965. r.networkConfig.logWarning(
  966. errors.Tracef("unable to determine IPv6 route: %v", err))
  967. hasIPv6Route = false
  968. }
  969. r.hasIPv6Route = hasIPv6Route
  970. }
  971. }
  972. // Update the list of system DNS servers. It's not an error condition here
  973. // if the list is empty: a subsequent ResolveIP may use
  974. // ResolveParameters which specifies an AlternateDNSServer.
  975. if updateServers && r.networkConfig.GetDNSServers != nil {
  976. systemServers := []string{}
  977. for _, systemServer := range r.networkConfig.GetDNSServers() {
  978. host, _, err := net.SplitHostPort(systemServer)
  979. if err != nil {
  980. // Assume the SplitHostPort error is due to systemServer being
  981. // an IP only, and append the default port, 53. If
  982. // systemServer _isn't_ an IP, the following ParseIP will fail.
  983. host = systemServer
  984. systemServer = net.JoinHostPort(systemServer, resolverDNSPort)
  985. }
  986. if net.ParseIP(host) == nil {
  987. // Log warning and proceed without this DNS server.
  988. r.networkConfig.logWarning(
  989. errors.TraceNew("invalid DNS server IP address"))
  990. continue
  991. }
  992. systemServers = append(systemServers, systemServer)
  993. }
  994. // Check if the list of servers has changed, including order. If
  995. // changed, flush the cache even if the networkID has not changed.
  996. // Cached results are only considered valid as long as the system DNS
  997. // configuration remains the same.
  998. equal := len(r.systemServers) == len(systemServers)
  999. if equal {
  1000. for i := 0; i < len(r.systemServers); i++ {
  1001. if r.systemServers[i] != systemServers[i] {
  1002. equal = false
  1003. break
  1004. }
  1005. }
  1006. }
  1007. flushCache = flushCache || !equal
  1008. // Concurrency note: once the r.systemServers slice is set, the
  1009. // contents of the backing array must not be modified due to
  1010. // concurrent ResolveIP calls.
  1011. r.systemServers = systemServers
  1012. r.lastServersUpdate = time.Now()
  1013. }
  1014. // Skip cache flushes when the extended DNS caching mechanism is enabled.
  1015. // TODO: retain only verified cache entries?
  1016. if flushCache && r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  1017. r.cache.Flush()
  1018. }
  1019. // Set r.networkID only after all operations complete without errors; if
  1020. // r.networkID were set earlier, a subsequent
  1021. // ResolveIP/updateNetworkState call might proceed as if the network
  1022. // state were updated for the specified network ID.
  1023. r.networkID = networkID
  1024. }
  1025. func (r *Resolver) getNetworkState() (bool, []string) {
  1026. r.mutex.Lock()
  1027. defer r.mutex.Unlock()
  1028. return r.hasIPv6Route, r.systemServers
  1029. }
  1030. func (r *Resolver) setCache(hostname string, IPs []net.IP, TTLs []time.Duration) {
  1031. r.mutex.Lock()
  1032. defer r.mutex.Unlock()
  1033. // The shortest TTL is used. In some cases, a DNS server may omit the TTL
  1034. // or set a 0 TTL, in which case the default is used.
  1035. TTL := resolverDefaultAnswerTTL
  1036. for _, answerTTL := range TTLs {
  1037. if answerTTL > 0 && answerTTL < TTL {
  1038. TTL = answerTTL
  1039. }
  1040. }
  1041. // When NetworkConfig.CacheExtensionInitialTTL configured, ensure the TTL
  1042. // is no shorter than CacheExtensionInitialTTL.
  1043. if r.networkConfig.CacheExtensionInitialTTL != 0 &&
  1044. TTL < r.networkConfig.CacheExtensionInitialTTL {
  1045. TTL = r.networkConfig.CacheExtensionInitialTTL
  1046. }
  1047. // Limitation: with concurrent ResolveIPs for the same domain, the last
  1048. // setCache call determines the cache value. The results are not merged.
  1049. r.cache.Set(hostname, IPs, TTL)
  1050. }
  1051. func (r *Resolver) getCache(hostname string) []net.IP {
  1052. r.mutex.Lock()
  1053. defer r.mutex.Unlock()
  1054. entry, ok := r.cache.Get(hostname)
  1055. if !ok {
  1056. return nil
  1057. }
  1058. r.metrics.cacheHits += 1
  1059. return entry.([]net.IP)
  1060. }
  1061. // newResolverConn creates a UDP socket that will send packets to serverAddr.
  1062. // serverAddr is an IP:port, which allows specifying the port for testing or
  1063. // in rare cases where the port isn't 53.
  1064. func (r *Resolver) newResolverConn(
  1065. logWarning func(error),
  1066. serverAddr string) (retConn net.Conn, retErr error) {
  1067. defer func() {
  1068. if retErr != nil {
  1069. logWarning(retErr)
  1070. }
  1071. }()
  1072. // When configured, attempt to synthesize an IPv6 address from
  1073. // an IPv4 address for compatibility on DNS64/NAT64 networks.
  1074. // If synthesize fails, try the original address.
  1075. if r.networkConfig.IPv6Synthesize != nil {
  1076. serverIPStr, port, err := net.SplitHostPort(serverAddr)
  1077. if err != nil {
  1078. return nil, errors.Trace(err)
  1079. }
  1080. serverIP := net.ParseIP(serverIPStr)
  1081. if serverIP != nil && serverIP.To4() != nil {
  1082. synthesized := r.networkConfig.IPv6Synthesize(serverIPStr)
  1083. if synthesized != "" && net.ParseIP(synthesized) != nil {
  1084. serverAddr = net.JoinHostPort(synthesized, port)
  1085. }
  1086. }
  1087. }
  1088. dialer := &net.Dialer{}
  1089. if r.networkConfig.BindToDevice != nil {
  1090. dialer.Control = func(_, _ string, c syscall.RawConn) error {
  1091. var controlErr error
  1092. err := c.Control(func(fd uintptr) {
  1093. _, err := r.networkConfig.BindToDevice(int(fd))
  1094. if err != nil {
  1095. controlErr = errors.Tracef("BindToDevice failed: %v", err)
  1096. return
  1097. }
  1098. })
  1099. if controlErr != nil {
  1100. return errors.Trace(controlErr)
  1101. }
  1102. return errors.Trace(err)
  1103. }
  1104. }
  1105. // context.Background is ok in this case as the UDP dial is just a local
  1106. // syscall to create the socket.
  1107. conn, err := dialer.DialContext(context.Background(), "udp", serverAddr)
  1108. if err != nil {
  1109. return nil, errors.Trace(err)
  1110. }
  1111. return conn, nil
  1112. }
  1113. func (r *Resolver) updateMetricResolves() {
  1114. r.mutex.Lock()
  1115. defer r.mutex.Unlock()
  1116. r.metrics.resolves += 1
  1117. }
  1118. func (r *Resolver) updateMetricRequestsIPv4() {
  1119. r.mutex.Lock()
  1120. defer r.mutex.Unlock()
  1121. r.metrics.requestsIPv4 += 1
  1122. }
  1123. func (r *Resolver) updateMetricRequestsIPv6() {
  1124. r.mutex.Lock()
  1125. defer r.mutex.Unlock()
  1126. r.metrics.requestsIPv6 += 1
  1127. }
  1128. func (r *Resolver) updateMetricResponsesIPv4() {
  1129. r.mutex.Lock()
  1130. defer r.mutex.Unlock()
  1131. r.metrics.responsesIPv4 += 1
  1132. }
  1133. func (r *Resolver) updateMetricResponsesIPv6() {
  1134. r.mutex.Lock()
  1135. defer r.mutex.Unlock()
  1136. r.metrics.responsesIPv6 += 1
  1137. }
  1138. func (r *Resolver) updateMetricDefaultResolver(success bool) {
  1139. r.mutex.Lock()
  1140. defer r.mutex.Unlock()
  1141. r.metrics.defaultResolves += 1
  1142. if success {
  1143. r.metrics.defaultSuccesses += 1
  1144. }
  1145. }
  1146. func (r *Resolver) updateMetricPeakInFlight(inFlight int64) {
  1147. r.mutex.Lock()
  1148. defer r.mutex.Unlock()
  1149. if inFlight > r.metrics.peakInFlight {
  1150. r.metrics.peakInFlight = inFlight
  1151. }
  1152. }
  1153. func (r *Resolver) updateMetricRTT(rtt time.Duration) {
  1154. r.mutex.Lock()
  1155. defer r.mutex.Unlock()
  1156. if rtt < 0 {
  1157. // Ignore invalid input.
  1158. return
  1159. }
  1160. // When r.metrics.minRTT < 0, min/maxRTT is unset.
  1161. if r.metrics.minRTT < 0 || rtt < r.metrics.minRTT {
  1162. r.metrics.minRTT = rtt
  1163. }
  1164. if rtt > r.metrics.maxRTT {
  1165. r.metrics.maxRTT = rtt
  1166. }
  1167. }
  1168. func hasRoutableIPv6Interface() (bool, error) {
  1169. interfaces, err := net.Interfaces()
  1170. if err != nil {
  1171. return false, errors.Trace(err)
  1172. }
  1173. for _, in := range interfaces {
  1174. if (in.Flags&net.FlagUp == 0) ||
  1175. // Note: don't exclude interfaces with the net.FlagPointToPoint
  1176. // flag, which is set for certain mobile networks
  1177. (in.Flags&net.FlagLoopback != 0) {
  1178. continue
  1179. }
  1180. addrs, err := in.Addrs()
  1181. if err != nil {
  1182. return false, errors.Trace(err)
  1183. }
  1184. for _, addr := range addrs {
  1185. if IPNet, ok := addr.(*net.IPNet); ok &&
  1186. IPNet.IP.To4() == nil &&
  1187. !IPNet.IP.IsLinkLocalUnicast() {
  1188. return true, nil
  1189. }
  1190. }
  1191. }
  1192. return false, nil
  1193. }
  1194. func generateIPAddressFromCIDR(CIDR string) (net.IP, error) {
  1195. _, IPNet, err := net.ParseCIDR(CIDR)
  1196. if err != nil {
  1197. return nil, errors.Trace(err)
  1198. }
  1199. // A retry is required, since a CIDR may include broadcast IPs (a.b.c.0) or
  1200. // other invalid values. The number of retries is limited to ensure we
  1201. // don't hang in the case of a misconfiguration.
  1202. for i := 0; i < 10; i++ {
  1203. randBytes := prng.Bytes(len(IPNet.IP))
  1204. IP := make(net.IP, len(IPNet.IP))
  1205. // The 1 bits in the mask must apply to the IP in the CIDR and the 0
  1206. // bits in the mask are available to randomize.
  1207. for i := 0; i < len(IP); i++ {
  1208. IP[i] = (IPNet.IP[i] & IPNet.Mask[i]) | (randBytes[i] & ^IPNet.Mask[i])
  1209. }
  1210. if IP.IsGlobalUnicast() && !common.IsBogon(IP) {
  1211. return IP, nil
  1212. }
  1213. }
  1214. return nil, errors.TraceNew("failed to generate random IP")
  1215. }
  // resolverQuestionType selects which DNS record type a query asks for.
  type resolverQuestionType int

  // Question type values. The constants are untyped integers: A is 0 and
  // AAAA is 1.
  const (
  	resolverQuestionTypeA = iota
  	resolverQuestionTypeAAAA
  )
  1221. func performDNSQuery(
  1222. resolveCtx context.Context,
  1223. logWarning func(error),
  1224. params *ResolveParameters,
  1225. useProtocolTransform bool,
  1226. conn net.Conn,
  1227. questionType resolverQuestionType,
  1228. hostname string) ([]net.IP, []time.Duration, time.Duration, error) {
  1229. if useProtocolTransform {
  1230. if params.ProtocolTransformSpec == nil ||
  1231. params.ProtocolTransformSeed == nil {
  1232. return nil, nil, -1, errors.TraceNew("invalid protocol transform configuration")
  1233. }
  1234. // miekg/dns expects conn to be a net.PacketConn or else it writes the
  1235. // TCP length prefix
  1236. udpConn, ok := conn.(*net.UDPConn)
  1237. if !ok {
  1238. return nil, nil, -1, errors.TraceNew("conn is not a *net.UDPConn")
  1239. }
  1240. conn = &transformDNSPacketConn{
  1241. UDPConn: udpConn,
  1242. transform: params.ProtocolTransformSpec,
  1243. seed: params.ProtocolTransformSeed,
  1244. }
  1245. }
  1246. // UDPSize sets the receive buffer to > 512, even when we don't include
  1247. // EDNS(0), which will mitigate issues with RFC 1035 non-compliant
  1248. // servers. See Go issue 51127.
  1249. dnsConn := &dns.Conn{
  1250. Conn: conn,
  1251. UDPSize: udpPacketBufferSize,
  1252. }
  1253. defer dnsConn.Close()
  1254. // SetQuestion initializes request.MsgHdr.Id to a random value
  1255. request := &dns.Msg{MsgHdr: dns.MsgHdr{RecursionDesired: true}}
  1256. switch questionType {
  1257. case resolverQuestionTypeA:
  1258. request.SetQuestion(dns.Fqdn(hostname), dns.TypeA)
  1259. case resolverQuestionTypeAAAA:
  1260. request.SetQuestion(dns.Fqdn(hostname), dns.TypeAAAA)
  1261. default:
  1262. return nil, nil, -1, errors.TraceNew("unknown DNS request question type")
  1263. }
  1264. if params.IncludeEDNS0 {
  1265. // miekg/dns: "RFC 6891, Section 6.1.1 allows the OPT record to appear
  1266. // anywhere in the additional record section, but it's usually at the
  1267. // end..."
  1268. request.SetEdns0(udpPacketBufferSize, false)
  1269. }
  1270. startTime := time.Now()
  1271. // Send the DNS request
  1272. dnsConn.WriteMsg(request)
  1273. // Read and process the DNS response
  1274. var IPs []net.IP
  1275. var TTLs []time.Duration
  1276. var lastErr error
  1277. RTT := time.Duration(-1)
  1278. for {
  1279. // Stop when resolveCtx is done; the caller, ResolveIP, will also
  1280. // close conn, which will interrupt a blocking dnsConn.ReadMsg.
  1281. if resolveCtx.Err() != nil {
  1282. // ResolveIP, which calls performDNSQuery, already records the
  1283. // context error (e.g., context timeout), so instead report
  1284. // lastErr, when present, as it may contain more useful
  1285. // information about why a response was rejected.
  1286. err := lastErr
  1287. if err == nil {
  1288. err = errors.Trace(resolveCtx.Err())
  1289. }
  1290. return nil, nil, RTT, err
  1291. }
  1292. // Read a response. RTT is the elapsed time between sending the
  1293. // request and reading the last received response.
  1294. response, err := dnsConn.ReadMsg()
  1295. RTT = time.Since(startTime)
  1296. if err == nil && response.MsgHdr.Id != request.MsgHdr.Id {
  1297. err = dns.ErrId
  1298. }
  1299. if err != nil {
  1300. // Try reading again, in case the first response packet failed to
  1301. // unmarshal or had an invalid ID. The Go resolver also does this;
  1302. // see Go issue 13281.
  1303. if resolveCtx.Err() == nil {
  1304. // Only log if resolveCtx is not done; otherwise the error could
  1305. // be due to conn being closed by ResolveIP.
  1306. lastErr = errors.Tracef("invalid response: %v", err)
  1307. logWarning(lastErr)
  1308. }
  1309. continue
  1310. }
  1311. // Check the RCode.
  1312. //
  1313. // For IPv4, we expect RCodeSuccess as Psiphon will typically only
  1314. // resolve domains that exist and have a valid IP (when this isn't
  1315. // the case, and we retry, the overall ResolveIP and its parent dial
  1316. // will still abort after resolveCtx is done, or RequestTimeout
  1317. // expires for maxAttempts).
  1318. //
  1319. // For IPv6, we should also expect RCodeSuccess even if there is no
  1320. // AAAA record, as long as the domain exists and has an A record.
  1321. // However, per RFC 6147 section 5.1.2, we may receive
  1322. // NXDOMAIN: "...some servers respond with RCODE=3 to a AAAA query
  1323. // even if there is an A record available for that owner name. Those
  1324. // servers are in clear violation of the meaning of RCODE 3...". In
  1325. // this case, we coalesce NXDOMAIN into success to treat the response
  1326. // the same as success with no AAAA record.
  1327. //
  1328. // All other RCodes, which are unexpected, lead to a read retry.
  1329. if response.MsgHdr.Rcode != dns.RcodeSuccess &&
  1330. !(questionType == resolverQuestionTypeAAAA && response.MsgHdr.Rcode == dns.RcodeNameError) {
  1331. errMsg, ok := dns.RcodeToString[response.MsgHdr.Rcode]
  1332. if !ok {
  1333. errMsg = fmt.Sprintf("Rcode: %d", response.MsgHdr.Rcode)
  1334. }
  1335. lastErr = errors.Tracef("unexpected RCode: %v", errMsg)
  1336. logWarning(lastErr)
  1337. continue
  1338. }
  1339. // Extract all IP answers, along with corresponding TTLs for caching.
  1340. // Perform additional validation, which may lead to another read
  1341. // retry. However, if _any_ valid IP is found, stop reading and
  1342. // return that result. Again, the validation is only best effort.
  1343. checkFailed := false
  1344. for _, answer := range response.Answer {
  1345. haveAnswer := false
  1346. var IP net.IP
  1347. var TTLSec uint32
  1348. switch questionType {
  1349. case resolverQuestionTypeA:
  1350. if a, ok := answer.(*dns.A); ok {
  1351. IP = a.A
  1352. TTLSec = a.Hdr.Ttl
  1353. haveAnswer = true
  1354. }
  1355. case resolverQuestionTypeAAAA:
  1356. if aaaa, ok := answer.(*dns.AAAA); ok {
  1357. IP = aaaa.AAAA
  1358. TTLSec = aaaa.Hdr.Ttl
  1359. haveAnswer = true
  1360. }
  1361. }
  1362. if !haveAnswer {
  1363. continue
  1364. }
  1365. err := checkDNSAnswerIP(IP)
  1366. if err != nil {
  1367. checkFailed = true
  1368. lastErr = errors.Tracef("invalid IP: %v", err)
  1369. logWarning(lastErr)
  1370. // Check the next answer
  1371. continue
  1372. }
  1373. IPs = append(IPs, IP)
  1374. TTLs = append(TTLs, time.Duration(TTLSec)*time.Second)
  1375. }
  1376. // For IPv4, an IP is expected, as noted in the comment above.
  1377. //
  1378. // In potential cases where we resolve a domain that has only an IPv6
  1379. // address, the concurrent AAAA request will deliver its result to
  1380. // ResolveIP, and that answer will be selected, so only the "await"
  1381. // logic will delay the parent dial in that case.
  1382. if questionType == resolverQuestionTypeA && len(IPs) == 0 && !checkFailed {
  1383. checkFailed = true
  1384. lastErr = errors.TraceNew("unexpected empty A response")
  1385. logWarning(lastErr)
  1386. }
  1387. // Retry if there are no valid IPs and any error; if no error, this
  1388. // may be a valid AAAA response with no IPs, in which case return the
  1389. // result.
  1390. if len(IPs) == 0 && checkFailed {
  1391. continue
  1392. }
  1393. return IPs, TTLs, RTT, nil
  1394. }
  1395. }
  1396. func checkDNSAnswerIP(IP net.IP) error {
  1397. if IP == nil {
  1398. return errors.TraceNew("IP is nil")
  1399. }
  1400. // Limitation: this could still be a phony/injected response, it's not
  1401. // possible to verify with plaintext DNS, but a "bogon" IP is clearly
  1402. // invalid.
  1403. if common.IsBogon(IP) {
  1404. return errors.TraceNew("IP is bogon")
  1405. }
  1406. // Create a temporary socket bound to the destination IP. This checks
  1407. // thats the local host has a route to this IP. If not, we'll reject the
  1408. // IP. This prevents selecting an IP which is guaranteed to fail to dial.
  1409. // Use UDP as this results in no network traffic; the destination port is
  1410. // arbitrary. The Go resolver performs a similar operation.
  1411. //
  1412. // Limitations:
  1413. // - We may cache the IP and reuse it without checking routability again;
  1414. // the cache should be flushed when network state changes.
  1415. // - Given that the AAAA is requested only when the host has an IPv6
  1416. // route, we don't expect this to often fail with a _valid_ response.
  1417. // However, this remains a possibility and in this case,
  1418. // performDNSQuery will keep awaiting a response which can trigger
  1419. // the "await" logic.
  1420. conn, err := net.DialUDP("udp", nil, &net.UDPAddr{IP: IP, Port: 443})
  1421. if err != nil {
  1422. return errors.Trace(err)
  1423. }
  1424. conn.Close()
  1425. return nil
  1426. }
  1427. func defaultResolverLookupIP(
  1428. ctx context.Context, hostname string, logHostnames bool) ([]net.IP, error) {
  1429. addrs, err := net.DefaultResolver.LookupIPAddr(ctx, hostname)
  1430. if err != nil && !logHostnames {
  1431. // Remove domain names from "net" error messages.
  1432. err = common.RedactNetError(err)
  1433. }
  1434. if err != nil {
  1435. return nil, errors.Trace(err)
  1436. }
  1437. ips := make([]net.IP, len(addrs))
  1438. for i, addr := range addrs {
  1439. ips[i] = addr.IP
  1440. }
  1441. return ips, nil
  1442. }
  // transformDNSPacketConn wraps a *net.UDPConn, intercepting Write calls and
  // applying the specified protocol transform to each outgoing packet.
  //
  // As transforms operate on strings and DNS requests are binary, the transform
  // should be expressed using hex characters. The DNS packet to be written
  // (the input to Write) is converted to hex, transformed, converted back to
  // binary, and only then actually written to the UDP socket.
  //
  // Methods that transformDNSPacketConn does not define itself are promoted
  // from the embedded *net.UDPConn.
  type transformDNSPacketConn struct {
  	// UDPConn is the underlying socket which receives the transformed packets.
  	*net.UDPConn

  	// transform is the spec applied, via ApplyString, to the hex encoding of
  	// each packet passed to Write.
  	transform transforms.Spec

  	// seed is passed to transform.ApplyString on every Write.
  	// NOTE(review): presumably this makes the transform output reproducible
  	// for a given seed -- confirm against the transforms package.
  	seed *prng.Seed
  }
  1455. func (conn *transformDNSPacketConn) Write(b []byte) (int, error) {
  1456. // Limitation: there is no check that a transformed packet remains within
  1457. // the network packet MTU.
  1458. input := hex.EncodeToString(b)
  1459. output, err := conn.transform.ApplyString(conn.seed, input)
  1460. if err != nil {
  1461. return 0, errors.Trace(err)
  1462. }
  1463. packet, err := hex.DecodeString(output)
  1464. if err != nil {
  1465. return 0, errors.Trace(err)
  1466. }
  1467. _, err = conn.UDPConn.Write(packet)
  1468. if err != nil {
  1469. // In the error case, don't report bytes written as the number could
  1470. // exceed the pre-transform length.
  1471. return 0, errors.Trace(err)
  1472. }
  1473. // Report the pre-transform length as bytes written, as the caller may check
  1474. // that the requested len(b) bytes were written.
  1475. return len(b), nil
  1476. }