resolver.go 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759
  1. /*
  2. * Copyright (c) 2022, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package resolver implements a DNS stub resolver, or DNS client, which
  20. // resolves domain names.
  21. //
  22. // The resolver is Psiphon-specific and oriented towards blocking resistance.
  23. // See ResolveIP for more details.
  24. package resolver
  25. import (
  26. "context"
  27. "encoding/hex"
  28. "fmt"
  29. "net"
  30. "sync"
  31. "sync/atomic"
  32. "syscall"
  33. "time"
  34. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  35. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  36. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
  37. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  38. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
  39. lrucache "github.com/cognusion/go-cache-lru"
  40. "github.com/miekg/dns"
  41. )
  42. const (
  43. resolverCacheDefaultTTL = 1 * time.Minute
  44. resolverCacheReapFrequency = 1 * time.Minute
  45. resolverCacheMaxEntries = 10000
  46. resolverServersUpdateTTL = 5 * time.Second
  47. resolverDefaultAttemptsPerServer = 2
  48. resolverDefaultRequestTimeout = 5 * time.Second
  49. resolverDefaultAwaitTimeout = 10 * time.Millisecond
  50. resolverDefaultAnswerTTL = 1 * time.Minute
  51. resolverDNSPort = "53"
  52. udpPacketBufferSize = 1232
  53. )
  54. // NetworkConfig specifies network-level configuration for a Resolver.
  55. type NetworkConfig struct {
  56. // GetDNSServers returns a list of system DNS server addresses (IP:port, or
  57. // IP only with port 53 assumed), as determined via OS APIs, in priority
  58. // order. GetDNSServers may be nil.
  59. GetDNSServers func() []string
  60. // BindToDevice should ensure the input file descriptor, a UDP socket, is
  61. // excluded from VPN routing. BindToDevice may be nil.
  62. BindToDevice func(fd int) (string, error)
  63. // AllowDefaultResolverWithBindToDevice indicates that it's safe to use
  64. // the default resolver when BindToDevice is configured, as the host OS
  65. // will automatically exclude DNS requests from the VPN.
  66. AllowDefaultResolverWithBindToDevice bool
  67. // IPv6Synthesize should apply NAT64 synthesis to the input IPv4 address,
  68. // returning a synthesized IPv6 address that will route to the same
  69. // endpoint. IPv6Synthesize may be nil.
  70. IPv6Synthesize func(IPv4 string) string
  71. // HasIPv6Route should return true when the host has an IPv6 route.
  72. // Resolver has an internal implementation, hasRoutableIPv6Interface, to
  73. // determine this, but it can fail on some platforms ("route ip+net:
  74. // netlinkrib: permission denied" on Android, for example; see Go issue
  75. // 40569). When HasIPv6Route is nil, the internal implementation is used.
  76. HasIPv6Route func() bool
  77. // LogWarning is an optional callback which is used to log warnings and
  78. // transient errors which would otherwise not be recorded or returned.
  79. LogWarning func(error)
  80. // LogHostnames indicates whether to log hostname in errors or not.
  81. LogHostnames bool
  82. // CacheExtensionInitialTTL specifies a minimum TTL to use when caching
  83. // domain resolution results. This minimum will override any TTL in the
  84. // DNS response. CacheExtensionInitialTTL is off when 0.
  85. CacheExtensionInitialTTL time.Duration
  86. // CacheExtensionVerifiedTTL specifies the minimum TTL to set for a cached
  87. // domain resolution result after the result has been verified.
  88. // CacheExtensionVerifiedTTL is off when 0.
  89. //
  90. // DNS cache extension is a workaround to partially mitigate issues with
  91. // obtaining underlying system DNS server IPs on platforms such as iOS
  92. // once a VPN is running and after network changes, such as changing from
  93. // Wi-Fi to mobile. While ResolveParameters.AlternateDNSServer can be
  94. // used to specify a known public DNS server, it may be the case that
  95. // public DNS servers are blocked or always falling back to a public DNS
  96. // server creates unusual traffic. And while it may be possible to use
  97. // the default system resolver, it lacks certain circumvention
  98. // capabilities.
  99. //
  100. // Extending the TTL for cached responses allows Psiphon to redial domains
  101. // using recently successful IPs.
  102. //
  103. // CacheExtensionInitialTTL allows for a greater initial minimum TTL, so
  104. // that the response entry remains in the cache long enough for a dial to
  105. // fully complete and verify the endpoint. Psiphon will call
  106. // Resolver.VerifyExtendCacheTTL once a dial has authenticated, for
  107. // example, the destination Psiphon server. VerifyCacheExtension will
  108. // further extend the corresponding TTL to CacheExtensionVerifiedTTL, a
  109. // longer TTL. CacheExtensionInitialTTL is intended to be on the order of
  110. // minutes and CacheExtensionVerifiedTTL may be on the order of hours.
  111. //
  112. // When CacheExtensionVerifiedTTL is on, the DNS cache is not flushed on
  113. // network changes, to allow for the previously cached entries to remain
  114. // available in the problematic scenario. Like adjusting TTLs, this is an
  115. // explicit trade-off which doesn't adhere to standard best practise, but
  116. // is expected to be more blocking resistent; this approach also assumes
  117. // that endpoints such as CDN IPs are typically available on any network.
  118. CacheExtensionVerifiedTTL time.Duration
  119. }
  120. func (c *NetworkConfig) allowDefaultResolver() bool {
  121. // When BindToDevice is configured, the standard library resolver is not
  122. // used, as the system resolver may not route outside of the VPN.
  123. return c.BindToDevice == nil || c.AllowDefaultResolverWithBindToDevice
  124. }
  125. func (c *NetworkConfig) logWarning(err error) {
  126. if c.LogWarning != nil {
  127. c.LogWarning(err)
  128. }
  129. }
  130. // ResolveParameters specifies the configuration and behavior of a single
  131. // ResolveIP call, a single domain name resolution.
  132. //
  133. // New ResolveParameters may be generated by calling MakeResolveParameters,
  134. // which takes tactics parameters as an input.
  135. //
  136. // ResolveParameters may be persisted for replay.
  137. type ResolveParameters struct {
  138. // AttemptsPerServer specifies how many requests to send to each DNS
  139. // server before trying the next server. IPv4 and IPv6 requests are sent
  140. // concurrently and count as one attempt.
  141. AttemptsPerServer int
  142. // AttemptsPerPreferredServer is AttemptsPerServer for a preferred
  143. // alternate DNS server.
  144. AttemptsPerPreferredServer int
  145. // RequestTimeout specifies how long to wait for a valid response before
  146. // moving on to the next attempt.
  147. RequestTimeout time.Duration
  148. // AwaitTimeout specifies how long to await an additional response after
  149. // the first response is received. This additional wait time applies only
  150. // when there is either no IPv4 or IPv6 response.
  151. AwaitTimeout time.Duration
  152. // PreresolvedIPAddress specifies an IP address result to be used in place
  153. // of making a request.
  154. PreresolvedIPAddress string
  155. // PreresolvedDomain is the domain for which PreresolvedIPAddress is to be
  156. // used.
  157. PreresolvedDomain string
  158. // AlternateDNSServer specifies an alterate DNS server (IP:port, or IP
  159. // only with port 53 assumed) to be used when either no system DNS
  160. // servers are available or when PreferAlternateDNSServer is set.
  161. AlternateDNSServer string
  162. // PreferAlternateDNSServer indicates whether to prioritize using the
  163. // AlternateDNSServer. When set, the AlternateDNSServer is attempted
  164. // before any system DNS servers.
  165. PreferAlternateDNSServer bool
  166. // ProtocolTransformName specifies the name associated with
  167. // ProtocolTransformSpec and is used for metrics.
  168. ProtocolTransformName string
  169. // ProtocolTransformSpec specifies a transform to apply to the DNS request packet.
  170. // See: "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms".
  171. //
  172. // As transforms operate on strings and DNS requests are binary,
  173. // transforms should be expressed using hex characters.
  174. //
  175. // DNS transforms include strategies discovered by the Geneva team,
  176. // https://geneva.cs.umd.edu.
  177. ProtocolTransformSpec transforms.Spec
  178. // ProtocolTransformSeed specifies the seed to use for generating random
  179. // data in the ProtocolTransformSpec transform. To replay a transform,
  180. // specify the same seed.
  181. ProtocolTransformSeed *prng.Seed
  182. // IncludeEDNS0 indicates whether to include the EDNS(0) UDP maximum
  183. // response size extension in DNS requests. The resolver can handle
  184. // responses larger than 512 bytes (RFC 1035 maximum) regardless of
  185. // whether the extension is included; the extension may be included as
  186. // part of appearing similar to other DNS traffic.
  187. IncludeEDNS0 bool
  188. firstAttemptWithAnswer int32
  189. }
  190. // GetFirstAttemptWithAnswer returns the index of the first request attempt
  191. // that received a valid response, for the most recent ResolveIP call using
  192. // this ResolveParameters. This information is used for logging metrics. The
  193. // first attempt has index 1. GetFirstAttemptWithAnswer return 0 when no
  194. // request attempt has reported a valid response.
  195. //
  196. // The caller is responsible for synchronizing use of a ResolveParameters
  197. // instance (e.g, use a distinct ResolveParameters per ResolveIP to ensure
  198. // GetFirstAttemptWithAnswer refers to a specific ResolveIP).
  199. func (r *ResolveParameters) GetFirstAttemptWithAnswer() int {
  200. return int(atomic.LoadInt32(&r.firstAttemptWithAnswer))
  201. }
  202. func (r *ResolveParameters) setFirstAttemptWithAnswer(attempt int) {
  203. atomic.StoreInt32(&r.firstAttemptWithAnswer, int32(attempt))
  204. }
  205. // Implementation note: Go's standard net.Resolver supports specifying a
  206. // custom Dial function. This could be used to implement at least a large
  207. // subset of the Resolver functionality on top of Go's standard library
  208. // resolver. However, net.Resolver is limited to using the CGO resolver on
  209. // Android, https://github.com/golang/go/issues/8877, in which case the
  210. // custom Dial function is not used. Furthermore, the the pure Go resolver in
  211. // net/dnsclient_unix.go appears to not be used on Windows at this time.
  212. //
  213. // Go also provides golang.org/x/net/dns/dnsmessage, a DNS message marshaller,
  214. // which could potentially be used in place of github.com/miekg/dns.
  215. // Resolver is a DNS stub resolver, or DNS client, which resolves domain
  216. // names. A Resolver instance maintains a cache, a network state snapshot,
  217. // and metrics. All ResolveIP calls will share the same cache and state.
  218. // Multiple concurrent ResolveIP calls are supported.
  219. type Resolver struct {
  220. networkConfig *NetworkConfig
  221. mutex sync.Mutex
  222. networkID string
  223. hasIPv6Route bool
  224. systemServers []string
  225. lastServersUpdate time.Time
  226. cache *lrucache.Cache
  227. metrics resolverMetrics
  228. }
  229. type resolverMetrics struct {
  230. resolves int
  231. cacheHits int
  232. verifiedCacheExtensions int
  233. requestsIPv4 int
  234. requestsIPv6 int
  235. responsesIPv4 int
  236. responsesIPv6 int
  237. defaultResolves int
  238. defaultSuccesses int
  239. peakInFlight int
  240. minRTT time.Duration
  241. maxRTT time.Duration
  242. }
  243. func newResolverMetrics() resolverMetrics {
  244. return resolverMetrics{minRTT: -1}
  245. }
  246. // NewResolver creates a new Resolver instance.
  247. func NewResolver(networkConfig *NetworkConfig, networkID string) *Resolver {
  248. r := &Resolver{
  249. networkConfig: networkConfig,
  250. metrics: newResolverMetrics(),
  251. }
  252. // updateNetworkState will initialize the cache and network state,
  253. // including system DNS servers.
  254. r.updateNetworkState(networkID)
  255. return r
  256. }
  257. // Stop clears the Resolver cache and resets metrics. Stop must be called only
  258. // after ceasing all in-flight ResolveIP goroutines, or else the cache or
  259. // metrics may repopulate. A Resolver may be resumed after calling Stop, but
  260. // Update must be called first.
  261. func (r *Resolver) Stop() {
  262. r.mutex.Lock()
  263. defer r.mutex.Unlock()
  264. // r.networkConfig is not set to nil to avoid possible nil pointer
  265. // dereferences by concurrent ResolveIP calls.
  266. r.networkID = ""
  267. r.hasIPv6Route = false
  268. r.systemServers = nil
  269. r.cache.Flush()
  270. r.metrics = newResolverMetrics()
  271. }
  272. // MakeResolveParameters generates ResolveParameters using the input tactics
  273. // parameters and optional frontingProviderID context.
  274. func (r *Resolver) MakeResolveParameters(
  275. p parameters.ParametersAccessor,
  276. frontingProviderID string,
  277. frontingDialDomain string) (*ResolveParameters, error) {
  278. params := &ResolveParameters{
  279. AttemptsPerServer: p.Int(parameters.DNSResolverAttemptsPerServer),
  280. AttemptsPerPreferredServer: p.Int(parameters.DNSResolverAttemptsPerPreferredServer),
  281. RequestTimeout: p.Duration(parameters.DNSResolverRequestTimeout),
  282. AwaitTimeout: p.Duration(parameters.DNSResolverAwaitTimeout),
  283. }
  284. // When a frontingProviderID is specified, generate a pre-resolved IP
  285. // address, based on tactics configuration.
  286. if frontingProviderID != "" {
  287. if frontingDialDomain == "" {
  288. return nil, errors.TraceNew("missing fronting dial domain")
  289. }
  290. if p.WeightedCoinFlip(parameters.DNSResolverPreresolvedIPAddressProbability) {
  291. CIDRs := p.LabeledCIDRs(parameters.DNSResolverPreresolvedIPAddressCIDRs, frontingProviderID)
  292. if len(CIDRs) > 0 {
  293. CIDR := CIDRs[prng.Intn(len(CIDRs))]
  294. IP, err := generateIPAddressFromCIDR(CIDR)
  295. if err != nil {
  296. return nil, errors.Trace(err)
  297. }
  298. params.PreresolvedIPAddress = IP.String()
  299. params.PreresolvedDomain = frontingDialDomain
  300. }
  301. }
  302. }
  303. // When preferring an alternate DNS server, select the alternate from
  304. // DNSResolverPreferredAlternateServers. This list is for circumvention
  305. // operations, such as using a public DNS server with a protocol
  306. // transform. Otherwise, select from DNSResolverAlternateServers, which
  307. // is a fallback list of DNS servers to be used when the system DNS
  308. // servers cannot be obtained.
  309. preferredServers := p.Strings(parameters.DNSResolverPreferredAlternateServers)
  310. preferAlternateDNSServer := len(preferredServers) > 0 && p.WeightedCoinFlip(
  311. parameters.DNSResolverPreferAlternateServerProbability)
  312. alternateServers := preferredServers
  313. if !preferAlternateDNSServer {
  314. alternateServers = p.Strings(parameters.DNSResolverAlternateServers)
  315. }
  316. // Select an alternate DNS server, typically a public DNS server. Ensure
  317. // tactics is configured with an empty DNSResolverAlternateServers list
  318. // in cases where attempts to public DNS server are unwanted.
  319. if len(alternateServers) > 0 {
  320. alternateServer := alternateServers[prng.Intn(len(alternateServers))]
  321. // Check that the alternateServer has a well-formed IP address; and add
  322. // a default port if none it present.
  323. host, _, err := net.SplitHostPort(alternateServer)
  324. if err != nil {
  325. // Assume the SplitHostPort error is due to missing port.
  326. host = alternateServer
  327. alternateServer = net.JoinHostPort(alternateServer, resolverDNSPort)
  328. }
  329. if net.ParseIP(host) == nil {
  330. // Log warning and proceed without this DNS server.
  331. r.networkConfig.logWarning(
  332. errors.TraceNew("invalid alternate DNS server IP address"))
  333. } else {
  334. params.AlternateDNSServer = alternateServer
  335. params.PreferAlternateDNSServer = preferAlternateDNSServer
  336. }
  337. }
  338. // Select a DNS transform. DNS request transforms are "scoped" by
  339. // alternate DNS server (IP address without port); that is, when an
  340. // alternate DNS server is certain to be attempted first, a transform
  341. // associated with and known to work with that DNS server will be
  342. // selected. Otherwise, a transform from the default scope
  343. // (transforms.SCOPE_ANY == "") is selected.
  344. //
  345. // In any case, ResolveIP will only apply a transform on the first request
  346. // attempt.
  347. if p.WeightedCoinFlip(parameters.DNSResolverProtocolTransformProbability) {
  348. specs := p.ProtocolTransformSpecs(
  349. parameters.DNSResolverProtocolTransformSpecs)
  350. scopedSpecNames := p.ProtocolTransformScopedSpecNames(
  351. parameters.DNSResolverProtocolTransformScopedSpecNames)
  352. // The alternate DNS server will be the first attempt if
  353. // PreferAlternateDNSServer or the list of system DNS servers is empty.
  354. //
  355. // Limitation: the system DNS server list may change, due to a later
  356. // Resolver.update call when ResolveIP is called with these
  357. // ResolveParameters.
  358. _, systemServers := r.getNetworkState()
  359. scope := transforms.SCOPE_ANY
  360. if params.AlternateDNSServer != "" &&
  361. (params.PreferAlternateDNSServer || len(systemServers) == 0) {
  362. // Remove the port number, as the scope key is an IP address only.
  363. //
  364. // TODO: when we only just added the default port above, which is
  365. // the common case, we could avoid this extra split.
  366. host, _, err := net.SplitHostPort(params.AlternateDNSServer)
  367. if err != nil {
  368. return nil, errors.Trace(err)
  369. }
  370. scope = host
  371. }
  372. name, spec := specs.Select(scope, scopedSpecNames)
  373. if spec != nil {
  374. params.ProtocolTransformName = name
  375. params.ProtocolTransformSpec = spec
  376. var err error
  377. params.ProtocolTransformSeed, err = prng.NewSeed()
  378. if err != nil {
  379. return nil, errors.Trace(err)
  380. }
  381. }
  382. }
  383. if p.WeightedCoinFlip(parameters.DNSResolverIncludeEDNS0Probability) {
  384. params.IncludeEDNS0 = true
  385. }
  386. return params, nil
  387. }
  388. // ResolveAddress splits the input host:port address, calls ResolveIP to
  389. // resolve the IP address of the host, selects an IP if there are multiple,
  390. // and returns a rejoined IP:port.
  391. //
  392. // IP address selection is random. When network input is set
  393. // to "ip4"/"tcp4"/"udp4" or "ip6"/"tcp6"/"udp6", selection is limited to
  394. // IPv4 or IPv6, respectively.
  395. func (r *Resolver) ResolveAddress(
  396. ctx context.Context,
  397. networkID string,
  398. params *ResolveParameters,
  399. network, address string) (string, error) {
  400. hostname, port, err := net.SplitHostPort(address)
  401. if err != nil {
  402. return "", errors.Trace(err)
  403. }
  404. IPs, err := r.ResolveIP(ctx, networkID, params, hostname)
  405. if err != nil {
  406. return "", errors.Trace(err)
  407. }
  408. // Don't shuffle or otherwise mutate the slice returned by ResolveIP.
  409. permutedIndexes := prng.Perm(len(IPs))
  410. index := 0
  411. switch network {
  412. case "ip4", "tcp4", "udp4":
  413. index = -1
  414. for _, i := range permutedIndexes {
  415. if IPs[i].To4() != nil {
  416. index = i
  417. break
  418. }
  419. }
  420. case "ip6", "tcp6", "udp6":
  421. index = -1
  422. for _, i := range permutedIndexes {
  423. if IPs[i].To4() == nil {
  424. index = i
  425. break
  426. }
  427. }
  428. }
  429. if index == -1 {
  430. return "", errors.Tracef("no IP for network '%s'", network)
  431. }
  432. return net.JoinHostPort(IPs[index].String(), port), nil
  433. }
  434. // ResolveIP resolves a domain name.
  435. //
  436. // The input params may be nil, in which case default timeouts are used.
  437. //
  438. // ResolveIP performs concurrent A and AAAA lookups, returns any valid
  439. // response IPs, and caches results. An error is returned when there are
  440. // no valid response IPs.
  441. //
  442. // ResolveIP is not a general purpose resolver and is Psiphon-specific. For
  443. // example, resolved domains are expected to exist; ResolveIP does not
  444. // fallback to TCP; does not consult any "hosts" file; does not perform RFC
  445. // 3484 sorting logic (see Go issue 18518); only implements a subset of
  446. // Go/glibc/resolv.conf(5) resolver parameters (attempts and timeouts, but
  447. // not rotate, single-request etc.) ResolveIP does not implement singleflight
  448. // logic, as the Go resolver does, and allows multiple concurrent request for
  449. // the same domain -- Psiphon won't often resolve the exact same domain
  450. // multiple times concurrently, and, when it does, there's a circumvention
  451. // benefit to attempting different DNS servers and protocol transforms.
  452. //
  453. // ResolveIP does not currently support DoT, DoH, or TCP; those protocols are
  454. // often blocked or less common. Instead, ResolveIP makes a best effort to
  455. // evade plaintext UDP DNS interference by ignoring invalid responses and by
  456. // optionally applying protocol transforms that may evade blocking.
  457. //
  458. // Due to internal caching, the caller must not mutate returned net.IP slice
  459. // or entries.
  460. func (r *Resolver) ResolveIP(
  461. ctx context.Context,
  462. networkID string,
  463. params *ResolveParameters,
  464. hostname string) ([]net.IP, error) {
  465. // ResolveIP does _not_ lock r.mutex for the lifetime of the function, to
  466. // ensure many ResolveIP calls can run concurrently.
  467. // If the hostname is already an IP address, just return that. For
  468. // metrics, this does not count as a resolve, as the caller may invoke
  469. // ResolveIP for all dials.
  470. IP := net.ParseIP(hostname)
  471. if IP != nil {
  472. return []net.IP{IP}, nil
  473. }
  474. // Count all resolves of an actual domain, including cached and
  475. // pre-resolved cases.
  476. r.updateMetricResolves()
  477. // Call updateNetworkState immediately before resolving, as a best effort
  478. // to ensure that system DNS servers and IPv6 routing network state
  479. // reflects the current network. updateNetworkState locks the Resolver
  480. // mutex for its duration, and so concurrent ResolveIP calls may block at
  481. // this point. However, all updateNetworkState operations are local to
  482. // the host or device; and, if the networkID is unchanged since the last
  483. // call, updateNetworkState may not perform any operations; and after the
  484. // updateNetworkState call, ResolveIP proceeds without holding the mutex
  485. // lock. As a result, this step should not prevent ResolveIP concurrency.
  486. r.updateNetworkState(networkID)
  487. if params == nil {
  488. // Supply default ResolveParameters
  489. params = &ResolveParameters{
  490. AttemptsPerServer: resolverDefaultAttemptsPerServer,
  491. AttemptsPerPreferredServer: resolverDefaultAttemptsPerServer,
  492. RequestTimeout: resolverDefaultRequestTimeout,
  493. AwaitTimeout: resolverDefaultAwaitTimeout,
  494. }
  495. }
  496. // When PreresolvedIPAddress is set, tactics parameters determined the IP address
  497. // in this case.
  498. if params.PreresolvedIPAddress != "" && params.PreresolvedDomain == hostname {
  499. IP := net.ParseIP(params.PreresolvedIPAddress)
  500. if IP == nil {
  501. // Unexpected case, as MakeResolveParameters selects the IP address.
  502. return nil, errors.TraceNew("invalid IP address")
  503. }
  504. return []net.IP{IP}, nil
  505. }
  506. // Use a snapshot of the current network state, including IPv6 routing and
  507. // system DNS servers.
  508. //
  509. // Limitation: these values are used even if the network changes in the
  510. // middle of a ResolveIP call; ResolveIP is not interrupted if the
  511. // network changes.
  512. hasIPv6Route, systemServers := r.getNetworkState()
  513. // Use the standard library resolver when there's no GetDNSServers, or the
  514. // system server list is otherwise empty, and no alternate DNS server is
  515. // configured.
  516. //
  517. // Note that in the case where there are no system DNS servers and there
  518. // is an AlternateDNSServer, if the AlternateDNSServer attempt fails,
  519. // control does not flow back to defaultResolverLookupIP. On platforms
  520. // without GetDNSServers, the caller must arrange for distinct attempts
  521. // that try a AlternateDNSServer, or just use the standard library
  522. // resolver.
  523. //
  524. // ResolveIP should always be called, even when defaultResolverLookupIP is
  525. // expected to be used, to ensure correct metrics counts and ensure a
  526. // consistent error message log stack for all DNS-related failures.
  527. //
  528. if len(systemServers) == 0 &&
  529. params.AlternateDNSServer == "" &&
  530. r.networkConfig.allowDefaultResolver() {
  531. IPs, err := defaultResolverLookupIP(ctx, hostname, r.networkConfig.LogHostnames)
  532. r.updateMetricDefaultResolver(err == nil)
  533. if err != nil {
  534. return nil, errors.Trace(err)
  535. }
  536. return IPs, err
  537. }
  538. // Consult the cache before making queries. This comes after the standard
  539. // library case, to allow the standard library to provide its own caching
  540. // logic.
  541. IPs := r.getCache(hostname)
  542. if IPs != nil {
  543. // TODO: it would be safer to make and return a copy of the cached
  544. // slice, instead of depending on all callers to not mutate the slice.
  545. return IPs, nil
  546. }
  547. // Set the list of DNS servers to attempt. AlternateDNSServer is used
  548. // first when PreferAlternateDNSServer is set; otherwise
  549. // AlternateDNSServer is used only when there is no system DNS server.
  550. var servers []string
  551. if params.AlternateDNSServer != "" &&
  552. (len(systemServers) == 0 || params.PreferAlternateDNSServer) {
  553. servers = []string{params.AlternateDNSServer}
  554. }
  555. servers = append(servers, systemServers...)
  556. if len(servers) == 0 {
  557. return nil, errors.TraceNew("no DNS servers")
  558. }
  559. // Set the request timeout and set up a reusable timer for handling
  560. // request and await timeouts.
  561. //
  562. // We expect to always have a request timeout. Handle the unexpected no
  563. // timeout, 0, case by setting the longest timeout possible, ~290 years;
  564. // always having a non-zero timeout makes the following code marginally
  565. // simpler.
  566. requestTimeout := params.RequestTimeout
  567. if requestTimeout == 0 {
  568. requestTimeout = 1<<63 - 1
  569. }
  570. var timer *time.Timer
  571. timerDrained := true
  572. resetTimer := func(timeout time.Duration) {
  573. if timer == nil {
  574. timer = time.NewTimer(timeout)
  575. } else {
  576. if !timerDrained && !timer.Stop() {
  577. <-timer.C
  578. }
  579. timer.Reset(timeout)
  580. }
  581. timerDrained = false
  582. }
  583. // Orchestrate the DNS requests
  584. resolveCtx, cancelFunc := context.WithCancelCause(ctx)
  585. defer cancelFunc(nil)
  586. waitGroup := new(sync.WaitGroup)
  587. conns := common.NewConns[net.Conn]()
  588. type answer struct {
  589. attempt int
  590. questionType resolverQuestionType
  591. IPs []net.IP
  592. TTLs []time.Duration
  593. }
  594. var maxAttempts int
  595. if params.PreferAlternateDNSServer {
  596. maxAttempts = params.AttemptsPerPreferredServer
  597. maxAttempts += (len(servers) - 1) * params.AttemptsPerServer
  598. } else {
  599. maxAttempts = len(servers) * params.AttemptsPerServer
  600. }
  601. answerChan := make(chan *answer, maxAttempts*2)
  602. inFlight := 0
  603. awaitA := true
  604. awaitAAAA := hasIPv6Route
  605. var result *answer
  606. var lastErr atomic.Value
  607. trackResult := func(a *answer) {
  608. // A result is sent from every attempt goroutine that is launched,
  609. // even in the case of an error, in which case the result is nil.
  610. // Update the number of in-flight attempts as results are received.
  611. // Mark no longer awaiting A or AAAA as long as there is a valid
  612. // response, even if there are no IPs in the IPv6 case.
  613. if inFlight > 0 {
  614. inFlight -= 1
  615. }
  616. if a != nil {
  617. switch a.questionType {
  618. case resolverQuestionTypeA:
  619. awaitA = false
  620. case resolverQuestionTypeAAAA:
  621. awaitAAAA = false
  622. }
  623. }
  624. }
  625. stop := false
  626. for i := 0; !stop && i < maxAttempts; i++ {
  627. var index int
  628. if params.PreferAlternateDNSServer {
  629. if i < params.AttemptsPerPreferredServer {
  630. index = 0
  631. } else {
  632. index = 1 + ((i - params.AttemptsPerPreferredServer) / params.AttemptsPerServer)
  633. }
  634. } else {
  635. index = i / params.AttemptsPerServer
  636. }
  637. server := servers[index]
  638. // Only the first attempt pair tries transforms, as it's not certain
  639. // the transforms will be compatible with DNS servers.
  640. useProtocolTransform := (i == 0 && params.ProtocolTransformSpec != nil)
  641. // Send A and AAAA requests concurrently.
  642. questionTypes := []resolverQuestionType{resolverQuestionTypeA, resolverQuestionTypeAAAA}
  643. if !hasIPv6Route {
  644. questionTypes = questionTypes[0:1]
  645. }
  646. for _, questionType := range questionTypes {
  647. waitGroup.Add(1)
  648. // For metrics, track peak concurrent in-flight requests for
  649. // a _single_ ResolveIP. inFlight for this ResolveIP is also used
  650. // to determine whether to await additional responses once the
  651. // first, valid response is received. For that logic to be
  652. // correct, we must increment inFlight in this outer goroutine to
  653. // ensure the await logic sees either inFlight > 0 or an answer
  654. // in the channel.
  655. inFlight += 1
  656. r.updateMetricPeakInFlight(inFlight)
  657. go func(attempt int, questionType resolverQuestionType, useProtocolTransform bool) {
  658. defer waitGroup.Done()
  659. // Always send a result back to the main loop, even if this
  660. // attempt fails, so the main loop proceeds to the next
  661. // iteration immediately. Nil is sent in failure cases. When
  662. // the answer is not nil, it's already been sent.
  663. var a *answer
  664. defer func() {
  665. if a == nil {
  666. // The channel should have sufficient buffering for
  667. // the send to never block; the default case is used
  668. // to avoid a hang in the case of a bug.
  669. select {
  670. case answerChan <- a:
  671. default:
  672. }
  673. }
  674. }()
  675. // The request count metric counts the _intention_ to send
  676. // requests, as there's a possibility that newResolverConn or
  677. // performDNSQuery fail locally before sending a request packet.
  678. switch questionType {
  679. case resolverQuestionTypeA:
  680. r.updateMetricRequestsIPv4()
  681. case resolverQuestionTypeAAAA:
  682. r.updateMetricRequestsIPv6()
  683. }
  684. // While it's possible, and potentially more optimal, to use
  685. // the same UDP socket for both the A and AAAA request, we
  686. // use a distinct socket per request, as common DNS clients do.
  687. conn, err := r.newResolverConn(r.networkConfig.logWarning, server)
  688. if err != nil {
  689. lastErr.Store(errors.Trace(err))
  690. return
  691. }
  692. defer conn.Close()
  693. // There's no context.Context support in the underlying API
  694. // used by performDNSQuery, so instead collect all the
  695. // request conns so that they can be closed, and any blocking
  696. // network I/O interrupted, below, if resolveCtx is done.
  697. if !conns.Add(conn) {
  698. // Add fails when conns is already closed. Do not
  699. // overwrite lastErr in this case.
  700. return
  701. }
  702. // performDNSQuery will send the request and read a response.
  703. // performDNSQuery will continue reading responses until it
  704. // receives a valid response, which can mitigate a subset of
  705. // DNS injection attacks (to the limited extent possible for
  706. // plaintext DNS).
  707. //
  708. // For IPv4, NXDOMAIN or a response with no IPs is not
  709. // expected for domains resolved by Psiphon, so
  710. // performDNSQuery treats such a response as invalid. For
  711. // IPv6, a response with no IPs, may be valid(even though the
  712. // response could be forged); the resolver will continue its
  713. // attempts loop if it has no other IPs.
  714. //
  715. // Each performDNSQuery has no timeout and runs
  716. // until it has read a valid response or the requestCtx is
  717. // done. This allows for slow arriving, valid responses to
  718. // eventually succeed, even if the read time exceeds
  719. // requestTimeout, as long as the read time is less than the
  720. // requestCtx timeout.
  721. //
  722. // With this approach, the overall ResolveIP call may have
  723. // more than 2 performDNSQuery requests in-flight at a time,
  724. // as requestTimeout is used to schedule sending the next
  725. // attempt but not cancel the current attempt. For
  726. // connectionless UDP, the resulting network traffic should
  727. // be similar to common DNS clients which do cancel request
  728. // before beginning the next attempt.
  729. IPs, TTLs, RTT, err := performDNSQuery(
  730. resolveCtx,
  731. r.networkConfig.logWarning,
  732. params,
  733. useProtocolTransform,
  734. conn,
  735. questionType,
  736. hostname)
  737. // Update the min/max RTT metric when reported (>=0) even if
  738. // the result is an error; i.e., the even if there was an
  739. // invalid response.
  740. //
  741. // Limitation: since individual requests aren't cancelled
  742. // after requestTimeout, RTT metrics won't reflect
  743. // no-response cases, although request and response count
  744. // disparities will still show up in the metrics.
  745. if RTT >= 0 {
  746. r.updateMetricRTT(RTT)
  747. }
  748. if err != nil {
  749. lastErr.Store(errors.Trace(err))
  750. return
  751. }
  752. // Update response stats.
  753. switch questionType {
  754. case resolverQuestionTypeA:
  755. r.updateMetricResponsesIPv4()
  756. case resolverQuestionTypeAAAA:
  757. r.updateMetricResponsesIPv6()
  758. }
  759. // Send the answer back to the main loop.
  760. if len(IPs) > 0 || questionType == resolverQuestionTypeAAAA {
  761. a = &answer{
  762. attempt: attempt,
  763. questionType: questionType,
  764. IPs: IPs,
  765. TTLs: TTLs}
  766. // The channel should have sufficient buffering for
  767. // the send to never block; the default case is used
  768. // to avoid a hang in the case of a bug.
  769. select {
  770. case answerChan <- a:
  771. default:
  772. }
  773. }
  774. }(i+1, questionType, useProtocolTransform)
  775. }
  776. resetTimer(requestTimeout)
  777. select {
  778. case result = <-answerChan:
  779. trackResult(result)
  780. if result != nil {
  781. // When the first answer, a response with valid IPs, arrives, exit
  782. // the attempts loop. The following await branch may collect
  783. // additional answers.
  784. params.setFirstAttemptWithAnswer(result.attempt)
  785. stop = true
  786. }
  787. case <-timer.C:
  788. // When requestTimeout arrives, loop around and launch the next
  789. // attempt; leave the existing requests running in case they
  790. // eventually respond.
  791. timerDrained = true
  792. case <-resolveCtx.Done():
  793. // When resolveCtx is done, exit the attempts loop.
  794. //
  795. // Append the existing lastErr, which may convey useful
  796. // information to be reported in a failed_tunnel error message.
  797. lastErr.Store(errors.Tracef(
  798. "%v (lastErr: %v)", context.Cause(resolveCtx), lastErr.Load()))
  799. stop = true
  800. }
  801. }
  802. // Receive any additional answers, now present in the channel, which
  803. // arrived concurrent with the first answer. This receive avoids a race
  804. // condition where inFlight may now be 0, with additional answers
  805. // enqueued, in which case the following await branch is not taken.
  806. //
  807. // It's possible for the attempts loop to exit with no received answer due
  808. // to timeouts or cancellation while, concurrently, an answer is sent to
  809. // the channel. In this case, when result == nil, we ignore the answers
  810. // and leave this as a failed resolve.
  811. if result != nil {
  812. for loop := true; loop; {
  813. select {
  814. case nextAnswer := <-answerChan:
  815. trackResult(nextAnswer)
  816. if nextAnswer != nil {
  817. result.IPs = append(result.IPs, nextAnswer.IPs...)
  818. result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
  819. }
  820. default:
  821. loop = false
  822. }
  823. }
  824. }
  825. // When we have an answer, await -- for a short time,
  826. // params.AwaitTimeout -- extra answers from any remaining in-flight
  827. // requests. Only await if the request isn't cancelled and we don't
  828. // already have at least one IPv4 and one IPv6 response; only await AAAA
  829. // if it was sent; note that a valid AAAA response may include no IPs
  830. // lastErr is not set in timeout/cancelled cases here, since we already
  831. // have an answer.
  832. if result != nil &&
  833. resolveCtx.Err() == nil &&
  834. inFlight > 0 &&
  835. (awaitA || awaitAAAA) &&
  836. params.AwaitTimeout > 0 {
  837. resetTimer(params.AwaitTimeout)
  838. for {
  839. stop := false
  840. select {
  841. case nextAnswer := <-answerChan:
  842. trackResult(nextAnswer)
  843. if nextAnswer != nil {
  844. result.IPs = append(result.IPs, nextAnswer.IPs...)
  845. result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
  846. }
  847. case <-timer.C:
  848. timerDrained = true
  849. stop = true
  850. case <-resolveCtx.Done():
  851. stop = true
  852. }
  853. if stop || inFlight == 0 || (!awaitA && !awaitAAAA) {
  854. break
  855. }
  856. }
  857. }
  858. if timer != nil {
  859. timer.Stop()
  860. }
  861. // Interrupt all workers.
  862. cancelFunc(errors.TraceNew("resolve canceled"))
  863. conns.CloseAll()
  864. waitGroup.Wait()
  865. // When there's no answer, return the last error.
  866. if result == nil {
  867. err := lastErr.Load()
  868. if err == nil {
  869. err = context.Cause(resolveCtx)
  870. }
  871. if err == nil {
  872. err = errors.TraceNew("unexpected missing error")
  873. }
  874. if r.networkConfig.LogHostnames {
  875. err = fmt.Errorf("resolve %s : %w", hostname, err.(error))
  876. }
  877. return nil, errors.Trace(err.(error))
  878. }
  879. if len(result.IPs) == 0 {
  880. // Unexpected, since a len(IPs) > 0 check precedes sending to answerChan.
  881. return nil, errors.TraceNew("unexpected no IPs")
  882. }
  883. // Update the cache now, after all results are gathered.
  884. r.setCache(hostname, result.IPs, result.TTLs)
  885. return result.IPs, nil
  886. }
  887. // VerifyCacheExtension extends the TTL for any cached result for the
  888. // specified hostname to at least NetworkConfig.CacheExtensionVerifiedTTL.
  889. func (r *Resolver) VerifyCacheExtension(hostname string) {
  890. r.mutex.Lock()
  891. defer r.mutex.Unlock()
  892. if r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  893. return
  894. }
  895. if net.ParseIP(hostname) != nil {
  896. return
  897. }
  898. entry, expires, ok := r.cache.GetWithExpiration(hostname)
  899. if !ok {
  900. return
  901. }
  902. // Change the TTL only if the entry expires and the existing TTL isn't
  903. // longer than the extension.
  904. neverExpires := time.Time{}
  905. if expires == neverExpires ||
  906. expires.After(time.Now().Add(r.networkConfig.CacheExtensionVerifiedTTL)) {
  907. return
  908. }
  909. r.cache.Set(hostname, entry, r.networkConfig.CacheExtensionVerifiedTTL)
  910. r.metrics.verifiedCacheExtensions += 1
  911. }
  912. // GetMetrics returns a summary of DNS metrics.
  913. func (r *Resolver) GetMetrics() string {
  914. r.mutex.Lock()
  915. defer r.mutex.Unlock()
  916. // When r.metrics.minRTT < 0, min/maxRTT is unset.
  917. minRTT := "n/a"
  918. maxRTT := minRTT
  919. if r.metrics.minRTT >= 0 {
  920. minRTT = fmt.Sprintf("%d", r.metrics.minRTT/time.Millisecond)
  921. maxRTT = fmt.Sprintf("%d", r.metrics.maxRTT/time.Millisecond)
  922. }
  923. extend := ""
  924. if r.networkConfig.CacheExtensionVerifiedTTL > 0 {
  925. extend = fmt.Sprintf("| extend %d ", r.metrics.verifiedCacheExtensions)
  926. }
  927. defaultResolves := ""
  928. if r.networkConfig.allowDefaultResolver() {
  929. defaultResolves = fmt.Sprintf(
  930. " | def %d/%d", r.metrics.defaultResolves, r.metrics.defaultSuccesses)
  931. }
  932. // Note that the number of system resolvers is a point-in-time value,
  933. // while the others are cumulative.
  934. return fmt.Sprintf("resolves %d | hit %d %s| req v4/v6 %d/%d | resp %d/%d | peak %d | rtt %s - %s ms. | sys %d%s",
  935. r.metrics.resolves,
  936. r.metrics.cacheHits,
  937. extend,
  938. r.metrics.requestsIPv4,
  939. r.metrics.requestsIPv6,
  940. r.metrics.responsesIPv4,
  941. r.metrics.responsesIPv6,
  942. r.metrics.peakInFlight,
  943. minRTT,
  944. maxRTT,
  945. len(r.systemServers),
  946. defaultResolves)
  947. }
  948. // updateNetworkState updates the system DNS server list, IPv6 state, and the
  949. // cache.
  950. //
  951. // Any errors that occur while querying network state are logged; in error
  952. // conditions the functionality of the resolver may be reduced, but the
  953. // resolver remains operational.
  954. func (r *Resolver) updateNetworkState(networkID string) {
  955. r.mutex.Lock()
  956. defer r.mutex.Unlock()
  957. // Only perform blocking/expensive update operations when necessary.
  958. updateAll := false
  959. updateIPv6Route := false
  960. updateServers := false
  961. flushCache := false
  962. // If r.cache is nil, this is the first update call in NewResolver. Create
  963. // the cache and perform all updates.
  964. if r.cache == nil {
  965. r.cache = lrucache.NewWithLRU(
  966. resolverCacheDefaultTTL,
  967. resolverCacheReapFrequency,
  968. resolverCacheMaxEntries)
  969. updateAll = true
  970. }
  971. // Perform all updates when the networkID has changed, which indicates a
  972. // different network.
  973. if r.networkID != networkID {
  974. updateAll = true
  975. }
  976. if updateAll {
  977. updateIPv6Route = true
  978. updateServers = true
  979. flushCache = true
  980. }
  981. // Even when the networkID has not changed, update DNS servers
  982. // periodically. This is similar to how other DNS clients
  983. // poll /etc/resolv.conf, including the period of 5s.
  984. if time.Since(r.lastServersUpdate) > resolverServersUpdateTTL {
  985. updateServers = true
  986. }
  987. // Update hasIPv6Route, which indicates whether the current network has an
  988. // IPv6 route and so if DNS requests for AAAA records will be sent.
  989. // There's no use for AAAA records on IPv4-only networks; and other
  990. // common DNS clients omit AAAA requests on IPv4-only records, so these
  991. // requests would otherwise be unusual.
  992. //
  993. // There's no hasIPv4Route as we always need to resolve A records,
  994. // particularly for IPv4-only endpoints; for IPv6-only networks,
  995. // NetworkConfig.IPv6Synthesize should be used to accomodate IPv4 DNS
  996. // server addresses, and dials performed outside the Resolver will
  997. // similarly use NAT 64 (on iOS; on Android, 464XLAT will handle this
  998. // transparently).
  999. if updateIPv6Route {
  1000. // TODO: the HasIPv6Route callback provides hasRoutableIPv6Interface
  1001. // functionality on platforms where that internal implementation
  1002. // fails. In particular, "route ip+net: netlinkrib: permission
  1003. // denied" on Android; see Go issue 40569). This Android case can be
  1004. // fixed, and the callback retired, by sharing the workaround now
  1005. // implemented in inproxy.pionNetwork.Interfaces.
  1006. if r.networkConfig.HasIPv6Route != nil {
  1007. r.hasIPv6Route = r.networkConfig.HasIPv6Route()
  1008. } else {
  1009. hasIPv6Route, err := hasRoutableIPv6Interface()
  1010. if err != nil {
  1011. // Log warning and proceed without IPv6.
  1012. r.networkConfig.logWarning(
  1013. errors.Tracef("unable to determine IPv6 route: %v", err))
  1014. hasIPv6Route = false
  1015. }
  1016. r.hasIPv6Route = hasIPv6Route
  1017. }
  1018. }
  1019. // Update the list of system DNS servers. It's not an error condition here
  1020. // if the list is empty: a subsequent ResolveIP may use
  1021. // ResolveParameters which specifies an AlternateDNSServer.
  1022. if updateServers && r.networkConfig.GetDNSServers != nil {
  1023. systemServers := []string{}
  1024. for _, systemServer := range r.networkConfig.GetDNSServers() {
  1025. host, _, err := net.SplitHostPort(systemServer)
  1026. if err != nil {
  1027. // Assume the SplitHostPort error is due to systemServer being
  1028. // an IP only, and append the default port, 53. If
  1029. // systemServer _isn't_ an IP, the following ParseIP will fail.
  1030. host = systemServer
  1031. systemServer = net.JoinHostPort(systemServer, resolverDNSPort)
  1032. }
  1033. if net.ParseIP(host) == nil {
  1034. // Log warning and proceed without this DNS server.
  1035. r.networkConfig.logWarning(
  1036. errors.TraceNew("invalid DNS server IP address"))
  1037. continue
  1038. }
  1039. systemServers = append(systemServers, systemServer)
  1040. }
  1041. // Check if the list of servers has changed, including order. If
  1042. // changed, flush the cache even if the networkID has not changed.
  1043. // Cached results are only considered valid as long as the system DNS
  1044. // configuration remains the same.
  1045. equal := len(r.systemServers) == len(systemServers)
  1046. if equal {
  1047. for i := 0; i < len(r.systemServers); i++ {
  1048. if r.systemServers[i] != systemServers[i] {
  1049. equal = false
  1050. break
  1051. }
  1052. }
  1053. }
  1054. flushCache = flushCache || !equal
  1055. // Concurrency note: once the r.systemServers slice is set, the
  1056. // contents of the backing array must not be modified due to
  1057. // concurrent ResolveIP calls.
  1058. r.systemServers = systemServers
  1059. r.lastServersUpdate = time.Now()
  1060. }
  1061. // Skip cache flushes when the extended DNS caching mechanism is enabled.
  1062. // TODO: retain only verified cache entries?
  1063. if flushCache && r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  1064. r.cache.Flush()
  1065. }
  1066. // Set r.networkID only after all operations complete without errors; if
  1067. // r.networkID were set earlier, a subsequent
  1068. // ResolveIP/updateNetworkState call might proceed as if the network
  1069. // state were updated for the specified network ID.
  1070. r.networkID = networkID
  1071. }
  1072. func (r *Resolver) getNetworkState() (bool, []string) {
  1073. r.mutex.Lock()
  1074. defer r.mutex.Unlock()
  1075. return r.hasIPv6Route, r.systemServers
  1076. }
  1077. func (r *Resolver) setCache(hostname string, IPs []net.IP, TTLs []time.Duration) {
  1078. r.mutex.Lock()
  1079. defer r.mutex.Unlock()
  1080. // The shortest TTL is used. In some cases, a DNS server may omit the TTL
  1081. // or set a 0 TTL, in which case the default is used.
  1082. TTL := resolverDefaultAnswerTTL
  1083. for _, answerTTL := range TTLs {
  1084. if answerTTL > 0 && answerTTL < TTL {
  1085. TTL = answerTTL
  1086. }
  1087. }
  1088. // When NetworkConfig.CacheExtensionInitialTTL configured, ensure the TTL
  1089. // is no shorter than CacheExtensionInitialTTL.
  1090. if r.networkConfig.CacheExtensionInitialTTL != 0 &&
  1091. TTL < r.networkConfig.CacheExtensionInitialTTL {
  1092. TTL = r.networkConfig.CacheExtensionInitialTTL
  1093. }
  1094. // Limitation: with concurrent ResolveIPs for the same domain, the last
  1095. // setCache call determines the cache value. The results are not merged.
  1096. r.cache.Set(hostname, IPs, TTL)
  1097. }
  1098. func (r *Resolver) getCache(hostname string) []net.IP {
  1099. r.mutex.Lock()
  1100. defer r.mutex.Unlock()
  1101. entry, ok := r.cache.Get(hostname)
  1102. if !ok {
  1103. return nil
  1104. }
  1105. r.metrics.cacheHits += 1
  1106. return entry.([]net.IP)
  1107. }
  1108. // newResolverConn creates a UDP socket that will send packets to serverAddr.
  1109. // serverAddr is an IP:port, which allows specifying the port for testing or
  1110. // in rare cases where the port isn't 53.
  1111. func (r *Resolver) newResolverConn(
  1112. logWarning func(error),
  1113. serverAddr string) (retConn net.Conn, retErr error) {
  1114. defer func() {
  1115. if retErr != nil {
  1116. logWarning(retErr)
  1117. }
  1118. }()
  1119. // When configured, attempt to synthesize an IPv6 address from
  1120. // an IPv4 address for compatibility on DNS64/NAT64 networks.
  1121. // If synthesize fails, try the original address.
  1122. if r.networkConfig.IPv6Synthesize != nil {
  1123. serverIPStr, port, err := net.SplitHostPort(serverAddr)
  1124. if err != nil {
  1125. return nil, errors.Trace(err)
  1126. }
  1127. serverIP := net.ParseIP(serverIPStr)
  1128. if serverIP != nil && serverIP.To4() != nil {
  1129. synthesized := r.networkConfig.IPv6Synthesize(serverIPStr)
  1130. if synthesized != "" && net.ParseIP(synthesized) != nil {
  1131. serverAddr = net.JoinHostPort(synthesized, port)
  1132. }
  1133. }
  1134. }
  1135. dialer := &net.Dialer{}
  1136. if r.networkConfig.BindToDevice != nil {
  1137. dialer.Control = func(_, _ string, c syscall.RawConn) error {
  1138. var controlErr error
  1139. err := c.Control(func(fd uintptr) {
  1140. _, err := r.networkConfig.BindToDevice(int(fd))
  1141. if err != nil {
  1142. controlErr = errors.Tracef("BindToDevice failed: %v", err)
  1143. return
  1144. }
  1145. })
  1146. if controlErr != nil {
  1147. return errors.Trace(controlErr)
  1148. }
  1149. return errors.Trace(err)
  1150. }
  1151. }
  1152. // context.Background is ok in this case as the UDP dial is just a local
  1153. // syscall to create the socket.
  1154. conn, err := dialer.DialContext(context.Background(), "udp", serverAddr)
  1155. if err != nil {
  1156. return nil, errors.Trace(err)
  1157. }
  1158. return conn, nil
  1159. }
  1160. func (r *Resolver) updateMetricResolves() {
  1161. r.mutex.Lock()
  1162. defer r.mutex.Unlock()
  1163. r.metrics.resolves += 1
  1164. }
  1165. func (r *Resolver) updateMetricRequestsIPv4() {
  1166. r.mutex.Lock()
  1167. defer r.mutex.Unlock()
  1168. r.metrics.requestsIPv4 += 1
  1169. }
  1170. func (r *Resolver) updateMetricRequestsIPv6() {
  1171. r.mutex.Lock()
  1172. defer r.mutex.Unlock()
  1173. r.metrics.requestsIPv6 += 1
  1174. }
  1175. func (r *Resolver) updateMetricResponsesIPv4() {
  1176. r.mutex.Lock()
  1177. defer r.mutex.Unlock()
  1178. r.metrics.responsesIPv4 += 1
  1179. }
  1180. func (r *Resolver) updateMetricResponsesIPv6() {
  1181. r.mutex.Lock()
  1182. defer r.mutex.Unlock()
  1183. r.metrics.responsesIPv6 += 1
  1184. }
  1185. func (r *Resolver) updateMetricDefaultResolver(success bool) {
  1186. r.mutex.Lock()
  1187. defer r.mutex.Unlock()
  1188. r.metrics.defaultResolves += 1
  1189. if success {
  1190. r.metrics.defaultSuccesses += 1
  1191. }
  1192. }
  1193. func (r *Resolver) updateMetricPeakInFlight(inFlight int) {
  1194. r.mutex.Lock()
  1195. defer r.mutex.Unlock()
  1196. if inFlight > r.metrics.peakInFlight {
  1197. r.metrics.peakInFlight = inFlight
  1198. }
  1199. }
  1200. func (r *Resolver) updateMetricRTT(rtt time.Duration) {
  1201. r.mutex.Lock()
  1202. defer r.mutex.Unlock()
  1203. if rtt < 0 {
  1204. // Ignore invalid input.
  1205. return
  1206. }
  1207. // When r.metrics.minRTT < 0, min/maxRTT is unset.
  1208. if r.metrics.minRTT < 0 || rtt < r.metrics.minRTT {
  1209. r.metrics.minRTT = rtt
  1210. }
  1211. if rtt > r.metrics.maxRTT {
  1212. r.metrics.maxRTT = rtt
  1213. }
  1214. }
  1215. func hasRoutableIPv6Interface() (bool, error) {
  1216. interfaces, err := net.Interfaces()
  1217. if err != nil {
  1218. return false, errors.Trace(err)
  1219. }
  1220. for _, in := range interfaces {
  1221. if (in.Flags&net.FlagUp == 0) ||
  1222. // Note: don't exclude interfaces with the net.FlagPointToPoint
  1223. // flag, which is set for certain mobile networks
  1224. (in.Flags&net.FlagLoopback != 0) {
  1225. continue
  1226. }
  1227. addrs, err := in.Addrs()
  1228. if err != nil {
  1229. return false, errors.Trace(err)
  1230. }
  1231. for _, addr := range addrs {
  1232. if IPNet, ok := addr.(*net.IPNet); ok &&
  1233. IPNet.IP.To4() == nil &&
  1234. !IPNet.IP.IsLinkLocalUnicast() {
  1235. return true, nil
  1236. }
  1237. }
  1238. }
  1239. return false, nil
  1240. }
  1241. func generateIPAddressFromCIDR(CIDR string) (net.IP, error) {
  1242. _, IPNet, err := net.ParseCIDR(CIDR)
  1243. if err != nil {
  1244. return nil, errors.Trace(err)
  1245. }
  1246. // A retry is required, since a CIDR may include broadcast IPs (a.b.c.0) or
  1247. // other invalid values. The number of retries is limited to ensure we
  1248. // don't hang in the case of a misconfiguration.
  1249. for i := 0; i < 10; i++ {
  1250. randBytes := prng.Bytes(len(IPNet.IP))
  1251. IP := make(net.IP, len(IPNet.IP))
  1252. // The 1 bits in the mask must apply to the IP in the CIDR and the 0
  1253. // bits in the mask are available to randomize.
  1254. for i := 0; i < len(IP); i++ {
  1255. IP[i] = (IPNet.IP[i] & IPNet.Mask[i]) | (randBytes[i] & ^IPNet.Mask[i])
  1256. }
  1257. if IP.IsGlobalUnicast() && !common.IsBogon(IP) {
  1258. return IP, nil
  1259. }
  1260. }
  1261. return nil, errors.TraceNew("failed to generate random IP")
  1262. }
  1263. type resolverQuestionType int
  1264. const (
  1265. resolverQuestionTypeA = 0
  1266. resolverQuestionTypeAAAA = 1
  1267. )
  1268. func performDNSQuery(
  1269. resolveCtx context.Context,
  1270. logWarning func(error),
  1271. params *ResolveParameters,
  1272. useProtocolTransform bool,
  1273. conn net.Conn,
  1274. questionType resolverQuestionType,
  1275. hostname string) ([]net.IP, []time.Duration, time.Duration, error) {
  1276. if useProtocolTransform {
  1277. if params.ProtocolTransformSpec == nil ||
  1278. params.ProtocolTransformSeed == nil {
  1279. return nil, nil, -1, errors.TraceNew("invalid protocol transform configuration")
  1280. }
  1281. // miekg/dns expects conn to be a net.PacketConn or else it writes the
  1282. // TCP length prefix
  1283. udpConn, ok := conn.(*net.UDPConn)
  1284. if !ok {
  1285. return nil, nil, -1, errors.TraceNew("conn is not a *net.UDPConn")
  1286. }
  1287. conn = &transformDNSPacketConn{
  1288. UDPConn: udpConn,
  1289. transform: params.ProtocolTransformSpec,
  1290. seed: params.ProtocolTransformSeed,
  1291. }
  1292. }
  1293. // UDPSize sets the receive buffer to > 512, even when we don't include
  1294. // EDNS(0), which will mitigate issues with RFC 1035 non-compliant
  1295. // servers. See Go issue 51127.
  1296. dnsConn := &dns.Conn{
  1297. Conn: conn,
  1298. UDPSize: udpPacketBufferSize,
  1299. }
  1300. defer dnsConn.Close()
  1301. // SetQuestion initializes request.MsgHdr.Id to a random value
  1302. request := &dns.Msg{MsgHdr: dns.MsgHdr{RecursionDesired: true}}
  1303. switch questionType {
  1304. case resolverQuestionTypeA:
  1305. request.SetQuestion(dns.Fqdn(hostname), dns.TypeA)
  1306. case resolverQuestionTypeAAAA:
  1307. request.SetQuestion(dns.Fqdn(hostname), dns.TypeAAAA)
  1308. default:
  1309. return nil, nil, -1, errors.TraceNew("unknown DNS request question type")
  1310. }
  1311. if params.IncludeEDNS0 {
  1312. // miekg/dns: "RFC 6891, Section 6.1.1 allows the OPT record to appear
  1313. // anywhere in the additional record section, but it's usually at the
  1314. // end..."
  1315. request.SetEdns0(udpPacketBufferSize, false)
  1316. }
  1317. startTime := time.Now()
  1318. // Send the DNS request
  1319. dnsConn.WriteMsg(request)
  1320. // Read and process the DNS response
  1321. var IPs []net.IP
  1322. var TTLs []time.Duration
  1323. var lastErr error
  1324. RTT := time.Duration(-1)
  1325. for {
  1326. // Stop when resolveCtx is done; the caller, ResolveIP, will also
  1327. // close conn, which will interrupt a blocking dnsConn.ReadMsg.
  1328. if resolveCtx.Err() != nil {
  1329. // ResolveIP, which calls performDNSQuery, already records the
  1330. // context error (e.g., context timeout), so instead report
  1331. // lastErr, when present, as it may contain more useful
  1332. // information about why a response was rejected.
  1333. err := lastErr
  1334. if err == nil {
  1335. err = errors.Trace(context.Cause(resolveCtx))
  1336. }
  1337. return nil, nil, RTT, err
  1338. }
  1339. // Read a response. RTT is the elapsed time between sending the
  1340. // request and reading the last received response.
  1341. response, err := dnsConn.ReadMsg()
  1342. RTT = time.Since(startTime)
  1343. if err == nil && response.MsgHdr.Id != request.MsgHdr.Id {
  1344. err = dns.ErrId
  1345. }
  1346. if err != nil {
  1347. // Try reading again, in case the first response packet failed to
  1348. // unmarshal or had an invalid ID. The Go resolver also does this;
  1349. // see Go issue 13281.
  1350. if resolveCtx.Err() == nil {
  1351. // Only log if resolveCtx is not done; otherwise the error could
  1352. // be due to conn being closed by ResolveIP.
  1353. lastErr = errors.Tracef("invalid response: %v", err)
  1354. logWarning(lastErr)
  1355. }
  1356. continue
  1357. }
  1358. // Check the RCode.
  1359. //
  1360. // For IPv4, we expect RCodeSuccess as Psiphon will typically only
  1361. // resolve domains that exist and have a valid IP (when this isn't
  1362. // the case, and we retry, the overall ResolveIP and its parent dial
  1363. // will still abort after resolveCtx is done, or RequestTimeout
  1364. // expires for maxAttempts).
  1365. //
  1366. // For IPv6, we should also expect RCodeSuccess even if there is no
  1367. // AAAA record, as long as the domain exists and has an A record.
  1368. // However, per RFC 6147 section 5.1.2, we may receive
  1369. // NXDOMAIN: "...some servers respond with RCODE=3 to a AAAA query
  1370. // even if there is an A record available for that owner name. Those
  1371. // servers are in clear violation of the meaning of RCODE 3...". In
  1372. // this case, we coalesce NXDOMAIN into success to treat the response
  1373. // the same as success with no AAAA record.
  1374. //
  1375. // All other RCodes, which are unexpected, lead to a read retry.
  1376. if response.MsgHdr.Rcode != dns.RcodeSuccess &&
  1377. !(questionType == resolverQuestionTypeAAAA && response.MsgHdr.Rcode == dns.RcodeNameError) {
  1378. errMsg, ok := dns.RcodeToString[response.MsgHdr.Rcode]
  1379. if !ok {
  1380. errMsg = fmt.Sprintf("Rcode: %d", response.MsgHdr.Rcode)
  1381. }
  1382. lastErr = errors.Tracef("unexpected RCode: %v", errMsg)
  1383. logWarning(lastErr)
  1384. continue
  1385. }
  1386. // Extract all IP answers, along with corresponding TTLs for caching.
  1387. // Perform additional validation, which may lead to another read
  1388. // retry. However, if _any_ valid IP is found, stop reading and
  1389. // return that result. Again, the validation is only best effort.
  1390. checkFailed := false
  1391. for _, answer := range response.Answer {
  1392. haveAnswer := false
  1393. var IP net.IP
  1394. var TTLSec uint32
  1395. switch questionType {
  1396. case resolverQuestionTypeA:
  1397. if a, ok := answer.(*dns.A); ok {
  1398. IP = a.A
  1399. TTLSec = a.Hdr.Ttl
  1400. haveAnswer = true
  1401. }
  1402. case resolverQuestionTypeAAAA:
  1403. if aaaa, ok := answer.(*dns.AAAA); ok {
  1404. IP = aaaa.AAAA
  1405. TTLSec = aaaa.Hdr.Ttl
  1406. haveAnswer = true
  1407. }
  1408. }
  1409. if !haveAnswer {
  1410. continue
  1411. }
  1412. err := checkDNSAnswerIP(IP)
  1413. if err != nil {
  1414. checkFailed = true
  1415. lastErr = errors.Tracef("invalid IP: %v", err)
  1416. logWarning(lastErr)
  1417. // Check the next answer
  1418. continue
  1419. }
  1420. IPs = append(IPs, IP)
  1421. TTLs = append(TTLs, time.Duration(TTLSec)*time.Second)
  1422. }
  1423. // For IPv4, an IP is expected, as noted in the comment above.
  1424. //
  1425. // In potential cases where we resolve a domain that has only an IPv6
  1426. // address, the concurrent AAAA request will deliver its result to
  1427. // ResolveIP, and that answer will be selected, so only the "await"
  1428. // logic will delay the parent dial in that case.
  1429. if questionType == resolverQuestionTypeA && len(IPs) == 0 && !checkFailed {
  1430. checkFailed = true
  1431. lastErr = errors.TraceNew("unexpected empty A response")
  1432. logWarning(lastErr)
  1433. }
  1434. // Retry if there are no valid IPs and any error; if no error, this
  1435. // may be a valid AAAA response with no IPs, in which case return the
  1436. // result.
  1437. if len(IPs) == 0 && checkFailed {
  1438. continue
  1439. }
  1440. return IPs, TTLs, RTT, nil
  1441. }
  1442. }
  1443. func checkDNSAnswerIP(IP net.IP) error {
  1444. if IP == nil {
  1445. return errors.TraceNew("IP is nil")
  1446. }
  1447. // Limitation: this could still be a phony/injected response, it's not
  1448. // possible to verify with plaintext DNS, but a "bogon" IP is clearly
  1449. // invalid.
  1450. if common.IsBogon(IP) {
  1451. return errors.TraceNew("IP is bogon")
  1452. }
  1453. // Create a temporary socket bound to the destination IP. This checks
  1454. // thats the local host has a route to this IP. If not, we'll reject the
  1455. // IP. This prevents selecting an IP which is guaranteed to fail to dial.
  1456. // Use UDP as this results in no network traffic; the destination port is
  1457. // arbitrary. The Go resolver performs a similar operation.
  1458. //
  1459. // Limitations:
  1460. // - We may cache the IP and reuse it without checking routability again;
  1461. // the cache should be flushed when network state changes.
  1462. // - Given that the AAAA is requested only when the host has an IPv6
  1463. // route, we don't expect this to often fail with a _valid_ response.
  1464. // However, this remains a possibility and in this case,
  1465. // performDNSQuery will keep awaiting a response which can trigger
  1466. // the "await" logic.
  1467. conn, err := net.DialUDP("udp", nil, &net.UDPAddr{IP: IP, Port: 443})
  1468. if err != nil {
  1469. return errors.Trace(err)
  1470. }
  1471. conn.Close()
  1472. return nil
  1473. }
  1474. func defaultResolverLookupIP(
  1475. ctx context.Context, hostname string, logHostnames bool) ([]net.IP, error) {
  1476. addrs, err := net.DefaultResolver.LookupIPAddr(ctx, hostname)
  1477. if err != nil && !logHostnames {
  1478. // Remove domain names from "net" error messages.
  1479. err = common.RedactNetError(err)
  1480. }
  1481. if err != nil {
  1482. return nil, errors.Trace(err)
  1483. }
  1484. ips := make([]net.IP, len(addrs))
  1485. for i, addr := range addrs {
  1486. ips[i] = addr.IP
  1487. }
  1488. return ips, nil
  1489. }
  1490. // transformDNSPacketConn wraps a *net.UDPConn, intercepting Write calls and
  1491. // applying the specified protocol transform.
  1492. //
  1493. // As transforms operate on strings and DNS requests are binary, the transform
  1494. // should be expressed using hex characters. The DNS packet to be written
  1495. // (the input to Write) is converted to hex, transformed, converted back to
  1496. // binary, and then actually written to the UDP socket.
  1497. type transformDNSPacketConn struct {
  1498. *net.UDPConn // embedded socket; Write sends the transformed packet through it
  1499. transform transforms.Spec // spec applied by Write to the hex form of each packet
  1500. seed *prng.Seed // seed passed to transform.ApplyString on every Write
  1501. }
  1502. func (conn *transformDNSPacketConn) Write(b []byte) (int, error) {
  1503. // Limitation: there is no check that a transformed packet remains within
  1504. // the network packet MTU.
  1505. input := hex.EncodeToString(b)
  1506. output, err := conn.transform.ApplyString(conn.seed, input)
  1507. if err != nil {
  1508. return 0, errors.Trace(err)
  1509. }
  1510. packet, err := hex.DecodeString(output)
  1511. if err != nil {
  1512. return 0, errors.Trace(err)
  1513. }
  1514. _, err = conn.UDPConn.Write(packet)
  1515. if err != nil {
  1516. // In the error case, don't report bytes written as the number could
  1517. // exceed the pre-transform length.
  1518. return 0, errors.Trace(err)
  1519. }
  1520. // Report the pre-transform length as bytes written, as the caller may check
  1521. // that the requested len(b) bytes were written.
  1522. return len(b), nil
  1523. }