netmon.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // Package netmon provides facilities for monitoring network
  4. // interface and route changes. It primarily exists to know when
  5. // portable devices move between different networks.
  6. package netmon
  7. import (
  8. "encoding/json"
  9. "errors"
  10. "net/netip"
  11. "runtime"
  12. "sync"
  13. "time"
  14. "tailscale.com/net/interfaces"
  15. "tailscale.com/types/logger"
  16. "tailscale.com/util/set"
  17. )
  // pollWallTimeInterval is the period of the background wall-clock check.
  //
  // The check looks for big jumps in wall (non-monotonic) time as a backup
  // mechanism for noticing that a sleeping device has woken back up. Usually
  // there are also minor network change events on wake that let us check the
  // wall time sooner than this.
  const pollWallTimeInterval time.Duration = 15 * time.Second
  // message is a single notification returned from an osMon.
  type message interface {
  	// ignore reports whether this message should be ignored.
  	ignore() bool
  }
  29. // osMon is the interface that each operating system-specific
  30. // implementation of the link monitor must implement.
  31. type osMon interface {
  32. Close() error
  33. // Receive returns a new network interface change message. It
  34. // should block until there's either something to return, or
  35. // until the osMon is closed. After a Close, the returned
  36. // error is ignored.
  37. Receive() (message, error)
  38. // IsInterestingInterface reports whether the provided interface should
  39. // be considered for network change events.
  40. IsInterestingInterface(iface string) bool
  41. }
  42. // Monitor represents a monitoring instance.
  43. type Monitor struct {
  44. logf logger.Logf
  45. om osMon // nil means not supported on this platform
  46. change chan struct{}
  47. stop chan struct{} // closed on Stop
  48. mu sync.Mutex // guards all following fields
  49. cbs set.HandleSet[ChangeFunc]
  50. ruleDelCB set.HandleSet[RuleDeleteCallback]
  51. ifState *interfaces.State
  52. gwValid bool // whether gw and gwSelfIP are valid
  53. gw netip.Addr // our gateway's IP
  54. gwSelfIP netip.Addr // our own IP address (that corresponds to gw)
  55. started bool
  56. closed bool
  57. goroutines sync.WaitGroup
  58. wallTimer *time.Timer // nil until Started; re-armed AfterFunc per tick
  59. lastWall time.Time
  60. timeJumped bool // whether we need to send a changed=true after a big time jump
  61. }
  62. // ChangeFunc is a callback function registered with Monitor that's called when the
  63. // network changed. The changed parameter is whether the network changed
  64. // enough for State to have changed since the last callback.
  65. type ChangeFunc func(changed bool, state *interfaces.State)
  66. // New instantiates and starts a monitoring instance.
  67. // The returned monitor is inactive until it's started by the Start method.
  68. // Use RegisterChangeCallback to get notified of network changes.
  69. func New(logf logger.Logf) (*Monitor, error) {
  70. logf = logger.WithPrefix(logf, "monitor: ")
  71. m := &Monitor{
  72. logf: logf,
  73. change: make(chan struct{}, 1),
  74. stop: make(chan struct{}),
  75. lastWall: wallTime(),
  76. }
  77. st, err := m.interfaceStateUncached()
  78. if err != nil {
  79. return nil, err
  80. }
  81. m.ifState = st
  82. m.om, err = newOSMon(logf, m)
  83. if err != nil {
  84. return nil, err
  85. }
  86. if m.om == nil {
  87. return nil, errors.New("newOSMon returned nil, nil")
  88. }
  89. return m, nil
  90. }
  91. // InterfaceState returns the latest snapshot of the machine's network
  92. // interfaces.
  93. //
  94. // The returned value is owned by Mon; it must not be modified.
  95. func (m *Monitor) InterfaceState() *interfaces.State {
  96. m.mu.Lock()
  97. defer m.mu.Unlock()
  98. return m.ifState
  99. }
  100. func (m *Monitor) interfaceStateUncached() (*interfaces.State, error) {
  101. return interfaces.GetState()
  102. }
  103. // GatewayAndSelfIP returns the current network's default gateway, and
  104. // the machine's default IP for that gateway.
  105. //
  106. // It's the same as interfaces.LikelyHomeRouterIP, but it caches the
  107. // result until the monitor detects a network change.
  108. func (m *Monitor) GatewayAndSelfIP() (gw, myIP netip.Addr, ok bool) {
  109. m.mu.Lock()
  110. defer m.mu.Unlock()
  111. if m.gwValid {
  112. return m.gw, m.gwSelfIP, true
  113. }
  114. gw, myIP, ok = interfaces.LikelyHomeRouterIP()
  115. if ok {
  116. m.gw, m.gwSelfIP, m.gwValid = gw, myIP, true
  117. }
  118. return gw, myIP, ok
  119. }
  120. // RegisterChangeCallback adds callback to the set of parties to be
  121. // notified (in their own goroutine) when the network state changes.
  122. // To remove this callback, call unregister (or close the monitor).
  123. func (m *Monitor) RegisterChangeCallback(callback ChangeFunc) (unregister func()) {
  124. m.mu.Lock()
  125. defer m.mu.Unlock()
  126. handle := m.cbs.Add(callback)
  127. return func() {
  128. m.mu.Lock()
  129. defer m.mu.Unlock()
  130. delete(m.cbs, handle)
  131. }
  132. }
  // RuleDeleteCallback is a callback invoked when a Linux IP policy routing
  // rule is deleted.
  //
  // The table is the table number (52, 253, 354) and priority is the
  // priority order number (for Tailscale rules currently: 5210, 5230, 5250,
  // 5270).
  //
  // NOTE(review): 354 does not fit in a uint8; confirm the intended table
  // numbers.
  type RuleDeleteCallback func(table uint8, priority uint32)
  138. // RegisterRuleDeleteCallback adds callback to the set of parties to be
  139. // notified (in their own goroutine) when a Linux ip rule is deleted.
  140. // To remove this callback, call unregister (or close the monitor).
  141. func (m *Monitor) RegisterRuleDeleteCallback(callback RuleDeleteCallback) (unregister func()) {
  142. m.mu.Lock()
  143. defer m.mu.Unlock()
  144. handle := m.ruleDelCB.Add(callback)
  145. return func() {
  146. m.mu.Lock()
  147. defer m.mu.Unlock()
  148. delete(m.ruleDelCB, handle)
  149. }
  150. }
  151. // Start starts the monitor.
  152. // A monitor can only be started & closed once.
  153. func (m *Monitor) Start() {
  154. m.mu.Lock()
  155. defer m.mu.Unlock()
  156. if m.started || m.closed {
  157. return
  158. }
  159. m.started = true
  160. if shouldMonitorTimeJump {
  161. m.wallTimer = time.AfterFunc(pollWallTimeInterval, m.pollWallTime)
  162. }
  163. if m.om == nil {
  164. return
  165. }
  166. m.goroutines.Add(2)
  167. go m.pump()
  168. go m.debounce()
  169. }
  170. // Close closes the monitor.
  171. func (m *Monitor) Close() error {
  172. m.mu.Lock()
  173. if m.closed {
  174. m.mu.Unlock()
  175. return nil
  176. }
  177. m.closed = true
  178. close(m.stop)
  179. if m.wallTimer != nil {
  180. m.wallTimer.Stop()
  181. }
  182. var err error
  183. if m.om != nil {
  184. err = m.om.Close()
  185. }
  186. started := m.started
  187. m.mu.Unlock()
  188. if started {
  189. m.goroutines.Wait()
  190. }
  191. return err
  192. }
  193. // InjectEvent forces the monitor to pretend there was a network
  194. // change and re-check the state of the network. Any registered
  195. // ChangeFunc callbacks will be called within the event coalescing
  196. // period (under a fraction of a second).
  197. func (m *Monitor) InjectEvent() {
  198. select {
  199. case m.change <- struct{}{}:
  200. default:
  201. // Another change signal is already
  202. // buffered. Debounce will wake up soon
  203. // enough.
  204. }
  205. }
  206. func (m *Monitor) stopped() bool {
  207. select {
  208. case <-m.stop:
  209. return true
  210. default:
  211. return false
  212. }
  213. }
  214. // pump continuously retrieves messages from the connection, notifying
  215. // the change channel of changes, and stopping when a stop is issued.
  216. func (m *Monitor) pump() {
  217. defer m.goroutines.Done()
  218. for !m.stopped() {
  219. msg, err := m.om.Receive()
  220. if err != nil {
  221. if m.stopped() {
  222. return
  223. }
  224. // Keep retrying while we're not closed.
  225. m.logf("error from link monitor: %v", err)
  226. time.Sleep(time.Second)
  227. continue
  228. }
  229. if rdm, ok := msg.(ipRuleDeletedMessage); ok {
  230. m.notifyRuleDeleted(rdm)
  231. continue
  232. }
  233. if msg.ignore() {
  234. continue
  235. }
  236. m.InjectEvent()
  237. }
  238. }
  239. func (m *Monitor) notifyRuleDeleted(rdm ipRuleDeletedMessage) {
  240. m.mu.Lock()
  241. defer m.mu.Unlock()
  242. for _, cb := range m.ruleDelCB {
  243. go cb(rdm.table, rdm.priority)
  244. }
  245. }
  246. // isInterestingInterface reports whether the provided interface should be
  247. // considered when checking for network state changes.
  248. // The ips parameter should be the IPs of the provided interface.
  249. func (m *Monitor) isInterestingInterface(i interfaces.Interface, ips []netip.Prefix) bool {
  250. return m.om.IsInterestingInterface(i.Name) && interfaces.UseInterestingInterfaces(i, ips)
  251. }
  252. // debounce calls the callback function with a delay between events
  253. // and exits when a stop is issued.
  254. func (m *Monitor) debounce() {
  255. defer m.goroutines.Done()
  256. for {
  257. select {
  258. case <-m.stop:
  259. return
  260. case <-m.change:
  261. }
  262. if curState, err := m.interfaceStateUncached(); err != nil {
  263. m.logf("interfaces.State: %v", err)
  264. } else {
  265. m.mu.Lock()
  266. oldState := m.ifState
  267. changed := !curState.EqualFiltered(oldState, m.isInterestingInterface, interfaces.UseInterestingIPs)
  268. if changed {
  269. m.gwValid = false
  270. m.ifState = curState
  271. if s1, s2 := oldState.String(), curState.String(); s1 == s2 {
  272. m.logf("[unexpected] network state changed, but stringification didn't: %v", s1)
  273. m.logf("[unexpected] old: %s", jsonSummary(oldState))
  274. m.logf("[unexpected] new: %s", jsonSummary(curState))
  275. }
  276. }
  277. // See if we have a queued or new time jump signal.
  278. if shouldMonitorTimeJump && m.checkWallTimeAdvanceLocked() {
  279. m.resetTimeJumpedLocked()
  280. if !changed {
  281. // Only log if it wasn't an interesting change.
  282. m.logf("time jumped (probably wake from sleep); synthesizing major change event")
  283. changed = true
  284. }
  285. }
  286. for _, cb := range m.cbs {
  287. go cb(changed, m.ifState)
  288. }
  289. m.mu.Unlock()
  290. }
  291. select {
  292. case <-m.stop:
  293. return
  294. case <-time.After(250 * time.Millisecond):
  295. }
  296. }
  297. }
  // jsonSummary returns the JSON encoding of x as a byte slice, or the
  // marshalling error if x cannot be encoded. It is meant for use as a log
  // argument.
  func jsonSummary(x any) any {
  	encoded, err := json.Marshal(x)
  	if err == nil {
  		return encoded
  	}
  	return err
  }
  // wallTime returns the current time with its monotonic clock reading
  // removed, so that differences between two results reflect wall time.
  func wallTime() time.Time {
  	now := time.Now()
  	// From time package's docs: "The canonical way to strip a
  	// monotonic clock reading is to use t = t.Round(0)."
  	return now.Round(0)
  }
  310. func (m *Monitor) pollWallTime() {
  311. m.mu.Lock()
  312. defer m.mu.Unlock()
  313. if m.closed {
  314. return
  315. }
  316. if m.checkWallTimeAdvanceLocked() {
  317. m.InjectEvent()
  318. }
  319. m.wallTimer.Reset(pollWallTimeInterval)
  320. }
  // shouldMonitorTimeJump is whether we keep a regular periodic timer
  // running in the background watching for jumps in wall time.
  //
  // We don't do this on mobile platforms (Android, iOS) for battery reasons,
  // and because these platforms don't really sleep in the same way.
  const shouldMonitorTimeJump = !(runtime.GOOS == "android" || runtime.GOOS == "ios")
  327. // checkWallTimeAdvanceLocked reports whether wall time jumped more than 150% of
  328. // pollWallTimeInterval, indicating we probably just came out of sleep. Once a
  329. // time jump is detected it must be reset by calling resetTimeJumpedLocked.
  330. func (m *Monitor) checkWallTimeAdvanceLocked() bool {
  331. if !shouldMonitorTimeJump {
  332. panic("unreachable") // if callers are correct
  333. }
  334. now := wallTime()
  335. if now.Sub(m.lastWall) > pollWallTimeInterval*3/2 {
  336. m.timeJumped = true // it is reset by debounce.
  337. }
  338. m.lastWall = now
  339. return m.timeJumped
  340. }
  341. // resetTimeJumpedLocked consumes the signal set by checkWallTimeAdvanceLocked.
  342. func (m *Monitor) resetTimeJumpedLocked() {
  343. m.timeJumped = false
  344. }
  // ipRuleDeletedMessage is the message describing a deleted IP rule; its
  // fields are handed to the registered RuleDeleteCallback functions.
  type ipRuleDeletedMessage struct {
  	table    uint8  // table number of the deleted rule
  	priority uint32 // priority number of the deleted rule
  }

  // ignore always reports true. pump dispatches this message type to the
  // rule-delete callbacks before it consults ignore.
  func (ipRuleDeletedMessage) ignore() bool {
  	return true
  }