| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593 |
- // Copyright (c) Tailscale Inc & AUTHORS
- // SPDX-License-Identifier: BSD-3-Clause
// Package netmon provides facilities for monitoring network
// interface and route changes. It primarily exists to know when
// portable devices move between different networks.
- package netmon
- import (
- "encoding/json"
- "errors"
- "net/netip"
- "runtime"
- "sync"
- "time"
- "tailscale.com/net/interfaces"
- "tailscale.com/types/logger"
- "tailscale.com/util/clientmetric"
- "tailscale.com/util/set"
- )
// pollWallTimeInterval is how often we check the time to check
// for big jumps in wall (non-monotonic) time as a backup mechanism
// to get notified of a sleeping device waking back up.
// Usually there are also minor network change events on wake that let
// us check the wall time sooner than this.
//
// A jump is only declared when wall time advanced by more than 150% of
// this interval since the previous check (see checkWallTimeAdvanceLocked).
const pollWallTimeInterval = 15 * time.Second
// message represents a message returned from an osMon.
type message interface {
	// ignore reports whether the monitor should disregard this message
	// instead of treating it as a potential network change.
	ignore() bool
}
// osMon is the interface that each operating system-specific
// implementation of the link monitor must implement.
type osMon interface {
	// Close shuts down the OS-level monitor. Monitor.Close calls it
	// and returns its error.
	Close() error

	// Receive returns a new network interface change message. It
	// should block until there's either something to return, or
	// until the osMon is closed. After a Close, the returned
	// error is ignored.
	Receive() (message, error)

	// IsInterestingInterface reports whether the provided interface should
	// be considered for network change events.
	IsInterestingInterface(iface string) bool
}
// Monitor represents a monitoring instance.
//
// Create one with New, register callbacks, then call Start. Close shuts it
// down; a Monitor can only be started and closed once.
type Monitor struct {
	logf   logger.Logf   // log function; New wraps it with the "monitor: " prefix
	om     osMon         // nil means not supported on this platform
	change chan bool     // send false to wake poller, true to also force ChangeDeltas be sent
	stop   chan struct{} // closed by Close

	// Things that must be set early, before use,
	// and not change at runtime.
	tsIfName string // tailscale interface name, if known/set ("tailscale0", "utun3", ...)

	mu         sync.Mutex                        // guards all following fields
	cbs        set.HandleSet[ChangeFunc]         // callbacks added by RegisterChangeCallback
	ruleDelCB  set.HandleSet[RuleDeleteCallback] // callbacks added by RegisterRuleDeleteCallback
	ifState    *interfaces.State                 // set in New; replaced by handlePotentialChange on a major change
	gwValid    bool                              // whether gw and gwSelfIP are valid
	gw         netip.Addr                        // our gateway's IP
	gwSelfIP   netip.Addr                        // our own IP address (that corresponds to gw)
	started    bool                              // set by Start
	closed     bool                              // set by Close
	goroutines sync.WaitGroup                    // tracks the pump and debounce goroutines
	wallTimer  *time.Timer                       // nil until Started; re-armed AfterFunc per tick
	lastWall   time.Time                         // wall time observed by the most recent wall-time check
	timeJumped bool                              // whether we need to send a changed=true after a big time jump
}
// ChangeFunc is a callback function registered with Monitor that's called when the
// network changed. The monitor invokes each registered ChangeFunc in its own
// goroutine, passing the ChangeDelta that describes the change.
type ChangeFunc func(*ChangeDelta)
// ChangeDelta describes the difference between two network states.
type ChangeDelta struct {
	// Monitor is the network monitor that sent this delta.
	Monitor *Monitor

	// Old is the old interface state, if known.
	// It's nil if the old state is unknown.
	// Do not mutate it.
	Old *interfaces.State

	// New is the new network state.
	// It is always non-nil.
	// Do not mutate it.
	New *interfaces.State

	// Major is our legacy boolean of whether the network changed in some major
	// way.
	//
	// Deprecated: do not remove. As of 2023-08-23 we're in a renewed effort to
	// remove it and ask specific questions of ChangeDelta instead. Look at Old
	// and New (or add methods to ChangeDelta) instead of using Major.
	Major bool

	// TimeJumped is whether there was a big jump in wall time since the last
	// time we checked. This is a hint that a mobile sleeping device might have
	// come out of sleep.
	TimeJumped bool

	// TODO(bradfitz): add some lazy cached fields here as needed with methods
	// on *ChangeDelta to let callers ask specific questions
}
- // New instantiates and starts a monitoring instance.
- // The returned monitor is inactive until it's started by the Start method.
- // Use RegisterChangeCallback to get notified of network changes.
- func New(logf logger.Logf) (*Monitor, error) {
- logf = logger.WithPrefix(logf, "monitor: ")
- m := &Monitor{
- logf: logf,
- change: make(chan bool, 1),
- stop: make(chan struct{}),
- lastWall: wallTime(),
- }
- st, err := m.interfaceStateUncached()
- if err != nil {
- return nil, err
- }
- m.ifState = st
- m.om, err = newOSMon(logf, m)
- if err != nil {
- return nil, err
- }
- if m.om == nil {
- return nil, errors.New("newOSMon returned nil, nil")
- }
- return m, nil
- }
- // InterfaceState returns the latest snapshot of the machine's network
- // interfaces.
- //
- // The returned value is owned by Mon; it must not be modified.
- func (m *Monitor) InterfaceState() *interfaces.State {
- m.mu.Lock()
- defer m.mu.Unlock()
- return m.ifState
- }
- func (m *Monitor) interfaceStateUncached() (*interfaces.State, error) {
- return interfaces.GetState()
- }
- // SetTailscaleInterfaceName sets the name of the Tailscale interface. For
- // example, "tailscale0", "tun0", "utun3", etc.
- //
- // This must be called only early in tailscaled startup before the monitor is
- // used.
- func (m *Monitor) SetTailscaleInterfaceName(ifName string) {
- m.tsIfName = ifName
- }
- // GatewayAndSelfIP returns the current network's default gateway, and
- // the machine's default IP for that gateway.
- //
- // It's the same as interfaces.LikelyHomeRouterIP, but it caches the
- // result until the monitor detects a network change.
- func (m *Monitor) GatewayAndSelfIP() (gw, myIP netip.Addr, ok bool) {
- m.mu.Lock()
- defer m.mu.Unlock()
- if m.gwValid {
- return m.gw, m.gwSelfIP, true
- }
- gw, myIP, ok = interfaces.LikelyHomeRouterIP()
- changed := false
- if ok {
- changed = m.gw != gw || m.gwSelfIP != myIP
- m.gw, m.gwSelfIP = gw, myIP
- m.gwValid = true
- }
- if changed {
- m.logf("gateway and self IP changed: gw=%v self=%v", m.gw, m.gwSelfIP)
- }
- return gw, myIP, ok
- }
- // RegisterChangeCallback adds callback to the set of parties to be
- // notified (in their own goroutine) when the network state changes.
- // To remove this callback, call unregister (or close the monitor).
- func (m *Monitor) RegisterChangeCallback(callback ChangeFunc) (unregister func()) {
- m.mu.Lock()
- defer m.mu.Unlock()
- handle := m.cbs.Add(callback)
- return func() {
- m.mu.Lock()
- defer m.mu.Unlock()
- delete(m.cbs, handle)
- }
- }
// RuleDeleteCallback is a callback when a Linux IP policy routing
// rule is deleted. The table is the table number (52, 253, 354) and
// priority is the priority order number (for Tailscale rules
// currently: 5210, 5230, 5250, 5270).
type RuleDeleteCallback func(table uint8, priority uint32)
- // RegisterRuleDeleteCallback adds callback to the set of parties to be
- // notified (in their own goroutine) when a Linux ip rule is deleted.
- // To remove this callback, call unregister (or close the monitor).
- func (m *Monitor) RegisterRuleDeleteCallback(callback RuleDeleteCallback) (unregister func()) {
- m.mu.Lock()
- defer m.mu.Unlock()
- handle := m.ruleDelCB.Add(callback)
- return func() {
- m.mu.Lock()
- defer m.mu.Unlock()
- delete(m.ruleDelCB, handle)
- }
- }
- // Start starts the monitor.
- // A monitor can only be started & closed once.
- func (m *Monitor) Start() {
- m.mu.Lock()
- defer m.mu.Unlock()
- if m.started || m.closed {
- return
- }
- m.started = true
- if shouldMonitorTimeJump {
- m.wallTimer = time.AfterFunc(pollWallTimeInterval, m.pollWallTime)
- }
- if m.om == nil {
- return
- }
- m.goroutines.Add(2)
- go m.pump()
- go m.debounce()
- }
- // Close closes the monitor.
- func (m *Monitor) Close() error {
- m.mu.Lock()
- if m.closed {
- m.mu.Unlock()
- return nil
- }
- m.closed = true
- close(m.stop)
- if m.wallTimer != nil {
- m.wallTimer.Stop()
- }
- var err error
- if m.om != nil {
- err = m.om.Close()
- }
- started := m.started
- m.mu.Unlock()
- if started {
- m.goroutines.Wait()
- }
- return err
- }
- // InjectEvent forces the monitor to pretend there was a network
- // change and re-check the state of the network. Any registered
- // ChangeFunc callbacks will be called within the event coalescing
- // period (under a fraction of a second).
- func (m *Monitor) InjectEvent() {
- select {
- case m.change <- true:
- default:
- // Another change signal is already
- // buffered. Debounce will wake up soon
- // enough.
- }
- }
- // Poll forces the monitor to pretend there was a network
- // change and re-check the state of the network.
- //
- // This is like InjectEvent but only fires ChangeFunc callbacks
- // if the network state differed at all.
- func (m *Monitor) Poll() {
- select {
- case m.change <- false:
- default:
- }
- }
- func (m *Monitor) stopped() bool {
- select {
- case <-m.stop:
- return true
- default:
- return false
- }
- }
- // pump continuously retrieves messages from the connection, notifying
- // the change channel of changes, and stopping when a stop is issued.
- func (m *Monitor) pump() {
- defer m.goroutines.Done()
- for !m.stopped() {
- msg, err := m.om.Receive()
- if err != nil {
- if m.stopped() {
- return
- }
- // Keep retrying while we're not closed.
- m.logf("error from link monitor: %v", err)
- time.Sleep(time.Second)
- continue
- }
- if rdm, ok := msg.(ipRuleDeletedMessage); ok {
- m.notifyRuleDeleted(rdm)
- continue
- }
- if msg.ignore() {
- continue
- }
- m.Poll()
- }
- }
- func (m *Monitor) notifyRuleDeleted(rdm ipRuleDeletedMessage) {
- m.mu.Lock()
- defer m.mu.Unlock()
- for _, cb := range m.ruleDelCB {
- go cb(rdm.table, rdm.priority)
- }
- }
- // isInterestingInterface reports whether the provided interface should be
- // considered when checking for network state changes.
- // The ips parameter should be the IPs of the provided interface.
- func (m *Monitor) isInterestingInterface(i interfaces.Interface, ips []netip.Prefix) bool {
- if !m.om.IsInterestingInterface(i.Name) {
- return false
- }
- return true
- }
- // debounce calls the callback function with a delay between events
- // and exits when a stop is issued.
- func (m *Monitor) debounce() {
- defer m.goroutines.Done()
- for {
- var forceCallbacks bool
- select {
- case <-m.stop:
- return
- case forceCallbacks = <-m.change:
- }
- if newState, err := m.interfaceStateUncached(); err != nil {
- m.logf("interfaces.State: %v", err)
- } else {
- m.handlePotentialChange(newState, forceCallbacks)
- }
- select {
- case <-m.stop:
- return
- case <-time.After(250 * time.Millisecond):
- }
- }
- }
// Client metrics updated by handlePotentialChange.
var (
	// metricChangeEq counts checks that found nothing to report.
	metricChangeEq = clientmetric.NewCounter("netmon_link_change_eq")
	// metricChange counts deltas delivered to callbacks.
	metricChange = clientmetric.NewCounter("netmon_link_change")
	// metricChangeTimeJump counts delivered deltas flagged as a time jump.
	metricChangeTimeJump = clientmetric.NewCounter("netmon_link_change_timejump")
	// metricChangeMajor counts delivered deltas flagged as major.
	metricChangeMajor = clientmetric.NewCounter("netmon_link_change_major")
)
- // handlePotentialChange considers whether newState is different enough to wake
- // up callers and updates the monitor's state if so.
- //
- // If forceCallbacks is true, they're always notified.
- func (m *Monitor) handlePotentialChange(newState *interfaces.State, forceCallbacks bool) {
- m.mu.Lock()
- defer m.mu.Unlock()
- oldState := m.ifState
- timeJumped := shouldMonitorTimeJump && m.checkWallTimeAdvanceLocked()
- if !timeJumped && !forceCallbacks && oldState.Equal(newState) {
- // Exactly equal. Nothing to do.
- metricChangeEq.Add(1)
- return
- }
- delta := &ChangeDelta{
- Monitor: m,
- Old: oldState,
- New: newState,
- TimeJumped: timeJumped,
- }
- delta.Major = m.IsMajorChangeFrom(oldState, newState)
- if delta.Major {
- m.gwValid = false
- m.ifState = newState
- if s1, s2 := oldState.String(), delta.New.String(); s1 == s2 {
- m.logf("[unexpected] network state changed, but stringification didn't: %v", s1)
- m.logf("[unexpected] old: %s", jsonSummary(oldState))
- m.logf("[unexpected] new: %s", jsonSummary(newState))
- }
- }
- // See if we have a queued or new time jump signal.
- if timeJumped {
- m.resetTimeJumpedLocked()
- if !delta.Major {
- // Only log if it wasn't an interesting change.
- m.logf("time jumped (probably wake from sleep); synthesizing major change event")
- delta.Major = true
- }
- }
- metricChange.Add(1)
- if delta.Major {
- metricChangeMajor.Add(1)
- }
- if delta.TimeJumped {
- metricChangeTimeJump.Add(1)
- }
- for _, cb := range m.cbs {
- go cb(delta)
- }
- }
- // IsMajorChangeFrom reports whether the transition from s1 to s2 is
- // a "major" change, where major roughly means it's worth tearing down
- // a bunch of connections and rebinding.
- //
- // TODO(bradiftz): tigten this definition.
- func (m *Monitor) IsMajorChangeFrom(s1, s2 *interfaces.State) bool {
- if s1 == nil && s2 == nil {
- return false
- }
- if s1 == nil || s2 == nil {
- return true
- }
- if s1.HaveV6 != s2.HaveV6 ||
- s1.HaveV4 != s2.HaveV4 ||
- s1.IsExpensive != s2.IsExpensive ||
- s1.DefaultRouteInterface != s2.DefaultRouteInterface ||
- s1.HTTPProxy != s2.HTTPProxy ||
- s1.PAC != s2.PAC {
- return true
- }
- for iname, i := range s1.Interface {
- if iname == m.tsIfName {
- // Ignore changes in the Tailscale interface itself.
- continue
- }
- ips := s1.InterfaceIPs[iname]
- if !m.isInterestingInterface(i, ips) {
- continue
- }
- i2, ok := s2.Interface[iname]
- if !ok {
- return true
- }
- ips2, ok := s2.InterfaceIPs[iname]
- if !ok {
- return true
- }
- if !i.Equal(i2) || !prefixesMajorEqual(ips, ips2) {
- return true
- }
- }
- // Iterate over s2 in case there is a field in s2 that doesn't exist in s1
- for iname, i := range s2.Interface {
- if iname == m.tsIfName {
- // Ignore changes in the Tailscale interface itself.
- continue
- }
- ips := s2.InterfaceIPs[iname]
- if !m.isInterestingInterface(i, ips) {
- continue
- }
- i1, ok := s1.Interface[iname]
- if !ok {
- return true
- }
- ips1, ok := s1.InterfaceIPs[iname]
- if !ok {
- return true
- }
- if !i.Equal(i1) || !prefixesMajorEqual(ips, ips1) {
- return true
- }
- }
- return false
- }
// prefixesMajorEqual reports whether a and b are equal after ignoring
// boring things like link-local, loopback, and multicast addresses.
// The remaining prefixes must match pairwise and in the same order.
func prefixesMajorEqual(a, b []netip.Prefix) bool {
	// boring reports whether p's address is link-local unicast, loopback,
	// or multicast.
	boring := func(p netip.Prefix) bool {
		ip := p.Addr()
		return ip.IsLinkLocalUnicast() || ip.IsLoopback() || ip.IsMulticast()
	}
	// skip returns the first index at or after idx holding a non-boring
	// prefix, or len(ps) if there is none.
	skip := func(ps []netip.Prefix, idx int) int {
		for idx < len(ps) && boring(ps[idx]) {
			idx++
		}
		return idx
	}

	i, j := skip(a, 0), skip(b, 0)
	for i < len(a) && j < len(b) {
		if a[i] != b[j] {
			return false
		}
		i, j = skip(a, i+1), skip(b, j+1)
	}
	// Equal only if both sides ran out of interesting prefixes together.
	return i == len(a) && j == len(b)
}
// jsonSummary returns x encoded as JSON (a []byte) for use as a log
// argument. If encoding fails it returns the error instead, so the caller
// can log the result either way.
func jsonSummary(x any) any {
	encoded, marshalErr := json.Marshal(x)
	if marshalErr == nil {
		return encoded
	}
	return marshalErr
}
// wallTime returns the current time with its monotonic clock reading
// removed, so that later subtractions measure wall-clock time.
func wallTime() time.Time {
	// From time package's docs: "The canonical way to strip a
	// monotonic clock reading is to use t = t.Round(0)."
	now := time.Now()
	return now.Round(0)
}
- func (m *Monitor) pollWallTime() {
- m.mu.Lock()
- defer m.mu.Unlock()
- if m.closed {
- return
- }
- if m.checkWallTimeAdvanceLocked() {
- m.InjectEvent()
- }
- m.wallTimer.Reset(pollWallTimeInterval)
- }
// shouldMonitorTimeJump is whether we keep a regular periodic timer running in
// the background watching for jumps in wall time.
//
// We don't do this on mobile platforms (Android and iOS) for battery
// reasons, and because these platforms don't really sleep in the same way.
const shouldMonitorTimeJump = runtime.GOOS != "android" && runtime.GOOS != "ios"
- // checkWallTimeAdvanceLocked reports whether wall time jumped more than 150% of
- // pollWallTimeInterval, indicating we probably just came out of sleep. Once a
- // time jump is detected it must be reset by calling resetTimeJumpedLocked.
- func (m *Monitor) checkWallTimeAdvanceLocked() bool {
- if !shouldMonitorTimeJump {
- panic("unreachable") // if callers are correct
- }
- now := wallTime()
- if now.Sub(m.lastWall) > pollWallTimeInterval*3/2 {
- m.timeJumped = true // it is reset by debounce.
- }
- m.lastWall = now
- return m.timeJumped
- }
- // resetTimeJumpedLocked consumes the signal set by checkWallTimeAdvanceLocked.
- func (m *Monitor) resetTimeJumpedLocked() {
- m.timeJumped = false
- }
// ipRuleDeletedMessage is the message delivered when an IP policy routing
// rule is deleted. Its fields are passed to the registered
// RuleDeleteCallbacks.
type ipRuleDeletedMessage struct {
	table    uint8  // routing table number of the deleted rule
	priority uint32 // priority of the deleted rule
}

// ignore always reports true: a rule deletion is never treated as a
// network change. The pump loop recognizes this message type and notifies
// the rule-delete callbacks before it consults ignore.
func (ipRuleDeletedMessage) ignore() bool {
	return true
}
|