conn.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. package netlink
  2. import (
  3. "math/rand"
  4. "sync"
  5. "sync/atomic"
  6. "syscall"
  7. "time"
  8. "golang.org/x/net/bpf"
  9. )
  10. // A Conn is a connection to netlink. A Conn can be used to send and
  11. // receives messages to and from netlink.
  12. //
  13. // A Conn is safe for concurrent use, but to avoid contention in
  14. // high-throughput applications, the caller should almost certainly create a
  15. // pool of Conns and distribute them among workers.
  16. //
  17. // A Conn is capable of manipulating netlink subsystems from within a specific
  18. // Linux network namespace, but special care must be taken when doing so. See
  19. // the documentation of Config for details.
  20. type Conn struct {
  21. // Atomics must come first.
  22. //
  23. // seq is an atomically incremented integer used to provide sequence
  24. // numbers when Conn.Send is called.
  25. seq uint32
  26. // mu serializes access to the netlink socket for the request/response
  27. // transaction within Execute.
  28. mu sync.RWMutex
  29. // sock is the operating system-specific implementation of
  30. // a netlink sockets connection.
  31. sock Socket
  32. // pid is the PID assigned by netlink.
  33. pid uint32
  34. // d provides debugging capabilities for a Conn if not nil.
  35. d *debugger
  36. }
  37. // A Socket is an operating-system specific implementation of netlink
  38. // sockets used by Conn.
  39. type Socket interface {
  40. Close() error
  41. Send(m Message) error
  42. SendMessages(m []Message) error
  43. Receive() ([]Message, error)
  44. }
  45. // Dial dials a connection to netlink, using the specified netlink family.
  46. // Config specifies optional configuration for Conn. If config is nil, a default
  47. // configuration will be used.
  48. func Dial(family int, config *Config) (*Conn, error) {
  49. // Use OS-specific dial() to create Socket
  50. c, pid, err := dial(family, config)
  51. if err != nil {
  52. return nil, err
  53. }
  54. return NewConn(c, pid), nil
  55. }
  56. // NewConn creates a Conn using the specified Socket and PID for netlink
  57. // communications.
  58. //
  59. // NewConn is primarily useful for tests. Most applications should use
  60. // Dial instead.
  61. func NewConn(sock Socket, pid uint32) *Conn {
  62. // Seed the sequence number using a random number generator.
  63. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  64. seq := r.Uint32()
  65. // Configure a debugger if arguments are set.
  66. var d *debugger
  67. if len(debugArgs) > 0 {
  68. d = newDebugger(debugArgs)
  69. }
  70. return &Conn{
  71. seq: seq,
  72. sock: sock,
  73. pid: pid,
  74. d: d,
  75. }
  76. }
  77. // debug executes fn with the debugger if the debugger is not nil.
  78. func (c *Conn) debug(fn func(d *debugger)) {
  79. if c.d == nil {
  80. return
  81. }
  82. fn(c.d)
  83. }
  84. // Close closes the connection and unblocks any pending read operations.
  85. func (c *Conn) Close() error {
  86. // Close does not acquire a lock because it must be able to interrupt any
  87. // blocked system calls, such as when Receive is waiting on a multicast
  88. // group message.
  89. //
  90. // We rely on the kernel to deal with concurrent operations to the netlink
  91. // socket itself.
  92. return newOpError("close", c.sock.Close())
  93. }
  94. // Execute sends a single Message to netlink using Send, receives one or more
  95. // replies using Receive, and then checks the validity of the replies against
  96. // the request using Validate.
  97. //
  98. // Execute acquires a lock for the duration of the function call which blocks
  99. // concurrent calls to Send, SendMessages, and Receive, in order to ensure
  100. // consistency between netlink request/reply messages.
  101. //
  102. // See the documentation of Send, Receive, and Validate for details about
  103. // each function.
  104. func (c *Conn) Execute(m Message) ([]Message, error) {
  105. // Acquire the write lock and invoke the internal implementations of Send
  106. // and Receive which require the lock already be held.
  107. c.mu.Lock()
  108. defer c.mu.Unlock()
  109. req, err := c.lockedSend(m)
  110. if err != nil {
  111. return nil, err
  112. }
  113. res, err := c.lockedReceive()
  114. if err != nil {
  115. return nil, err
  116. }
  117. if err := Validate(req, res); err != nil {
  118. return nil, err
  119. }
  120. return res, nil
  121. }
  122. // SendMessages sends multiple Messages to netlink. The handling of
  123. // a Header's Length, Sequence and PID fields is the same as when
  124. // calling Send.
  125. func (c *Conn) SendMessages(msgs []Message) ([]Message, error) {
  126. // Wait for any concurrent calls to Execute to finish before proceeding.
  127. c.mu.RLock()
  128. defer c.mu.RUnlock()
  129. for i := range msgs {
  130. c.fixMsg(&msgs[i], nlmsgLength(len(msgs[i].Data)))
  131. }
  132. c.debug(func(d *debugger) {
  133. for _, m := range msgs {
  134. d.debugf(1, "send msgs: %+v", m)
  135. }
  136. })
  137. if err := c.sock.SendMessages(msgs); err != nil {
  138. c.debug(func(d *debugger) {
  139. d.debugf(1, "send msgs: err: %v", err)
  140. })
  141. return nil, newOpError("send-messages", err)
  142. }
  143. return msgs, nil
  144. }
  145. // Send sends a single Message to netlink. In most cases, a Header's Length,
  146. // Sequence, and PID fields should be set to 0, so they can be populated
  147. // automatically before the Message is sent. On success, Send returns a copy
  148. // of the Message with all parameters populated, for later validation.
  149. //
  150. // If Header.Length is 0, it will be automatically populated using the
  151. // correct length for the Message, including its payload.
  152. //
  153. // If Header.Sequence is 0, it will be automatically populated using the
  154. // next sequence number for this connection.
  155. //
  156. // If Header.PID is 0, it will be automatically populated using a PID
  157. // assigned by netlink.
  158. func (c *Conn) Send(m Message) (Message, error) {
  159. // Wait for any concurrent calls to Execute to finish before proceeding.
  160. c.mu.RLock()
  161. defer c.mu.RUnlock()
  162. return c.lockedSend(m)
  163. }
  164. // lockedSend implements Send, but must be called with c.mu acquired for reading.
  165. // We rely on the kernel to deal with concurrent reads and writes to the netlink
  166. // socket itself.
  167. func (c *Conn) lockedSend(m Message) (Message, error) {
  168. c.fixMsg(&m, nlmsgLength(len(m.Data)))
  169. c.debug(func(d *debugger) {
  170. d.debugf(1, "send: %+v", m)
  171. })
  172. if err := c.sock.Send(m); err != nil {
  173. c.debug(func(d *debugger) {
  174. d.debugf(1, "send: err: %v", err)
  175. })
  176. return Message{}, newOpError("send", err)
  177. }
  178. return m, nil
  179. }
  180. // Receive receives one or more messages from netlink. Multi-part messages are
  181. // handled transparently and returned as a single slice of Messages, with the
  182. // final empty "multi-part done" message removed.
  183. //
  184. // If any of the messages indicate a netlink error, that error will be returned.
  185. func (c *Conn) Receive() ([]Message, error) {
  186. // Wait for any concurrent calls to Execute to finish before proceeding.
  187. c.mu.RLock()
  188. defer c.mu.RUnlock()
  189. return c.lockedReceive()
  190. }
  191. // lockedReceive implements Receive, but must be called with c.mu acquired for reading.
  192. // We rely on the kernel to deal with concurrent reads and writes to the netlink
  193. // socket itself.
  194. func (c *Conn) lockedReceive() ([]Message, error) {
  195. msgs, err := c.receive()
  196. if err != nil {
  197. c.debug(func(d *debugger) {
  198. d.debugf(1, "recv: err: %v", err)
  199. })
  200. return nil, err
  201. }
  202. c.debug(func(d *debugger) {
  203. for _, m := range msgs {
  204. d.debugf(1, "recv: %+v", m)
  205. }
  206. })
  207. // When using nltest, it's possible for zero messages to be returned by receive.
  208. if len(msgs) == 0 {
  209. return msgs, nil
  210. }
  211. // Trim the final message with multi-part done indicator if
  212. // present.
  213. if m := msgs[len(msgs)-1]; m.Header.Flags&Multi != 0 && m.Header.Type == Done {
  214. return msgs[:len(msgs)-1], nil
  215. }
  216. return msgs, nil
  217. }
  218. // receive is the internal implementation of Conn.Receive, which can be called
  219. // recursively to handle multi-part messages.
  220. func (c *Conn) receive() ([]Message, error) {
  221. // NB: All non-nil errors returned from this function *must* be of type
  222. // OpError in order to maintain the appropriate contract with callers of
  223. // this package.
  224. //
  225. // This contract also applies to functions called within this function,
  226. // such as checkMessage.
  227. var res []Message
  228. for {
  229. msgs, err := c.sock.Receive()
  230. if err != nil {
  231. return nil, newOpError("receive", err)
  232. }
  233. // If this message is multi-part, we will need to continue looping to
  234. // drain all the messages from the socket.
  235. var multi bool
  236. for _, m := range msgs {
  237. if err := checkMessage(m); err != nil {
  238. return nil, err
  239. }
  240. // Does this message indicate a multi-part message?
  241. if m.Header.Flags&Multi == 0 {
  242. // No, check the next messages.
  243. continue
  244. }
  245. // Does this message indicate the last message in a series of
  246. // multi-part messages from a single read?
  247. multi = m.Header.Type != Done
  248. }
  249. res = append(res, msgs...)
  250. if !multi {
  251. // No more messages coming.
  252. return res, nil
  253. }
  254. }
  255. }
  256. // A groupJoinLeaver is a Socket that supports joining and leaving
  257. // netlink multicast groups.
  258. type groupJoinLeaver interface {
  259. Socket
  260. JoinGroup(group uint32) error
  261. LeaveGroup(group uint32) error
  262. }
  263. // JoinGroup joins a netlink multicast group by its ID.
  264. func (c *Conn) JoinGroup(group uint32) error {
  265. conn, ok := c.sock.(groupJoinLeaver)
  266. if !ok {
  267. return notSupported("join-group")
  268. }
  269. return newOpError("join-group", conn.JoinGroup(group))
  270. }
  271. // LeaveGroup leaves a netlink multicast group by its ID.
  272. func (c *Conn) LeaveGroup(group uint32) error {
  273. conn, ok := c.sock.(groupJoinLeaver)
  274. if !ok {
  275. return notSupported("leave-group")
  276. }
  277. return newOpError("leave-group", conn.LeaveGroup(group))
  278. }
  279. // A bpfSetter is a Socket that supports setting and removing BPF filters.
  280. type bpfSetter interface {
  281. Socket
  282. bpf.Setter
  283. RemoveBPF() error
  284. }
  285. // SetBPF attaches an assembled BPF program to a Conn.
  286. func (c *Conn) SetBPF(filter []bpf.RawInstruction) error {
  287. conn, ok := c.sock.(bpfSetter)
  288. if !ok {
  289. return notSupported("set-bpf")
  290. }
  291. return newOpError("set-bpf", conn.SetBPF(filter))
  292. }
  293. // RemoveBPF removes a BPF filter from a Conn.
  294. func (c *Conn) RemoveBPF() error {
  295. conn, ok := c.sock.(bpfSetter)
  296. if !ok {
  297. return notSupported("remove-bpf")
  298. }
  299. return newOpError("remove-bpf", conn.RemoveBPF())
  300. }
  301. // A deadlineSetter is a Socket that supports setting deadlines.
  302. type deadlineSetter interface {
  303. Socket
  304. SetDeadline(time.Time) error
  305. SetReadDeadline(time.Time) error
  306. SetWriteDeadline(time.Time) error
  307. }
  308. // SetDeadline sets the read and write deadlines associated with the connection.
  309. func (c *Conn) SetDeadline(t time.Time) error {
  310. conn, ok := c.sock.(deadlineSetter)
  311. if !ok {
  312. return notSupported("set-deadline")
  313. }
  314. return newOpError("set-deadline", conn.SetDeadline(t))
  315. }
  316. // SetReadDeadline sets the read deadline associated with the connection.
  317. func (c *Conn) SetReadDeadline(t time.Time) error {
  318. conn, ok := c.sock.(deadlineSetter)
  319. if !ok {
  320. return notSupported("set-read-deadline")
  321. }
  322. return newOpError("set-read-deadline", conn.SetReadDeadline(t))
  323. }
  324. // SetWriteDeadline sets the write deadline associated with the connection.
  325. func (c *Conn) SetWriteDeadline(t time.Time) error {
  326. conn, ok := c.sock.(deadlineSetter)
  327. if !ok {
  328. return notSupported("set-write-deadline")
  329. }
  330. return newOpError("set-write-deadline", conn.SetWriteDeadline(t))
  331. }
  332. // A ConnOption is a boolean option that may be set for a Conn.
  333. type ConnOption int
  334. // Possible ConnOption values. These constants are equivalent to the Linux
  335. // setsockopt boolean options for netlink sockets.
  336. const (
  337. PacketInfo ConnOption = iota
  338. BroadcastError
  339. NoENOBUFS
  340. ListenAllNSID
  341. CapAcknowledge
  342. ExtendedAcknowledge
  343. GetStrictCheck
  344. )
  345. // An optionSetter is a Socket that supports setting netlink options.
  346. type optionSetter interface {
  347. Socket
  348. SetOption(option ConnOption, enable bool) error
  349. }
  350. // SetOption enables or disables a netlink socket option for the Conn.
  351. func (c *Conn) SetOption(option ConnOption, enable bool) error {
  352. conn, ok := c.sock.(optionSetter)
  353. if !ok {
  354. return notSupported("set-option")
  355. }
  356. return newOpError("set-option", conn.SetOption(option, enable))
  357. }
  358. // A bufferSetter is a Socket that supports setting connection buffer sizes.
  359. type bufferSetter interface {
  360. Socket
  361. SetReadBuffer(bytes int) error
  362. SetWriteBuffer(bytes int) error
  363. }
  364. // SetReadBuffer sets the size of the operating system's receive buffer
  365. // associated with the Conn.
  366. func (c *Conn) SetReadBuffer(bytes int) error {
  367. conn, ok := c.sock.(bufferSetter)
  368. if !ok {
  369. return notSupported("set-read-buffer")
  370. }
  371. return newOpError("set-read-buffer", conn.SetReadBuffer(bytes))
  372. }
  373. // SetWriteBuffer sets the size of the operating system's transmit buffer
  374. // associated with the Conn.
  375. func (c *Conn) SetWriteBuffer(bytes int) error {
  376. conn, ok := c.sock.(bufferSetter)
  377. if !ok {
  378. return notSupported("set-write-buffer")
  379. }
  380. return newOpError("set-write-buffer", conn.SetWriteBuffer(bytes))
  381. }
  382. // A syscallConner is a Socket that supports syscall.Conn.
  383. type syscallConner interface {
  384. Socket
  385. syscall.Conn
  386. }
  387. var _ syscall.Conn = &Conn{}
  388. // SyscallConn returns a raw network connection. This implements the
  389. // syscall.Conn interface.
  390. //
  391. // SyscallConn is intended for advanced use cases, such as getting and setting
  392. // arbitrary socket options using the netlink socket's file descriptor.
  393. //
  394. // Once invoked, it is the caller's responsibility to ensure that operations
  395. // performed using Conn and the syscall.RawConn do not conflict with
  396. // each other.
  397. func (c *Conn) SyscallConn() (syscall.RawConn, error) {
  398. sc, ok := c.sock.(syscallConner)
  399. if !ok {
  400. return nil, notSupported("syscall-conn")
  401. }
  402. // TODO(mdlayher): mutex or similar to enforce syscall.RawConn contract of
  403. // FD remaining valid for duration of calls?
  404. return sc.SyscallConn()
  405. }
  406. // fixMsg updates the fields of m using the logic specified in Send.
  407. func (c *Conn) fixMsg(m *Message, ml int) {
  408. if m.Header.Length == 0 {
  409. m.Header.Length = uint32(nlmsgAlign(ml))
  410. }
  411. if m.Header.Sequence == 0 {
  412. m.Header.Sequence = c.nextSequence()
  413. }
  414. if m.Header.PID == 0 {
  415. m.Header.PID = c.pid
  416. }
  417. }
  418. // nextSequence atomically increments Conn's sequence number and returns
  419. // the incremented value.
  420. func (c *Conn) nextSequence() uint32 {
  421. return atomic.AddUint32(&c.seq, 1)
  422. }
  423. // Validate validates one or more reply Messages against a request Message,
  424. // ensuring that they contain matching sequence numbers and PIDs.
  425. func Validate(request Message, replies []Message) error {
  426. for _, m := range replies {
  427. // Check for mismatched sequence, unless:
  428. // - request had no sequence, meaning we are probably validating
  429. // a multicast reply
  430. if m.Header.Sequence != request.Header.Sequence && request.Header.Sequence != 0 {
  431. return newOpError("validate", errMismatchedSequence)
  432. }
  433. // Check for mismatched PID, unless:
  434. // - request had no PID, meaning we are either:
  435. // - validating a multicast reply
  436. // - netlink has not yet assigned us a PID
  437. // - response had no PID, meaning it's from the kernel as a multicast reply
  438. if m.Header.PID != request.Header.PID && request.Header.PID != 0 && m.Header.PID != 0 {
  439. return newOpError("validate", errMismatchedPID)
  440. }
  441. }
  442. return nil
  443. }
  444. // Config contains options for a Conn.
  445. type Config struct {
  446. // Groups is a bitmask which specifies multicast groups. If set to 0,
  447. // no multicast group subscriptions will be made.
  448. Groups uint32
  449. // NetNS specifies the network namespace the Conn will operate in.
  450. //
  451. // If set (non-zero), Conn will enter the specified network namespace and
  452. // an error will occur in Dial if the operation fails.
  453. //
  454. // If not set (zero), a best-effort attempt will be made to enter the
  455. // network namespace of the calling thread: this means that any changes made
  456. // to the calling thread's network namespace will also be reflected in Conn.
  457. // If this operation fails (due to lack of permissions or because network
  458. // namespaces are disabled by kernel configuration), Dial will not return
  459. // an error, and the Conn will operate in the default network namespace of
  460. // the process. This enables non-privileged use of Conn in applications
  461. // which do not require elevated privileges.
  462. //
  463. // Entering a network namespace is a privileged operation (root or
  464. // CAP_SYS_ADMIN are required), and most applications should leave this set
  465. // to 0.
  466. NetNS int
  467. // DisableNSLockThread is a no-op.
  468. //
  469. // Deprecated: internal changes have made this option obsolete and it has no
  470. // effect. Do not use.
  471. DisableNSLockThread bool
  472. }