set.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903
  1. // Copyright 2018 Google LLC. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package nftables
  15. import (
  16. "encoding/binary"
  17. "errors"
  18. "fmt"
  19. "strings"
  20. "time"
  21. "github.com/google/nftables/expr"
  22. "github.com/google/nftables/binaryutil"
  23. "github.com/mdlayher/netlink"
  24. "golang.org/x/sys/unix"
  25. )
  // SetConcatTypeBits and SetConcatTypeMask define the concatenation bits,
// originally defined in
// https://git.netfilter.org/iptables/tree/iptables/nft.c?id=26753888720d8e7eb422ae4311348347f5a05cb4#n1002
const (
	// SetConcatTypeBits is the number of bits by which ConcatSetType shifts
	// the accumulated magic value before adding the next datatype.
	SetConcatTypeBits = 6
	// SetConcatTypeMask selects the lowest SetConcatTypeBits bits of a value.
	SetConcatTypeMask = 1<<SetConcatTypeBits - 1
)

// The constants below are declared here because they are not found in the Go
// unix package.
const (
	// https://git.netfilter.org/nftables/tree/include/linux/netfilter/nf_tables.h?id=d1289bff58e1878c3162f574c603da993e29b113#n306
	NFT_SET_CONCAT = 0x80

	// https://git.netfilter.org/nftables/tree/include/linux/netfilter/nf_tables.h?id=d1289bff58e1878c3162f574c603da993e29b113#n330
	NFTA_SET_DESC_CONCAT = 2

	// https://git.netfilter.org/nftables/tree/include/linux/netfilter/nf_tables.h?id=d1289bff58e1878c3162f574c603da993e29b113#n428
	NFTA_SET_ELEM_KEY_END = 10

	// https://git.netfilter.org/nftables/tree/include/linux/netfilter/nf_tables.h?id=d1289bff58e1878c3162f574c603da993e29b113#n429
	NFTA_SET_ELEM_EXPRESSIONS = 0x11
)

// allocSetID holds the most recently allocated set ID. AddSet increments it
// to assign an ID to any set whose ID field is still zero.
var allocSetID uint32
  // SetDatatype represents a datatype declared by nft.
type SetDatatype struct {
	// Name is the textual name of the datatype. ConcatSetType joins the
	// names of its inputs with " . ".
	Name string
	// Bytes is the size of the datatype in bytes.
	Bytes uint32

	// nftMagic represents the magic value that nft uses for
	// certain types (ie: IP addresses). We populate SET_KEY_TYPE
	// identically, so `nft list ...` commands produce correct output.
	nftMagic uint32
}

// GetNFTMagic returns the nft magic value currently stored in the datatype.
func (dt *SetDatatype) GetNFTMagic() uint32 { return dt.nftMagic }

// SetNFTMagic stores nftMagic as the datatype's nft magic value, which allows
// callers to describe a custom datatype.
func (dt *SetDatatype) SetNFTMagic(nftMagic uint32) { dt.nftMagic = nftMagic }
  59. // NFT datatypes. See: https://git.netfilter.org/nftables/tree/include/datatype.h
  60. var (
  61. TypeInvalid = SetDatatype{Name: "invalid", nftMagic: 0}
  62. TypeVerdict = SetDatatype{Name: "verdict", Bytes: 0, nftMagic: 1}
  63. TypeNFProto = SetDatatype{Name: "nf_proto", Bytes: 1, nftMagic: 2}
  64. TypeBitmask = SetDatatype{Name: "bitmask", Bytes: 0, nftMagic: 3}
  65. TypeInteger = SetDatatype{Name: "integer", Bytes: 4, nftMagic: 4}
  66. TypeString = SetDatatype{Name: "string", Bytes: 0, nftMagic: 5}
  67. TypeLLAddr = SetDatatype{Name: "ll_addr", Bytes: 0, nftMagic: 6}
  68. TypeIPAddr = SetDatatype{Name: "ipv4_addr", Bytes: 4, nftMagic: 7}
  69. TypeIP6Addr = SetDatatype{Name: "ipv6_addr", Bytes: 16, nftMagic: 8}
  70. TypeEtherAddr = SetDatatype{Name: "ether_addr", Bytes: 6, nftMagic: 9}
  71. TypeEtherType = SetDatatype{Name: "ether_type", Bytes: 2, nftMagic: 10}
  72. TypeARPOp = SetDatatype{Name: "arp_op", Bytes: 2, nftMagic: 11}
  73. TypeInetProto = SetDatatype{Name: "inet_proto", Bytes: 1, nftMagic: 12}
  74. TypeInetService = SetDatatype{Name: "inet_service", Bytes: 2, nftMagic: 13}
  75. TypeICMPType = SetDatatype{Name: "icmp_type", Bytes: 1, nftMagic: 14}
  76. TypeTCPFlag = SetDatatype{Name: "tcp_flag", Bytes: 1, nftMagic: 15}
  77. TypeDCCPPktType = SetDatatype{Name: "dccp_pkttype", Bytes: 1, nftMagic: 16}
  78. TypeMHType = SetDatatype{Name: "mh_type", Bytes: 1, nftMagic: 17}
  79. TypeTime = SetDatatype{Name: "time", Bytes: 8, nftMagic: 18}
  80. TypeMark = SetDatatype{Name: "mark", Bytes: 4, nftMagic: 19}
  81. TypeIFIndex = SetDatatype{Name: "iface_index", Bytes: 4, nftMagic: 20}
  82. TypeARPHRD = SetDatatype{Name: "iface_type", Bytes: 2, nftMagic: 21}
  83. TypeRealm = SetDatatype{Name: "realm", Bytes: 4, nftMagic: 22}
  84. TypeClassID = SetDatatype{Name: "classid", Bytes: 4, nftMagic: 23}
  85. TypeUID = SetDatatype{Name: "uid", Bytes: sizeOfUIDT, nftMagic: 24}
  86. TypeGID = SetDatatype{Name: "gid", Bytes: sizeOfGIDT, nftMagic: 25}
  87. TypeCTState = SetDatatype{Name: "ct_state", Bytes: 4, nftMagic: 26}
  88. TypeCTDir = SetDatatype{Name: "ct_dir", Bytes: 1, nftMagic: 27}
  89. TypeCTStatus = SetDatatype{Name: "ct_status", Bytes: 4, nftMagic: 28}
  90. TypeICMP6Type = SetDatatype{Name: "icmpv6_type", Bytes: 1, nftMagic: 29}
  91. TypeCTLabel = SetDatatype{Name: "ct_label", Bytes: ctLabelBitSize / 8, nftMagic: 30}
  92. TypePktType = SetDatatype{Name: "pkt_type", Bytes: 1, nftMagic: 31}
  93. TypeICMPCode = SetDatatype{Name: "icmp_code", Bytes: 1, nftMagic: 32}
  94. TypeICMPV6Code = SetDatatype{Name: "icmpv6_code", Bytes: 1, nftMagic: 33}
  95. TypeICMPXCode = SetDatatype{Name: "icmpx_code", Bytes: 1, nftMagic: 34}
  96. TypeDevGroup = SetDatatype{Name: "devgroup", Bytes: 4, nftMagic: 35}
  97. TypeDSCP = SetDatatype{Name: "dscp", Bytes: 1, nftMagic: 36}
  98. TypeECN = SetDatatype{Name: "ecn", Bytes: 1, nftMagic: 37}
  99. TypeFIBAddr = SetDatatype{Name: "fib_addrtype", Bytes: 4, nftMagic: 38}
  100. TypeBoolean = SetDatatype{Name: "boolean", Bytes: 1, nftMagic: 39}
  101. TypeCTEventBit = SetDatatype{Name: "ct_event", Bytes: 4, nftMagic: 40}
  102. TypeIFName = SetDatatype{Name: "ifname", Bytes: ifNameSize, nftMagic: 41}
  103. TypeIGMPType = SetDatatype{Name: "igmp_type", Bytes: 1, nftMagic: 42}
  104. TypeTimeDate = SetDatatype{Name: "time", Bytes: 8, nftMagic: 43}
  105. TypeTimeHour = SetDatatype{Name: "hour", Bytes: 4, nftMagic: 44}
  106. TypeTimeDay = SetDatatype{Name: "day", Bytes: 1, nftMagic: 45}
  107. TypeCGroupV2 = SetDatatype{Name: "cgroupsv2", Bytes: 8, nftMagic: 46}
  108. nftDatatypes = map[string]SetDatatype{
  109. TypeVerdict.Name: TypeVerdict,
  110. TypeNFProto.Name: TypeNFProto,
  111. TypeBitmask.Name: TypeBitmask,
  112. TypeInteger.Name: TypeInteger,
  113. TypeString.Name: TypeString,
  114. TypeLLAddr.Name: TypeLLAddr,
  115. TypeIPAddr.Name: TypeIPAddr,
  116. TypeIP6Addr.Name: TypeIP6Addr,
  117. TypeEtherAddr.Name: TypeEtherAddr,
  118. TypeEtherType.Name: TypeEtherType,
  119. TypeARPOp.Name: TypeARPOp,
  120. TypeInetProto.Name: TypeInetProto,
  121. TypeInetService.Name: TypeInetService,
  122. TypeICMPType.Name: TypeICMPType,
  123. TypeTCPFlag.Name: TypeTCPFlag,
  124. TypeDCCPPktType.Name: TypeDCCPPktType,
  125. TypeMHType.Name: TypeMHType,
  126. TypeTime.Name: TypeTime,
  127. TypeMark.Name: TypeMark,
  128. TypeIFIndex.Name: TypeIFIndex,
  129. TypeARPHRD.Name: TypeARPHRD,
  130. TypeRealm.Name: TypeRealm,
  131. TypeClassID.Name: TypeClassID,
  132. TypeUID.Name: TypeUID,
  133. TypeGID.Name: TypeGID,
  134. TypeCTState.Name: TypeCTState,
  135. TypeCTDir.Name: TypeCTDir,
  136. TypeCTStatus.Name: TypeCTStatus,
  137. TypeICMP6Type.Name: TypeICMP6Type,
  138. TypeCTLabel.Name: TypeCTLabel,
  139. TypePktType.Name: TypePktType,
  140. TypeICMPCode.Name: TypeICMPCode,
  141. TypeICMPV6Code.Name: TypeICMPV6Code,
  142. TypeICMPXCode.Name: TypeICMPXCode,
  143. TypeDevGroup.Name: TypeDevGroup,
  144. TypeDSCP.Name: TypeDSCP,
  145. TypeECN.Name: TypeECN,
  146. TypeFIBAddr.Name: TypeFIBAddr,
  147. TypeBoolean.Name: TypeBoolean,
  148. TypeCTEventBit.Name: TypeCTEventBit,
  149. TypeIFName.Name: TypeIFName,
  150. TypeIGMPType.Name: TypeIGMPType,
  151. TypeTimeDate.Name: TypeTimeDate,
  152. TypeTimeHour.Name: TypeTimeHour,
  153. TypeTimeDay.Name: TypeTimeDay,
  154. TypeCGroupV2.Name: TypeCGroupV2,
  155. }
  156. // ctLabelBitSize is defined in https://git.netfilter.org/nftables/tree/src/ct.c.
  157. ctLabelBitSize uint32 = 128
  158. // ifNameSize is called IFNAMSIZ in linux/if.h.
  159. ifNameSize uint32 = 16
  160. // bits/typesizes.h
  161. sizeOfUIDT uint32 = 4
  162. sizeOfGIDT uint32 = 4
  163. )
  164. // ErrTooManyTypes is the error returned by ConcatSetType, if nftMagic would overflow.
  165. var ErrTooManyTypes = errors.New("too many types to concat")
  166. // MustConcatSetType does the same as ConcatSetType, but panics instead of an
  167. // error. It simplifies safe initialization of global variables.
  168. func MustConcatSetType(types ...SetDatatype) SetDatatype {
  169. t, err := ConcatSetType(types...)
  170. if err != nil {
  171. panic(err)
  172. }
  173. return t
  174. }
  175. // ConcatSetType constructs a new SetDatatype which consists of a concatenation
  176. // of the passed types. It returns ErrTooManyTypes, if nftMagic would overflow
  177. // (more than 5 types).
  178. func ConcatSetType(types ...SetDatatype) (SetDatatype, error) {
  179. if len(types) > 32/SetConcatTypeBits {
  180. return SetDatatype{}, ErrTooManyTypes
  181. }
  182. var magic, bytes uint32
  183. names := make([]string, len(types))
  184. for i, t := range types {
  185. bytes += t.Bytes
  186. // concatenated types pad the length to multiples of the register size (4 bytes)
  187. // see https://git.netfilter.org/nftables/tree/src/datatype.c?id=488356b895024d0944b20feb1f930558726e0877#n1162
  188. if t.Bytes%4 != 0 {
  189. bytes += 4 - (t.Bytes % 4)
  190. }
  191. names[i] = t.Name
  192. magic <<= SetConcatTypeBits
  193. magic |= t.nftMagic & SetConcatTypeMask
  194. }
  195. return SetDatatype{Name: strings.Join(names, " . "), Bytes: bytes, nftMagic: magic}, nil
  196. }
  197. // ConcatSetTypeElements uses the ConcatSetType name to calculate and return
  198. // a list of base types which were used to construct the concatenated type
  199. func ConcatSetTypeElements(t SetDatatype) []SetDatatype {
  200. names := strings.Split(t.Name, " . ")
  201. types := make([]SetDatatype, len(names))
  202. for i, n := range names {
  203. types[i] = nftDatatypes[n]
  204. }
  205. return types
  206. }
  207. // Set represents an nftables set. Anonymous sets are only valid within the
  208. // context of a single batch.
  209. type Set struct {
  210. Table *Table
  211. ID uint32
  212. Name string
  213. Anonymous bool
  214. Constant bool
  215. Interval bool
  216. IsMap bool
  217. HasTimeout bool
  218. Counter bool
  219. // Can be updated per evaluation path, per `nft list ruleset`
  220. // indicates that set contains "flags dynamic"
  221. // https://git.netfilter.org/libnftnl/tree/include/linux/netfilter/nf_tables.h?id=84d12cfacf8ddd857a09435f3d982ab6250d250c#n298
  222. Dynamic bool
  223. // Indicates that the set contains a concatenation
  224. // https://git.netfilter.org/nftables/tree/include/linux/netfilter/nf_tables.h?id=d1289bff58e1878c3162f574c603da993e29b113#n306
  225. Concatenation bool
  226. Timeout time.Duration
  227. KeyType SetDatatype
  228. DataType SetDatatype
  229. }
  230. // SetElement represents a data point within a set.
  231. type SetElement struct {
  232. Key []byte
  233. Val []byte
  234. // Field used for definition of ending interval value in concatenated types
  235. // https://git.netfilter.org/libnftnl/tree/include/set_elem.h?id=e2514c0eff4da7e8e0aabd410f7b7d0b7564c880#n11
  236. KeyEnd []byte
  237. IntervalEnd bool
  238. // To support vmap, a caller must be able to pass Verdict type of data.
  239. // If IsMap is true and VerdictData is not nil, then Val of SetElement will be ignored
  240. // and VerdictData will be wrapped into Attribute data.
  241. VerdictData *expr.Verdict
  242. // To support aging of set elements
  243. Timeout time.Duration
  244. }
  245. func (s *SetElement) decode() func(b []byte) error {
  246. return func(b []byte) error {
  247. ad, err := netlink.NewAttributeDecoder(b)
  248. if err != nil {
  249. return fmt.Errorf("failed to create nested attribute decoder: %v", err)
  250. }
  251. ad.ByteOrder = binary.BigEndian
  252. for ad.Next() {
  253. switch ad.Type() {
  254. case unix.NFTA_SET_ELEM_KEY:
  255. s.Key, err = decodeElement(ad.Bytes())
  256. if err != nil {
  257. return err
  258. }
  259. case NFTA_SET_ELEM_KEY_END:
  260. s.KeyEnd, err = decodeElement(ad.Bytes())
  261. if err != nil {
  262. return err
  263. }
  264. case unix.NFTA_SET_ELEM_DATA:
  265. s.Val, err = decodeElement(ad.Bytes())
  266. if err != nil {
  267. return err
  268. }
  269. case unix.NFTA_SET_ELEM_FLAGS:
  270. flags := ad.Uint32()
  271. s.IntervalEnd = (flags & unix.NFT_SET_ELEM_INTERVAL_END) != 0
  272. case unix.NFTA_SET_ELEM_TIMEOUT:
  273. s.Timeout = time.Duration(time.Millisecond * time.Duration(ad.Uint64()))
  274. }
  275. }
  276. return ad.Err()
  277. }
  278. }
  279. func decodeElement(d []byte) ([]byte, error) {
  280. ad, err := netlink.NewAttributeDecoder(d)
  281. if err != nil {
  282. return nil, fmt.Errorf("failed to create nested attribute decoder: %v", err)
  283. }
  284. ad.ByteOrder = binary.BigEndian
  285. var b []byte
  286. for ad.Next() {
  287. switch ad.Type() {
  288. case unix.NFTA_SET_ELEM_KEY:
  289. fallthrough
  290. case unix.NFTA_SET_ELEM_DATA:
  291. b = ad.Bytes()
  292. }
  293. }
  294. if err := ad.Err(); err != nil {
  295. return nil, err
  296. }
  297. return b, nil
  298. }
  299. // SetAddElements applies data points to an nftables set.
  300. func (cc *Conn) SetAddElements(s *Set, vals []SetElement) error {
  301. cc.mu.Lock()
  302. defer cc.mu.Unlock()
  303. if s.Anonymous {
  304. return errors.New("anonymous sets cannot be updated")
  305. }
  306. elements, err := s.makeElemList(vals, s.ID)
  307. if err != nil {
  308. return err
  309. }
  310. cc.messages = append(cc.messages, netlink.Message{
  311. Header: netlink.Header{
  312. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWSETELEM),
  313. Flags: netlink.Request | netlink.Acknowledge | netlink.Create,
  314. },
  315. Data: append(extraHeader(uint8(s.Table.Family), 0), cc.marshalAttr(elements)...),
  316. })
  317. return nil
  318. }
  319. func (s *Set) makeElemList(vals []SetElement, id uint32) ([]netlink.Attribute, error) {
  320. var elements []netlink.Attribute
  321. for i, v := range vals {
  322. item := make([]netlink.Attribute, 0)
  323. var flags uint32
  324. if v.IntervalEnd {
  325. flags |= unix.NFT_SET_ELEM_INTERVAL_END
  326. item = append(item, netlink.Attribute{Type: unix.NFTA_SET_ELEM_FLAGS | unix.NLA_F_NESTED, Data: binaryutil.BigEndian.PutUint32(flags)})
  327. }
  328. encodedKey, err := netlink.MarshalAttributes([]netlink.Attribute{{Type: unix.NFTA_DATA_VALUE, Data: v.Key}})
  329. if err != nil {
  330. return nil, fmt.Errorf("marshal key %d: %v", i, err)
  331. }
  332. item = append(item, netlink.Attribute{Type: unix.NFTA_SET_ELEM_KEY | unix.NLA_F_NESTED, Data: encodedKey})
  333. if len(v.KeyEnd) > 0 {
  334. encodedKeyEnd, err := netlink.MarshalAttributes([]netlink.Attribute{{Type: unix.NFTA_DATA_VALUE, Data: v.KeyEnd}})
  335. if err != nil {
  336. return nil, fmt.Errorf("marshal key end %d: %v", i, err)
  337. }
  338. item = append(item, netlink.Attribute{Type: NFTA_SET_ELEM_KEY_END | unix.NLA_F_NESTED, Data: encodedKeyEnd})
  339. }
  340. if s.HasTimeout && v.Timeout != 0 {
  341. // Set has Timeout flag set, which means an individual element can specify its own timeout.
  342. item = append(item, netlink.Attribute{Type: unix.NFTA_SET_ELEM_TIMEOUT, Data: binaryutil.BigEndian.PutUint64(uint64(v.Timeout.Milliseconds()))})
  343. }
  344. // The following switch statement deal with 3 different types of elements.
  345. // 1. v is an element of vmap
  346. // 2. v is an element of a regular map
  347. // 3. v is an element of a regular set (default)
  348. switch {
  349. case v.VerdictData != nil:
  350. // Since VerdictData is not nil, v is vmap element, need to add to the attributes
  351. encodedVal := []byte{}
  352. encodedKind, err := netlink.MarshalAttributes([]netlink.Attribute{
  353. {Type: unix.NFTA_DATA_VALUE, Data: binaryutil.BigEndian.PutUint32(uint32(v.VerdictData.Kind))},
  354. })
  355. if err != nil {
  356. return nil, fmt.Errorf("marshal item %d: %v", i, err)
  357. }
  358. encodedVal = append(encodedVal, encodedKind...)
  359. if len(v.VerdictData.Chain) != 0 {
  360. encodedChain, err := netlink.MarshalAttributes([]netlink.Attribute{
  361. {Type: unix.NFTA_SET_ELEM_DATA, Data: []byte(v.VerdictData.Chain + "\x00")},
  362. })
  363. if err != nil {
  364. return nil, fmt.Errorf("marshal item %d: %v", i, err)
  365. }
  366. encodedVal = append(encodedVal, encodedChain...)
  367. }
  368. encodedVerdict, err := netlink.MarshalAttributes([]netlink.Attribute{
  369. {Type: unix.NFTA_SET_ELEM_DATA | unix.NLA_F_NESTED, Data: encodedVal}})
  370. if err != nil {
  371. return nil, fmt.Errorf("marshal item %d: %v", i, err)
  372. }
  373. item = append(item, netlink.Attribute{Type: unix.NFTA_SET_ELEM_DATA | unix.NLA_F_NESTED, Data: encodedVerdict})
  374. case len(v.Val) > 0:
  375. // Since v.Val's length is not 0 then, v is a regular map element, need to add to the attributes
  376. encodedVal, err := netlink.MarshalAttributes([]netlink.Attribute{{Type: unix.NFTA_DATA_VALUE, Data: v.Val}})
  377. if err != nil {
  378. return nil, fmt.Errorf("marshal item %d: %v", i, err)
  379. }
  380. item = append(item, netlink.Attribute{Type: unix.NFTA_SET_ELEM_DATA | unix.NLA_F_NESTED, Data: encodedVal})
  381. default:
  382. // If niether of previous cases matche, it means 'e' is an element of a regular Set, no need to add to the attributes
  383. }
  384. encodedItem, err := netlink.MarshalAttributes(item)
  385. if err != nil {
  386. return nil, fmt.Errorf("marshal item %d: %v", i, err)
  387. }
  388. elements = append(elements, netlink.Attribute{Type: uint16(i+1) | unix.NLA_F_NESTED, Data: encodedItem})
  389. }
  390. encodedElem, err := netlink.MarshalAttributes(elements)
  391. if err != nil {
  392. return nil, fmt.Errorf("marshal elements: %v", err)
  393. }
  394. return []netlink.Attribute{
  395. {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")},
  396. {Type: unix.NFTA_LOOKUP_SET_ID, Data: binaryutil.BigEndian.PutUint32(id)},
  397. {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")},
  398. {Type: unix.NFTA_SET_ELEM_LIST_ELEMENTS | unix.NLA_F_NESTED, Data: encodedElem},
  399. }, nil
  400. }
  401. // AddSet adds the specified Set.
  402. func (cc *Conn) AddSet(s *Set, vals []SetElement) error {
  403. cc.mu.Lock()
  404. defer cc.mu.Unlock()
  405. // Based on nft implementation & linux source.
  406. // Link: https://github.com/torvalds/linux/blob/49a57857aeea06ca831043acbb0fa5e0f50602fd/net/netfilter/nf_tables_api.c#L3395
  407. // Another reference: https://git.netfilter.org/nftables/tree/src
  408. if s.Anonymous && !s.Constant {
  409. return errors.New("anonymous structs must be constant")
  410. }
  411. if s.ID == 0 {
  412. allocSetID++
  413. s.ID = allocSetID
  414. if s.Anonymous {
  415. s.Name = "__set%d"
  416. if s.IsMap {
  417. s.Name = "__map%d"
  418. }
  419. }
  420. }
  421. var flags uint32
  422. if s.Anonymous {
  423. flags |= unix.NFT_SET_ANONYMOUS
  424. }
  425. if s.Constant {
  426. flags |= unix.NFT_SET_CONSTANT
  427. }
  428. if s.Interval {
  429. flags |= unix.NFT_SET_INTERVAL
  430. }
  431. if s.IsMap {
  432. flags |= unix.NFT_SET_MAP
  433. }
  434. if s.HasTimeout {
  435. flags |= unix.NFT_SET_TIMEOUT
  436. }
  437. if s.Dynamic {
  438. flags |= unix.NFT_SET_EVAL
  439. }
  440. if s.Concatenation {
  441. flags |= NFT_SET_CONCAT
  442. }
  443. tableInfo := []netlink.Attribute{
  444. {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")},
  445. {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")},
  446. {Type: unix.NFTA_SET_FLAGS, Data: binaryutil.BigEndian.PutUint32(flags)},
  447. {Type: unix.NFTA_SET_KEY_TYPE, Data: binaryutil.BigEndian.PutUint32(s.KeyType.nftMagic)},
  448. {Type: unix.NFTA_SET_KEY_LEN, Data: binaryutil.BigEndian.PutUint32(s.KeyType.Bytes)},
  449. {Type: unix.NFTA_SET_ID, Data: binaryutil.BigEndian.PutUint32(s.ID)},
  450. }
  451. if s.IsMap {
  452. // Check if it is vmap case
  453. if s.DataType.nftMagic == 1 {
  454. // For Verdict data type, the expected magic is 0xfffff0
  455. tableInfo = append(tableInfo, netlink.Attribute{Type: unix.NFTA_SET_DATA_TYPE, Data: binaryutil.BigEndian.PutUint32(uint32(unix.NFT_DATA_VERDICT))},
  456. netlink.Attribute{Type: unix.NFTA_SET_DATA_LEN, Data: binaryutil.BigEndian.PutUint32(s.DataType.Bytes)})
  457. } else {
  458. tableInfo = append(tableInfo, netlink.Attribute{Type: unix.NFTA_SET_DATA_TYPE, Data: binaryutil.BigEndian.PutUint32(s.DataType.nftMagic)},
  459. netlink.Attribute{Type: unix.NFTA_SET_DATA_LEN, Data: binaryutil.BigEndian.PutUint32(s.DataType.Bytes)})
  460. }
  461. }
  462. if s.HasTimeout && s.Timeout != 0 {
  463. // If Set's global timeout is specified, add it to set's attributes
  464. tableInfo = append(tableInfo, netlink.Attribute{Type: unix.NFTA_SET_TIMEOUT, Data: binaryutil.BigEndian.PutUint64(uint64(s.Timeout.Milliseconds()))})
  465. }
  466. if s.Constant {
  467. // nft cli tool adds the number of elements to set/map's descriptor
  468. // It make sense to do only if a set or map are constant, otherwise skip NFTA_SET_DESC attribute
  469. numberOfElements, err := netlink.MarshalAttributes([]netlink.Attribute{
  470. {Type: unix.NFTA_DATA_VALUE, Data: binaryutil.BigEndian.PutUint32(uint32(len(vals)))},
  471. })
  472. if err != nil {
  473. return fmt.Errorf("fail to marshal number of elements %d: %v", len(vals), err)
  474. }
  475. tableInfo = append(tableInfo, netlink.Attribute{Type: unix.NLA_F_NESTED | unix.NFTA_SET_DESC, Data: numberOfElements})
  476. }
  477. if s.Concatenation {
  478. // Length of concatenated types is a must, otherwise segfaults when executing nft list ruleset
  479. var concatDefinition []byte
  480. elements := ConcatSetTypeElements(s.KeyType)
  481. for i, v := range elements {
  482. // Marshal base type size value
  483. valData, err := netlink.MarshalAttributes([]netlink.Attribute{
  484. {Type: unix.NFTA_DATA_VALUE, Data: binaryutil.BigEndian.PutUint32(v.Bytes)},
  485. })
  486. if err != nil {
  487. return fmt.Errorf("fail to marshal element key size %d: %v", i, err)
  488. }
  489. // Marshal base type size description
  490. descSize, err := netlink.MarshalAttributes([]netlink.Attribute{
  491. {Type: unix.NFTA_SET_DESC_SIZE, Data: valData},
  492. })
  493. concatDefinition = append(concatDefinition, descSize...)
  494. }
  495. // Marshal all base type descriptions into concatenation size description
  496. concatBytes, err := netlink.MarshalAttributes([]netlink.Attribute{{Type: unix.NLA_F_NESTED | NFTA_SET_DESC_CONCAT, Data: concatDefinition}})
  497. if err != nil {
  498. return fmt.Errorf("fail to marshal concat definition %v", err)
  499. }
  500. // Marshal concat size description as set description
  501. tableInfo = append(tableInfo, netlink.Attribute{Type: unix.NLA_F_NESTED | unix.NFTA_SET_DESC, Data: concatBytes})
  502. }
  503. if s.Anonymous || s.Constant || s.Interval {
  504. tableInfo = append(tableInfo,
  505. // Semantically useless - kept for binary compatability with nft
  506. netlink.Attribute{Type: unix.NFTA_SET_USERDATA, Data: []byte("\x00\x04\x02\x00\x00\x00")})
  507. } else if !s.IsMap {
  508. // Per https://git.netfilter.org/nftables/tree/src/mnl.c?id=187c6d01d35722618c2711bbc49262c286472c8f#n1165
  509. tableInfo = append(tableInfo,
  510. netlink.Attribute{Type: unix.NFTA_SET_USERDATA, Data: []byte("\x00\x04\x01\x00\x00\x00")})
  511. }
  512. if s.Counter {
  513. data, err := netlink.MarshalAttributes([]netlink.Attribute{
  514. {Type: unix.NFTA_LIST_ELEM, Data: []byte("counter\x00")},
  515. {Type: unix.NFTA_SET_ELEM_PAD | unix.NFTA_SET_ELEM_DATA, Data: []byte{}},
  516. })
  517. if err != nil {
  518. return err
  519. }
  520. tableInfo = append(tableInfo, netlink.Attribute{Type: unix.NLA_F_NESTED | NFTA_SET_ELEM_EXPRESSIONS, Data: data})
  521. }
  522. cc.messages = append(cc.messages, netlink.Message{
  523. Header: netlink.Header{
  524. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWSET),
  525. Flags: netlink.Request | netlink.Acknowledge | netlink.Create,
  526. },
  527. Data: append(extraHeader(uint8(s.Table.Family), 0), cc.marshalAttr(tableInfo)...),
  528. })
  529. // Set the values of the set if initial values were provided.
  530. if len(vals) > 0 {
  531. hdrType := unix.NFT_MSG_NEWSETELEM
  532. elements, err := s.makeElemList(vals, s.ID)
  533. if err != nil {
  534. return err
  535. }
  536. cc.messages = append(cc.messages, netlink.Message{
  537. Header: netlink.Header{
  538. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | hdrType),
  539. Flags: netlink.Request | netlink.Acknowledge | netlink.Create,
  540. },
  541. Data: append(extraHeader(uint8(s.Table.Family), 0), cc.marshalAttr(elements)...),
  542. })
  543. }
  544. return nil
  545. }
  546. // DelSet deletes a specific set, along with all elements it contains.
  547. func (cc *Conn) DelSet(s *Set) {
  548. cc.mu.Lock()
  549. defer cc.mu.Unlock()
  550. data := cc.marshalAttr([]netlink.Attribute{
  551. {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")},
  552. {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")},
  553. })
  554. cc.messages = append(cc.messages, netlink.Message{
  555. Header: netlink.Header{
  556. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_DELSET),
  557. Flags: netlink.Request | netlink.Acknowledge,
  558. },
  559. Data: append(extraHeader(uint8(s.Table.Family), 0), data...),
  560. })
  561. }
  562. // SetDeleteElements deletes data points from an nftables set.
  563. func (cc *Conn) SetDeleteElements(s *Set, vals []SetElement) error {
  564. cc.mu.Lock()
  565. defer cc.mu.Unlock()
  566. if s.Anonymous {
  567. return errors.New("anonymous sets cannot be updated")
  568. }
  569. elements, err := s.makeElemList(vals, s.ID)
  570. if err != nil {
  571. return err
  572. }
  573. cc.messages = append(cc.messages, netlink.Message{
  574. Header: netlink.Header{
  575. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_DELSETELEM),
  576. Flags: netlink.Request | netlink.Acknowledge | netlink.Create,
  577. },
  578. Data: append(extraHeader(uint8(s.Table.Family), 0), cc.marshalAttr(elements)...),
  579. })
  580. return nil
  581. }
  582. // FlushSet deletes all data points from an nftables set.
  583. func (cc *Conn) FlushSet(s *Set) {
  584. cc.mu.Lock()
  585. defer cc.mu.Unlock()
  586. data := cc.marshalAttr([]netlink.Attribute{
  587. {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")},
  588. {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")},
  589. })
  590. cc.messages = append(cc.messages, netlink.Message{
  591. Header: netlink.Header{
  592. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_DELSETELEM),
  593. Flags: netlink.Request | netlink.Acknowledge,
  594. },
  595. Data: append(extraHeader(uint8(s.Table.Family), 0), data...),
  596. })
  597. }
  598. var setHeaderType = netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWSET)
  599. func setsFromMsg(msg netlink.Message) (*Set, error) {
  600. if got, want := msg.Header.Type, setHeaderType; got != want {
  601. return nil, fmt.Errorf("unexpected header type: got %v, want %v", got, want)
  602. }
  603. ad, err := netlink.NewAttributeDecoder(msg.Data[4:])
  604. if err != nil {
  605. return nil, err
  606. }
  607. ad.ByteOrder = binary.BigEndian
  608. var set Set
  609. for ad.Next() {
  610. switch ad.Type() {
  611. case unix.NFTA_SET_NAME:
  612. set.Name = ad.String()
  613. case unix.NFTA_SET_ID:
  614. set.ID = binary.BigEndian.Uint32(ad.Bytes())
  615. case unix.NFTA_SET_TIMEOUT:
  616. set.Timeout = time.Duration(time.Millisecond * time.Duration(binary.BigEndian.Uint64(ad.Bytes())))
  617. set.HasTimeout = true
  618. case unix.NFTA_SET_FLAGS:
  619. flags := ad.Uint32()
  620. set.Constant = (flags & unix.NFT_SET_CONSTANT) != 0
  621. set.Anonymous = (flags & unix.NFT_SET_ANONYMOUS) != 0
  622. set.Interval = (flags & unix.NFT_SET_INTERVAL) != 0
  623. set.IsMap = (flags & unix.NFT_SET_MAP) != 0
  624. set.HasTimeout = (flags & unix.NFT_SET_TIMEOUT) != 0
  625. set.Concatenation = (flags & NFT_SET_CONCAT) != 0
  626. case unix.NFTA_SET_KEY_TYPE:
  627. nftMagic := ad.Uint32()
  628. if invalidMagic, ok := validateKeyType(nftMagic); !ok {
  629. return nil, fmt.Errorf("could not determine key type %+v", invalidMagic)
  630. }
  631. set.KeyType.nftMagic = nftMagic
  632. for _, dt := range nftDatatypes {
  633. // If this is a non-concatenated type, we can assign the descriptor.
  634. if nftMagic == dt.nftMagic {
  635. set.KeyType = dt
  636. break
  637. }
  638. }
  639. case unix.NFTA_SET_DATA_TYPE:
  640. nftMagic := ad.Uint32()
  641. // Special case for the data type verdict, in the message it is stored as 0xffffff00 but it is defined as 1
  642. if nftMagic == 0xffffff00 {
  643. set.KeyType = TypeVerdict
  644. break
  645. }
  646. for _, dt := range nftDatatypes {
  647. if nftMagic == dt.nftMagic {
  648. set.DataType = dt
  649. break
  650. }
  651. }
  652. if set.DataType.nftMagic == 0 {
  653. return nil, fmt.Errorf("could not determine data type %x", nftMagic)
  654. }
  655. }
  656. }
  657. return &set, nil
  658. }
  659. func validateKeyType(bits uint32) ([]uint32, bool) {
  660. var unpackTypes []uint32
  661. var invalidTypes []uint32
  662. found := false
  663. valid := true
  664. for bits != 0 {
  665. unpackTypes = append(unpackTypes, bits&SetConcatTypeMask)
  666. bits = bits >> SetConcatTypeBits
  667. }
  668. for _, t := range unpackTypes {
  669. for _, dt := range nftDatatypes {
  670. if t == dt.nftMagic {
  671. found = true
  672. }
  673. }
  674. if !found {
  675. invalidTypes = append(invalidTypes, t)
  676. valid = false
  677. }
  678. found = false
  679. }
  680. return invalidTypes, valid
  681. }
  682. var elemHeaderType = netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWSETELEM)
  683. func elementsFromMsg(msg netlink.Message) ([]SetElement, error) {
  684. if got, want := msg.Header.Type, elemHeaderType; got != want {
  685. return nil, fmt.Errorf("unexpected header type: got %v, want %v", got, want)
  686. }
  687. ad, err := netlink.NewAttributeDecoder(msg.Data[4:])
  688. if err != nil {
  689. return nil, err
  690. }
  691. ad.ByteOrder = binary.BigEndian
  692. var elements []SetElement
  693. for ad.Next() {
  694. b := ad.Bytes()
  695. if ad.Type() == unix.NFTA_SET_ELEM_LIST_ELEMENTS {
  696. ad, err := netlink.NewAttributeDecoder(b)
  697. if err != nil {
  698. return nil, err
  699. }
  700. ad.ByteOrder = binary.BigEndian
  701. for ad.Next() {
  702. var elem SetElement
  703. switch ad.Type() {
  704. case unix.NFTA_LIST_ELEM:
  705. ad.Do(elem.decode())
  706. }
  707. elements = append(elements, elem)
  708. }
  709. }
  710. }
  711. return elements, nil
  712. }
  713. // GetSets returns the sets in the specified table.
  714. func (cc *Conn) GetSets(t *Table) ([]*Set, error) {
  715. conn, closer, err := cc.netlinkConn()
  716. if err != nil {
  717. return nil, err
  718. }
  719. defer func() { _ = closer() }()
  720. data, err := netlink.MarshalAttributes([]netlink.Attribute{
  721. {Type: unix.NFTA_SET_TABLE, Data: []byte(t.Name + "\x00")},
  722. })
  723. if err != nil {
  724. return nil, err
  725. }
  726. message := netlink.Message{
  727. Header: netlink.Header{
  728. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_GETSET),
  729. Flags: netlink.Request | netlink.Acknowledge | netlink.Dump,
  730. },
  731. Data: append(extraHeader(uint8(t.Family), 0), data...),
  732. }
  733. if _, err := conn.SendMessages([]netlink.Message{message}); err != nil {
  734. return nil, fmt.Errorf("SendMessages: %v", err)
  735. }
  736. reply, err := receiveAckAware(conn, message.Header.Flags)
  737. if err != nil {
  738. return nil, fmt.Errorf("Receive: %v", err)
  739. }
  740. var sets []*Set
  741. for _, msg := range reply {
  742. s, err := setsFromMsg(msg)
  743. if err != nil {
  744. return nil, err
  745. }
  746. s.Table = &Table{Name: t.Name, Use: t.Use, Flags: t.Flags, Family: t.Family}
  747. sets = append(sets, s)
  748. }
  749. return sets, nil
  750. }
  751. // GetSetByName returns the set in the specified table if matching name is found.
  752. func (cc *Conn) GetSetByName(t *Table, name string) (*Set, error) {
  753. conn, closer, err := cc.netlinkConn()
  754. if err != nil {
  755. return nil, err
  756. }
  757. defer func() { _ = closer() }()
  758. data, err := netlink.MarshalAttributes([]netlink.Attribute{
  759. {Type: unix.NFTA_SET_TABLE, Data: []byte(t.Name + "\x00")},
  760. {Type: unix.NFTA_SET_NAME, Data: []byte(name + "\x00")},
  761. })
  762. if err != nil {
  763. return nil, err
  764. }
  765. message := netlink.Message{
  766. Header: netlink.Header{
  767. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_GETSET),
  768. Flags: netlink.Request | netlink.Acknowledge,
  769. },
  770. Data: append(extraHeader(uint8(t.Family), 0), data...),
  771. }
  772. if _, err := conn.SendMessages([]netlink.Message{message}); err != nil {
  773. return nil, fmt.Errorf("SendMessages: %w", err)
  774. }
  775. reply, err := receiveAckAware(conn, message.Header.Flags)
  776. if err != nil {
  777. return nil, fmt.Errorf("Receive: %w", err)
  778. }
  779. if len(reply) != 1 {
  780. return nil, fmt.Errorf("Receive: expected to receive 1 message but got %d", len(reply))
  781. }
  782. rs, err := setsFromMsg(reply[0])
  783. if err != nil {
  784. return nil, err
  785. }
  786. rs.Table = &Table{Name: t.Name, Use: t.Use, Flags: t.Flags, Family: t.Family}
  787. return rs, nil
  788. }
  789. // GetSetElements returns the elements in the specified set.
  790. func (cc *Conn) GetSetElements(s *Set) ([]SetElement, error) {
  791. conn, closer, err := cc.netlinkConn()
  792. if err != nil {
  793. return nil, err
  794. }
  795. defer func() { _ = closer() }()
  796. data, err := netlink.MarshalAttributes([]netlink.Attribute{
  797. {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")},
  798. {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")},
  799. })
  800. if err != nil {
  801. return nil, err
  802. }
  803. message := netlink.Message{
  804. Header: netlink.Header{
  805. Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_GETSETELEM),
  806. Flags: netlink.Request | netlink.Acknowledge | netlink.Dump,
  807. },
  808. Data: append(extraHeader(uint8(s.Table.Family), 0), data...),
  809. }
  810. if _, err := conn.SendMessages([]netlink.Message{message}); err != nil {
  811. return nil, fmt.Errorf("SendMessages: %v", err)
  812. }
  813. reply, err := receiveAckAware(conn, message.Header.Flags)
  814. if err != nil {
  815. return nil, fmt.Errorf("Receive: %v", err)
  816. }
  817. var elems []SetElement
  818. for _, msg := range reply {
  819. s, err := elementsFromMsg(msg)
  820. if err != nil {
  821. return nil, err
  822. }
  823. elems = append(elems, s...)
  824. }
  825. return elems, nil
  826. }