packetman_linux.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734
  1. /*
  2. * Copyright (c) 2020, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package packetman
  20. import (
  21. "context"
  22. "encoding/binary"
  23. "log"
  24. "net"
  25. "strconv"
  26. "strings"
  27. "sync"
  28. "syscall"
  29. "time"
  30. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  31. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  32. "github.com/florianl/go-nfqueue"
  33. "github.com/google/gopacket"
  34. "github.com/google/gopacket/layers"
  35. cache "github.com/patrickmn/go-cache"
  36. )
  // IsSupported reports whether packet manipulation is supported. This
  // implementation always returns true.
  func IsSupported() bool {
  	const supported = true
  	return supported
  }
  const (
  	// defaultSocketMark is the socket mark used when Config.SocketMark is
  	// zero. Its bytes spell "psip" in ASCII.
  	defaultSocketMark = 0x70736970

  	// appliedSpecCacheTTL is the expiration applied to appliedSpecCache
  	// entries.
  	appliedSpecCacheTTL = time.Minute
  )
  44. // Manipulator is a SYN-ACK packet manipulator.
  45. //
  46. // NFQUEUE/Netlink is used to intercept SYN-ACK packets, on all local
  47. // interfaces, with source port equal to one of the ProtocolPorts specified in
  48. // Config. For each intercepted SYN-ACK packet, the SelectSpecName callback in
  49. // Config is invoked; the callback determines which packet transformation spec
  50. // to apply, based on, for example, client GeoIP, protocol, or other
  51. // considerations.
  52. //
  53. // Protocol network listeners use GetAppliedSpecName to determine which
  54. // transformation spec was applied to a given accepted connection.
  55. //
  56. // When a manipulations are to be applied to a SYN-ACK packet, NFQUEUE is
  57. // instructed to drop the packet and one or more new packets, created by
  58. // applying transformations to the original SYN-ACK packet, are injected via
  59. // raw sockets. Raw sockets are used as NFQUEUE supports only replacing the
  60. // original packet with one alternative packet.
  61. //
  62. // To avoid an intercept loop, injected packets are marked (SO_MARK) and the
  63. // filter for NFQUEUE excludes packets with this mark.
  64. //
  65. // To avoid breaking TCP in unexpected cases, Manipulator fails open --
  66. // allowing the original packet to proceed -- when packet parsing fails. For
  67. // the same reason, the queue-bypass NFQUEUE option is set.
  68. //
  69. // As an iptables filter ensures only SYN-ACK packets are sent to the
  70. // NFQUEUEs, the overhead of packet interception, parsing, and injection is
  71. // incurred no more than once per TCP connection.
  72. //
  73. // NFQUEUE with queue-bypass requires Linux kernel 2.6.39; 3.16 or later is
  74. // validated and recommended.
  75. //
  76. // Due to use of NFQUEUE, larger than max socket buffer sizes, and raw
  77. // sockets, Manipulator requires CAP_NET_ADMIN and CAP_NET_RAW.
  78. type Manipulator struct {
  79. config *Config
  80. mutex sync.Mutex
  81. runContext context.Context
  82. stopRunning context.CancelFunc
  83. waitGroup *sync.WaitGroup
  84. injectIPv4FD int
  85. injectIPv6FD int
  86. nfqueue *nfqueue.Nfqueue
  87. compiledSpecsMutex sync.Mutex
  88. compiledSpecs map[string]*compiledSpec
  89. appliedSpecCache *cache.Cache
  90. }
  91. // NewManipulator creates a new Manipulator.
  92. func NewManipulator(config *Config) (*Manipulator, error) {
  93. m := &Manipulator{
  94. config: config,
  95. }
  96. err := m.SetSpecs(config.Specs)
  97. if err != nil {
  98. return nil, errors.Trace(err)
  99. }
  100. // To avoid memory exhaustion, do not retain unconsumed appliedSpecCache
  101. // entries for a longer time than it may reasonably take to complete the TCP
  102. // handshake.
  103. m.appliedSpecCache = cache.New(appliedSpecCacheTTL, appliedSpecCacheTTL/2)
  104. return m, nil
  105. }
  106. // Start initializes NFQUEUEs and raw sockets for packet manipulation. Start
  107. // returns when initialization is complete; once it returns, the caller may
  108. // assume that any SYN-ACK packets on configured ports will be intercepted. In
  109. // the case of initialization failure, Start will undo any partial
  110. // initialization. When Start succeeds, the caller must call Stop to free
  111. // resources and restore networking state.
  112. func (m *Manipulator) Start() (retErr error) {
  113. m.mutex.Lock()
  114. defer m.mutex.Unlock()
  115. if m.runContext != nil {
  116. return errors.TraceNew("already running")
  117. }
  118. if len(m.config.ProtocolPorts) == 0 {
  119. // There are no ports to intercept, so there is nothing to run. Skip
  120. // subsequent operations which assume at least one intercept port is
  121. // configured. This is a success case, and a subseqent call to Stop is a
  122. // no-op.
  123. return nil
  124. }
  125. err := m.configureIPTables(true)
  126. if err != nil {
  127. return errors.Trace(err)
  128. }
  129. defer func() {
  130. if retErr != nil {
  131. m.configureIPTables(false)
  132. }
  133. }()
  134. m.injectIPv4FD, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_RAW, syscall.IPPROTO_RAW)
  135. if err != nil {
  136. return errors.Trace(err)
  137. }
  138. defer func() {
  139. if retErr != nil {
  140. syscall.Close(m.injectIPv4FD)
  141. }
  142. }()
  143. err = syscall.SetsockoptInt(m.injectIPv4FD, syscall.IPPROTO_IP, syscall.IP_HDRINCL, 1)
  144. if err != nil {
  145. return errors.Trace(err)
  146. }
  147. err = syscall.SetsockoptInt(m.injectIPv4FD, syscall.SOL_SOCKET, syscall.SO_MARK, m.getSocketMark())
  148. if err != nil {
  149. return errors.Trace(err)
  150. }
  151. m.injectIPv6FD, err = syscall.Socket(syscall.AF_INET6, syscall.SOCK_RAW, syscall.IPPROTO_RAW)
  152. if err != nil && !m.config.AllowNoIPv6NetworkConfiguration {
  153. return errors.Trace(err)
  154. }
  155. defer func() {
  156. if retErr != nil {
  157. syscall.Close(m.injectIPv6FD)
  158. }
  159. }()
  160. if m.injectIPv6FD != 0 {
  161. err = syscall.SetsockoptInt(m.injectIPv6FD, syscall.IPPROTO_IPV6, syscall.IP_HDRINCL, 1)
  162. if err != nil {
  163. // There's no AllowNoIPv6NetworkConfiguration in this case: if we can
  164. // create an IPv6 socket, we must be able to set its options.
  165. return errors.Trace(err)
  166. }
  167. err = syscall.SetsockoptInt(m.injectIPv6FD, syscall.SOL_SOCKET, syscall.SO_MARK, m.getSocketMark())
  168. if err != nil {
  169. return errors.Trace(err)
  170. }
  171. }
  172. // Use a reasonable buffer size to avoid excess allocation. As we're
  173. // intercepting only locally generated SYN-ACK packets, which should have no
  174. // payload, this size should be more than sufficient.
  175. maxPacketLen := uint32(1500)
  176. // The kernel default is 1024:
  177. // https://github.com/torvalds/linux/blob/cd8dead0c39457e58ec1d36db93aedca811d48f1/net/netfilter/nfnetlink_queue.c#L51,
  178. // via https://github.com/florianl/go-nfqueue/issues/3.
  179. // We use a larger queue size to accomodate more concurrent SYN-ACK packets.
  180. maxQueueLen := uint32(2048)
  181. // Timeout note: on a small subset of production servers, we have found that
  182. // setting a non-zero read timeout results in occasional "orphaned" packets
  183. // which remain in the queue but are not delivered to handleInterceptedPacket
  184. // for a verdict. This phenomenon leads to a stall in nfqueue processing once
  185. // the queue fills up with packets apparently awaiting a verdict. The shorter
  186. // the timeout, the faster that orphaned packets accumulate. With no timeout,
  187. // and reads in blocking mode, this phenomenon does not occur.
  188. m.nfqueue, err = nfqueue.Open(
  189. &nfqueue.Config{
  190. NfQueue: uint16(m.config.QueueNumber),
  191. MaxPacketLen: maxPacketLen,
  192. MaxQueueLen: maxQueueLen,
  193. Copymode: nfqueue.NfQnlCopyPacket,
  194. Logger: newNfqueueLogger(m.config.Logger),
  195. ReadTimeout: 0,
  196. WriteTimeout: 0,
  197. })
  198. if err != nil {
  199. return errors.Trace(err)
  200. }
  201. defer func() {
  202. if retErr != nil {
  203. m.nfqueue.Close()
  204. }
  205. }()
  206. // Set a netlink socket receive buffer size that is significantly larger than
  207. // the typical default of 212992. This avoids ENOBUFS in the case of many
  208. // netlink messages from the kernel (capped by the max queue size). Note that
  209. // the CAP_NET_ADMIN may be required when this exceeds the configured max
  210. // buffer size.
  211. err = m.nfqueue.Con.SetReadBuffer(1703936)
  212. if err != nil {
  213. return errors.Trace(err)
  214. }
  215. runContext, stopRunning := context.WithCancel(context.Background())
  216. defer func() {
  217. if retErr != nil {
  218. stopRunning()
  219. }
  220. }()
  221. err = m.nfqueue.Register(runContext, m.handleInterceptedPacket)
  222. if err != nil {
  223. return errors.Trace(err)
  224. }
  225. m.runContext = runContext
  226. m.stopRunning = stopRunning
  227. return nil
  228. }
  229. // Stop halts packet manipulation, frees resources, and restores networking
  230. // state.
  231. func (m *Manipulator) Stop() {
  232. m.mutex.Lock()
  233. defer m.mutex.Unlock()
  234. if m.runContext == nil {
  235. return
  236. }
  237. // Call stopRunning before interrupting the blocked read; this ensures that
  238. // the nfqueue socketCallback loop will exit after the read is interrupted.
  239. m.stopRunning()
  240. // Interrupt a blocked read.
  241. m.nfqueue.Con.SetDeadline(time.Unix(0, 1))
  242. // There's no socketCallback exit synchronization exposed by nfqueue. Calling
  243. // nfqueue.Close while socketCallback is still running can result in errors
  244. // such as "nfqueuenfqueue_gteq_1.12.go:134: Could not unbind from queue:
  245. // netlink send: sendmsg: bad file descriptor"; and closing the raw socket
  246. // file descriptors while socketCallback is still running can result in
  247. // errors such as "packetman.(*Manipulator).injectPackets#604: bad file
  248. // descriptor".
  249. //
  250. // Attempt to avoid invalid file descriptor operations and spurious error
  251. // messages by sleeping for a short period, allowing socketCallback to poll
  252. // the context and exit.
  253. time.Sleep(100 * time.Millisecond)
  254. m.nfqueue.Close()
  255. syscall.Close(m.injectIPv4FD)
  256. if m.injectIPv6FD != 0 {
  257. syscall.Close(m.injectIPv6FD)
  258. }
  259. m.configureIPTables(false)
  260. }
  261. // SetSpecs installs a new set of packet transformation Spec values, replacing
  262. // the initial specs from Config.Specs, or any previous SetSpecs call. When
  263. // SetSpecs returns an error, the previous set of specs is retained.
  264. func (m *Manipulator) SetSpecs(specs []*Spec) error {
  265. compiledSpecs := make(map[string]*compiledSpec)
  266. for _, spec := range specs {
  267. if spec.Name == "" {
  268. return errors.TraceNew("invalid spec name")
  269. }
  270. if _, ok := compiledSpecs[spec.Name]; ok {
  271. return errors.TraceNew("duplicate spec name")
  272. }
  273. compiledSpec, err := compileSpec(spec)
  274. if err != nil {
  275. return errors.Trace(err)
  276. }
  277. compiledSpecs[spec.Name] = compiledSpec
  278. }
  279. m.compiledSpecsMutex.Lock()
  280. m.compiledSpecs = compiledSpecs
  281. m.compiledSpecsMutex.Unlock()
  282. return nil
  283. }
  // makeConnectionID creates a unique connection ID, for appliedSpecCache, from
  // the 4-tuple srcIP, srcPort, dstIP, dstPort. In the SYN/ACK context, src is
  // the server and dst is the client.
  //
  // The ID is a fixed 36-byte string laid out as
  //
  //	srcIP (16 bytes) | srcPort (2 bytes) | dstIP (16 bytes) | dstPort (2 bytes)
  //
  // with ports in big-endian order. A 4-byte IPv4 address is stored in its
  // IPv4-in-IPv6 form, so the 4-byte and 16-byte representations of the same
  // IPv4 address produce the same ID. An address of any other length (including
  // nil) leaves its 16-byte field zeroed; every field stays at its fixed
  // offset, so a malformed address can neither shift the remaining fields nor
  // write out of range.
  //
  // Limitation: there may be many repeat connections from one dstIP,
  // especially if many clients are behind the same NAT. Each TCP connection
  // will have a distinct dstPort. In principle, there remains a race between
  // populating appliedSpecCache, the TCP connection terminating on the
  // client-side and the NAT reusing the dstPort, and consuming
  // appliedSpecCache.
  func makeConnectionID(
  	srcIP net.IP, srcPort uint16, dstIP net.IP, dstPort uint16) string {

  	const (
  		uint16Len   = 2
  		endpointLen = net.IPv6len + uint16Len
  	)

  	// From: https://github.com/golang/go/blob/b88efc7e7ac15f9e0b5d8d9c82f870294f6a3839/src/net/ip.go#L55
  	v4InV6Prefix := [...]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff}

  	var connID [2 * endpointLen]byte

  	// putEndpoint writes one address/port pair into field, which must be
  	// exactly endpointLen bytes long.
  	putEndpoint := func(field []byte, ip net.IP, port uint16) {
  		switch len(ip) {
  		case net.IPv4len:
  			copy(field, v4InV6Prefix[:])
  			copy(field[len(v4InV6Prefix):], ip)
  		case net.IPv6len:
  			copy(field, ip)
  		}
  		binary.BigEndian.PutUint16(field[net.IPv6len:], port)
  	}

  	putEndpoint(connID[:endpointLen], srcIP, srcPort)
  	putEndpoint(connID[endpointLen:], dstIP, dstPort)

  	return string(connID[:])
  }
  // appliedSpec is the value stored in appliedSpecCache for one connection.
  type appliedSpec struct {
  	// specName is the name of the applied spec; it is empty when no packet
  	// manipulation was applied.
  	specName string

  	// extraData is the arbitrary value returned alongside the spec name by
  	// the SelectSpecName callback.
  	extraData interface{}
  }
  329. // GetAppliedSpecName returns the packet manipulation spec name applied to the
  330. // TCP connection, represented by its local and remote address components,
  331. // that was ultimately accepted by a network listener. The second return value
  332. // is the arbitrary extra data returned by GetSpecName.
  333. //
  334. // This allows SelectSpecName, the spec selector, to be non-deterministic
  335. // while also allowing for accurate packet manipulation metrics to be
  336. // associated with each TCP connection.
  337. //
  338. // For a given connection, GetAppliedSpecName must be called before a TTL
  339. // clears the stored value. Calling GetAppliedSpecName immediately clears the
  340. // stored value for the given connection.
  341. //
  342. // To obtain the correct result GetAppliedSpecName must be called with a
  343. // RemoteAddr which reflects the true immediate network peer address. In
  344. // particular, for proxied net.Conns which present a synthetic RemoteAddr with
  345. // the original address of a proxied client (e.g., armon/go-proxyproto, or
  346. // psiphon/server.meekConn) the true peer RemoteAddr must instead be
  347. // provided.
  348. func (m *Manipulator) GetAppliedSpecName(
  349. localAddr, remoteAddr *net.TCPAddr) (string, interface{}, error) {
  350. connID := makeConnectionID(
  351. localAddr.IP,
  352. uint16(localAddr.Port),
  353. remoteAddr.IP,
  354. uint16(remoteAddr.Port))
  355. value, found := m.appliedSpecCache.Get(connID)
  356. if !found {
  357. return "", nil, errors.TraceNew("connection not found")
  358. }
  359. appliedSpec := value.(appliedSpec)
  360. m.appliedSpecCache.Delete(connID)
  361. return appliedSpec.specName, appliedSpec.extraData, nil
  362. }
  363. func (m *Manipulator) setAppliedSpecName(
  364. interceptedPacket gopacket.Packet,
  365. specName string,
  366. extraData interface{}) {
  367. srcIP, dstIP, _, _ := m.getPacketAddressInfo(interceptedPacket)
  368. interceptedTCP := interceptedPacket.Layer(layers.LayerTypeTCP).(*layers.TCP)
  369. connID := makeConnectionID(
  370. srcIP,
  371. uint16(interceptedTCP.SrcPort),
  372. dstIP,
  373. uint16(interceptedTCP.DstPort))
  374. m.appliedSpecCache.Set(
  375. connID,
  376. appliedSpec{
  377. specName: specName,
  378. extraData: extraData,
  379. },
  380. cache.DefaultExpiration)
  381. }
  382. func (m *Manipulator) getSocketMark() int {
  383. if m.config.SocketMark == 0 {
  384. return defaultSocketMark
  385. }
  386. return m.config.SocketMark
  387. }
  388. func (m *Manipulator) handleInterceptedPacket(attr nfqueue.Attribute) int {
  389. if attr.PacketID == nil || attr.Payload == nil {
  390. m.config.Logger.WithTrace().Warning("missing nfqueue data")
  391. return 0
  392. }
  393. // Trigger packet manipulation only if the packet is a SYN-ACK and has no
  394. // payload (which a transformation _may_ discard). The iptables filter for
  395. // NFQUEUE should already ensure that only SYN-ACK packets are sent through
  396. // the queue. To avoid breaking all TCP connections in an unanticipated case,
  397. // fail open -- allow the packet -- if these conditions are not met or if
  398. // parsing the packet fails.
  399. packet, err := m.parseInterceptedPacket(*attr.Payload)
  400. if err != nil {
  401. // Fail open in this case.
  402. m.nfqueue.SetVerdict(*attr.PacketID, nfqueue.NfAccept)
  403. m.config.Logger.WithTraceFields(
  404. common.LogFields{"error": err}).Warning("unexpected packet")
  405. return 0
  406. }
  407. spec, extraData, err := m.getCompiledSpec(packet)
  408. if err != nil {
  409. // Fail open in this case.
  410. m.nfqueue.SetVerdict(*attr.PacketID, nfqueue.NfAccept)
  411. m.config.Logger.WithTraceFields(
  412. common.LogFields{"error": err}).Warning("get strategy failed")
  413. return 0
  414. }
  415. // Call setAppliedSpecName cache _before_ accepting the packet or injecting
  416. // manipulated packets to avoid a potential race in which the TCP handshake
  417. // completes and GetAppliedSpecName is called before the cache is populated.
  418. if spec == nil {
  419. // No packet manipulation in this case.
  420. m.setAppliedSpecName(packet, "", extraData)
  421. m.nfqueue.SetVerdict(*attr.PacketID, nfqueue.NfAccept)
  422. return 0
  423. }
  424. m.setAppliedSpecName(packet, spec.name, extraData)
  425. m.nfqueue.SetVerdict(*attr.PacketID, nfqueue.NfDrop)
  426. err = m.injectPackets(packet, spec)
  427. if err != nil {
  428. m.config.Logger.WithTraceFields(
  429. common.LogFields{"error": err}).Warning("inject packets failed")
  430. return 0
  431. }
  432. return 0
  433. }
  434. func (m *Manipulator) parseInterceptedPacket(packetData []byte) (gopacket.Packet, error) {
  435. // Note that NFQUEUE doesn't send an Ethernet layer. This first layer is
  436. // either IPv4 or IPv6.
  437. //
  438. // As we parse only one packet per TCP connection, we are not using the
  439. // faster DecodingLayerParser API,
  440. // https://godoc.org/github.com/google/gopacket#hdr-Fast_Decoding_With_DecodingLayerParser,
  441. // or zero-copy approaches.
  442. //
  443. // TODO: use a stub gopacket.Decoder as the first layer to avoid the extra
  444. // NewPacket call? Use distinct NFQUEUE queue numbers and nfqueue instances
  445. // for IPv4 and IPv6?
  446. packet := gopacket.NewPacket(packetData, layers.LayerTypeIPv4, gopacket.Default)
  447. if packet.ErrorLayer() != nil {
  448. packet = gopacket.NewPacket(packetData, layers.LayerTypeIPv6, gopacket.Default)
  449. }
  450. errLayer := packet.ErrorLayer()
  451. if errLayer != nil {
  452. return nil, errors.Trace(errLayer.Error())
  453. }
  454. // After this check, Layer([IPv4,IPv6]/TCP) return values are assumed to be
  455. // non-nil and unchecked layer type assertions are assumed safe.
  456. tcpLayer := packet.Layer(layers.LayerTypeTCP)
  457. if tcpLayer == nil {
  458. return nil, errors.TraceNew("missing TCP layer")
  459. }
  460. if packet.Layer(gopacket.LayerTypePayload) != nil {
  461. return nil, errors.TraceNew("unexpected payload layer")
  462. }
  463. tcp := tcpLayer.(*layers.TCP)
  464. // Any of the ECN TCP flags (https://tools.ietf.org/html/rfc3168 and
  465. // rfc3540), ECE/CWR/NS, may be set in a SYN-ACK, and are retained.
  466. //
  467. // Limitation: these additional flags are retained as-is on injected packets
  468. // only when no TCP flag transformation is applied.
  469. if !tcp.SYN || !tcp.ACK ||
  470. tcp.FIN || tcp.RST || tcp.PSH || tcp.URG {
  471. return nil, errors.TraceNew("unexpected TCP flags")
  472. }
  473. stripEOLOption(packet)
  474. return packet, nil
  475. }
  476. func (m *Manipulator) getCompiledSpec(
  477. interceptedPacket gopacket.Packet) (*compiledSpec, interface{}, error) {
  478. _, dstIP, _, _ := m.getPacketAddressInfo(interceptedPacket)
  479. interceptedTCP := interceptedPacket.Layer(layers.LayerTypeTCP).(*layers.TCP)
  480. protocolPort := interceptedTCP.SrcPort
  481. clientIP := dstIP
  482. specName, extraData := m.config.SelectSpecName(int(protocolPort), clientIP)
  483. if specName == "" {
  484. return nil, extraData, nil
  485. }
  486. // Concurrency note: m.compiledSpecs may be replaced by SetSpecs, but any
  487. // reference to an individual compiledSpec remains valid; each compiledSpec
  488. // is read-only.
  489. m.compiledSpecsMutex.Lock()
  490. spec, ok := m.compiledSpecs[specName]
  491. m.compiledSpecsMutex.Unlock()
  492. if !ok {
  493. return nil, nil, errors.Tracef("invalid spec name: %s", specName)
  494. }
  495. return spec, extraData, nil
  496. }
  497. func (m *Manipulator) injectPackets(interceptedPacket gopacket.Packet, spec *compiledSpec) error {
  498. // A sockAddr parameter with dstIP (but not port) set appears to be required
  499. // even with the IP_HDRINCL socket option.
  500. _, _, injectFD, sockAddr := m.getPacketAddressInfo(interceptedPacket)
  501. injectPackets, err := spec.apply(interceptedPacket)
  502. if err != nil {
  503. return errors.Trace(err)
  504. }
  505. for _, injectPacket := range injectPackets {
  506. err = syscall.Sendto(injectFD, injectPacket, 0, sockAddr)
  507. if err != nil {
  508. return errors.Trace(err)
  509. }
  510. }
  511. return nil
  512. }
  513. func (m *Manipulator) getPacketAddressInfo(interceptedPacket gopacket.Packet) (net.IP, net.IP, int, syscall.Sockaddr) {
  514. var srcIP, dstIP net.IP
  515. var injectFD int
  516. var sockAddr syscall.Sockaddr
  517. ipv4Layer := interceptedPacket.Layer(layers.LayerTypeIPv4)
  518. if ipv4Layer != nil {
  519. interceptedIPv4 := ipv4Layer.(*layers.IPv4)
  520. srcIP = interceptedIPv4.SrcIP
  521. dstIP = interceptedIPv4.DstIP
  522. injectFD = m.injectIPv4FD
  523. var ipv4 [4]byte
  524. copy(ipv4[:], interceptedIPv4.DstIP.To4())
  525. sockAddr = &syscall.SockaddrInet4{Addr: ipv4, Port: 0}
  526. } else {
  527. interceptedIPv6 := interceptedPacket.Layer(layers.LayerTypeIPv6).(*layers.IPv6)
  528. srcIP = interceptedIPv6.SrcIP
  529. dstIP = interceptedIPv6.DstIP
  530. injectFD = m.injectIPv6FD
  531. var ipv6 [16]byte
  532. copy(ipv6[:], interceptedIPv6.DstIP.To16())
  533. sockAddr = &syscall.SockaddrInet6{Addr: ipv6, Port: 0}
  534. }
  535. return srcIP, dstIP, injectFD, sockAddr
  536. }
  537. func (m *Manipulator) configureIPTables(addRules bool) error {
  538. execCommands := func(mode string) error {
  539. ports := make([]string, len(m.config.ProtocolPorts))
  540. for i, port := range m.config.ProtocolPorts {
  541. ports[i] = strconv.Itoa(port)
  542. }
  543. socketMark := strconv.Itoa(m.getSocketMark())
  544. args := []string{
  545. mode, "OUTPUT",
  546. "--protocol", "tcp",
  547. "--match", "multiport",
  548. "--source-ports", strings.Join(ports, ","),
  549. "--match", "mark",
  550. "!", "--mark", socketMark,
  551. "--tcp-flags", "ALL", "SYN,ACK",
  552. "-j", "NFQUEUE",
  553. "--queue-bypass",
  554. "--queue-num", strconv.Itoa(m.config.QueueNumber),
  555. }
  556. err := common.RunNetworkConfigCommand(
  557. m.config.Logger,
  558. m.config.SudoNetworkConfigCommands,
  559. "iptables",
  560. args...)
  561. if mode != "-D" && err != nil {
  562. return errors.Trace(err)
  563. }
  564. err = common.RunNetworkConfigCommand(
  565. m.config.Logger,
  566. m.config.SudoNetworkConfigCommands,
  567. "ip6tables",
  568. args...)
  569. if mode != "-D" && err != nil {
  570. if m.config.AllowNoIPv6NetworkConfiguration {
  571. m.config.Logger.WithTraceFields(
  572. common.LogFields{
  573. "error": err}).Warning(
  574. "configure IPv6 NFQUEUE failed")
  575. } else {
  576. return errors.Trace(err)
  577. }
  578. }
  579. return nil
  580. }
  581. // To avoid duplicates, first try to drop existing rules, then add. Also try
  582. // to revert any partial configuration in the case of an error.
  583. _ = execCommands("-D")
  584. if addRules {
  585. err := execCommands("-I")
  586. if err != nil {
  587. _ = execCommands("-D")
  588. }
  589. return errors.Trace(err)
  590. }
  591. return nil
  592. }
  593. func newNfqueueLogger(logger common.Logger) *log.Logger {
  594. return log.New(
  595. &nfqueueLoggerWriter{logger: logger},
  596. "nfqueue",
  597. log.Lshortfile)
  598. }
  599. type nfqueueLoggerWriter struct {
  600. logger common.Logger
  601. }
  602. func (n *nfqueueLoggerWriter) Write(p []byte) (int, error) {
  603. n.logger.WithTraceFields(
  604. common.LogFields{"log": string(p)}).Warning("nfqueue log")
  605. return len(p), nil
  606. }