redact.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. /*
  2. * Copyright (c) 2022, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package common
  20. import (
  21. std_errors "errors"
  22. "net/url"
  23. "path/filepath"
  24. "regexp"
  25. "strings"
  26. )
  27. // RedactURLError transforms an error, when it is a url.Error, removing
  28. // the URL value. This is to avoid logging private user data in cases
  29. // where the URL may be a user input value.
  30. // This function is used with errors returned by net/http and net/url,
  31. // which are (currently) of type url.Error. In particular, the round trip
  32. // function used by our HttpProxy, http.Client.Do, returns errors of type
  33. // url.Error, with the URL being the url sent from the user's tunneled
  34. // applications:
  35. // https://github.com/golang/go/blob/release-branch.go1.4/src/net/http/client.go#L394
  36. func RedactURLError(err error) error {
  37. if urlErr, ok := err.(*url.Error); ok {
  38. err = &url.Error{
  39. Op: urlErr.Op,
  40. URL: "",
  41. Err: urlErr.Err,
  42. }
  43. }
  44. return err
  45. }
  46. var redactIPAddressAndPortRegex = regexp.MustCompile(
  47. // IP address
  48. `(` +
  49. // IPv4
  50. //
  51. // An IPv4 address can also be represented as an unsigned integer, or with
  52. // octal or with hex octet values, but we do not check for any of these
  53. // uncommon representations as some may match non-IP values and we don't
  54. // expect the "net" package, etc., to emit them.)
  55. `\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|` +
  56. // IPv6
  57. //
  58. // Optional brackets for IPv6 with port
  59. `\[?` +
  60. `(` +
  61. // Uncompressed IPv6; ensure there are 8 segments to avoid matching, e.g., a
  62. // timestamp
  63. `(([a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4})|` +
  64. // Compressed IPv6
  65. `([a-fA-F0-9:]*::[a-fA-F0-9:]+)|([a-fA-F0-9:]+::[a-fA-F0-9:]*)` +
  66. `)` +
  67. // Optional mapped/translated/embeded IPv4 suffix
  68. `(.\d{1,3}\.\d{1,3}\.\d{1,3})?` +
  69. `\]?` +
  70. `)` +
  71. // Optional port number
  72. `(:\d+)?`)
  73. // RedactIPAddresses returns a copy of the input with all IP addresses (and
  74. // optional ports) replaced by "[redacted]". This is intended to be used to
  75. // redact addresses from "net" package I/O error messages and otherwise avoid
  76. // inadvertently recording direct server IPs via error message logs; and, in
  77. // metrics, to reduce the error space due to superfluous source port data.
  78. //
  79. // RedactIPAddresses uses a simple regex match which liberally matches IP
  80. // address-like patterns and will match invalid addresses; for example, it
  81. // will match port numbers greater than 65535. We err on the side of redaction
  82. // and are not as concerned, in this context, with false positive matches. If
  83. // a user configures an upstream proxy address with an invalid IP or port
  84. // value, we prefer to redact it.
  85. //
  86. // See the redactIPAddressAndPortRegex comment for some uncommon IP address
  87. // representations that are not matched.
  88. func RedactIPAddresses(b []byte) []byte {
  89. return redactIPAddressAndPortRegex.ReplaceAll(b, []byte("[redacted]"))
  90. }
  91. // RedactIPAddressesString is RedactIPAddresses for strings.
  92. func RedactIPAddressesString(s string) string {
  93. return redactIPAddressAndPortRegex.ReplaceAllString(s, "[redacted]")
  94. }
  95. // EscapeRedactIPAddressString escapes the IP or IP:port addresses in the
  96. // input in such a way that they won't be redacted when part of the input to
  97. // RedactIPAddresses.
  98. //
  99. // The escape encoding is not guaranteed to be reversable or suitable for
  100. // machine processing; the goal is to simply ensure the original value is
  101. // human readable.
  102. func EscapeRedactIPAddressString(address string) string {
  103. address = strings.ReplaceAll(address, ".", "\\.")
  104. address = strings.ReplaceAll(address, ":", "\\:")
  105. return address
  106. }
  107. var redactFilePathRegex = regexp.MustCompile(
  108. // File path
  109. `(` +
  110. // Leading characters
  111. `[^ ]*` +
  112. // At least one path separator
  113. `/` +
  114. // Path component; take until next space
  115. `[^ ]*` +
  116. `)+`)
  117. // RedactFilePaths returns a copy of the input with all file paths
  118. // replaced by "[redacted]". First any occurrences of the provided file paths
  119. // are replaced and then an attempt is made to replace any other file paths by
  120. // searching with a heuristic. The latter is a best effort attempt it is not
  121. // guaranteed that it will catch every file path.
  122. func RedactFilePaths(s string, filePaths ...string) string {
  123. for _, filePath := range filePaths {
  124. s = strings.ReplaceAll(s, filePath, "[redacted]")
  125. }
  126. return redactFilePathRegex.ReplaceAllLiteralString(filepath.ToSlash(s), "[redacted]")
  127. }
  128. // RedactFilePathsError is RedactFilePaths for errors.
  129. func RedactFilePathsError(err error, filePaths ...string) error {
  130. return std_errors.New(RedactFilePaths(err.Error(), filePaths...))
  131. }
  132. // RedactNetError removes network address information from a "net" package
  133. // error message. Addresses may be domains or IP addresses.
  134. //
  135. // Limitations: some non-address error context can be lost; this function
  136. // makes assumptions about how the Go "net" package error messages are
  137. // formatted and will fail to redact network addresses if this assumptions
  138. // become untrue.
  139. func RedactNetError(err error) error {
  140. // Example "net" package error messages:
  141. //
  142. // - lookup <domain>: no such host
  143. // - lookup <domain>: No address associated with hostname
  144. // - dial tcp <address>: connectex: No connection could be made because the target machine actively refused it
  145. // - write tcp <address>-><address>: write: connection refused
  146. if err == nil {
  147. return err
  148. }
  149. errstr := err.Error()
  150. index := strings.Index(errstr, ": ")
  151. if index == -1 {
  152. return err
  153. }
  154. return std_errors.New("[redacted]" + errstr[index:])
  155. }