/* * Copyright (c) 2022, Psiphon Inc. * All rights reserved. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ package common import ( std_errors "errors" "net/url" "path/filepath" "regexp" "strings" ) // RedactURLError transforms an error, when it is a url.Error, removing // the URL value. This is to avoid logging private user data in cases // where the URL may be a user input value. // This function is used with errors returned by net/http and net/url, // which are (currently) of type url.Error. In particular, the round trip // function used by our HttpProxy, http.Client.Do, returns errors of type // url.Error, with the URL being the url sent from the user's tunneled // applications: // https://github.com/golang/go/blob/release-branch.go1.4/src/net/http/client.go#L394 func RedactURLError(err error) error { if urlErr, ok := err.(*url.Error); ok { err = &url.Error{ Op: urlErr.Op, URL: "", Err: urlErr.Err, } } return err } var redactIPAddressAndPortRegex = regexp.MustCompile( // IP address `(` + // IPv4 // // An IPv4 address can also be represented as an unsigned integer, or with // octal or with hex octet values, but we do not check for any of these // uncommon representations as some may match non-IP values and we don't // expect the "net" package, etc., to emit them.) 
`\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|` + // IPv6 // // Optional brackets for IPv6 with port `\[?` + `(` + // Uncompressed IPv6; ensure there are 8 segments to avoid matching, e.g., a // timestamp `(([a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4})|` + // Compressed IPv6 `([a-fA-F0-9:]*::[a-fA-F0-9:]+)|([a-fA-F0-9:]+::[a-fA-F0-9:]*)` + `)` + // Optional mapped/translated/embeded IPv4 suffix `(.\d{1,3}\.\d{1,3}\.\d{1,3})?` + `\]?` + `)` + // Optional port number `(:\d+)?`) // RedactIPAddresses returns a copy of the input with all IP addresses (and // optional ports) replaced by "[redacted]". This is intended to be used to // redact addresses from "net" package I/O error messages and otherwise avoid // inadvertently recording direct server IPs via error message logs; and, in // metrics, to reduce the error space due to superfluous source port data. // // RedactIPAddresses uses a simple regex match which liberally matches IP // address-like patterns and will match invalid addresses; for example, it // will match port numbers greater than 65535. We err on the side of redaction // and are not as concerned, in this context, with false positive matches. If // a user configures an upstream proxy address with an invalid IP or port // value, we prefer to redact it. // // See the redactIPAddressAndPortRegex comment for some uncommon IP address // representations that are not matched. func RedactIPAddresses(b []byte) []byte { return redactIPAddressAndPortRegex.ReplaceAll(b, []byte("[redacted]")) } // RedactIPAddressesString is RedactIPAddresses for strings. func RedactIPAddressesString(s string) string { return redactIPAddressAndPortRegex.ReplaceAllString(s, "[redacted]") } // EscapeRedactIPAddressString escapes the IP or IP:port addresses in the // input in such a way that they won't be redacted when part of the input to // RedactIPAddresses. 
//
// The escape encoding is not guaranteed to be reversible or suitable for
// machine processing; the goal is to simply ensure the original value is
// human readable.
func EscapeRedactIPAddressString(address string) string {
	// Prefix every "." and ":" with a backslash.
	address = strings.ReplaceAll(address, ".", "\\.")
	address = strings.ReplaceAll(address, ":", "\\:")
	return address
}

// redactFilePathRegex heuristically matches a file path: a run of non-space
// characters containing at least one "/" path separator.
var redactFilePathRegex = regexp.MustCompile(
	// File path
	`(` +
		// Leading characters
		`[^ ]*` +
		// At least one path separator
		`/` +
		// Path component; take until next space
		`[^ ]*` +
		`)+`)

// RedactFilePaths returns a copy of the input with all file paths
// replaced by "[redacted]". First any occurrences of the provided file paths
// are replaced and then an attempt is made to replace any other file paths by
// searching with a heuristic. The latter is a best-effort attempt; it is not
// guaranteed that it will catch every file path.
//
// Empty entries in filePaths are ignored. The input is passed through
// filepath.ToSlash before the heuristic search, so OS-specific path
// separators in the result are converted to "/".
func RedactFilePaths(s string, filePaths ...string) string {
	for _, filePath := range filePaths {
		if filePath == "" {
			// strings.ReplaceAll with an empty search string inserts the
			// replacement between every rune, which would destroy the
			// message instead of redacting a path.
			continue
		}
		s = strings.ReplaceAll(s, filePath, "[redacted]")
	}
	return redactFilePathRegex.ReplaceAllLiteralString(filepath.ToSlash(s), "[redacted]")
}

// RedactFilePathsError is RedactFilePaths for errors.
//
// A nil err yields nil. Otherwise the result is a new error created from the
// redacted message text; the type and any wrapped chain of the original error
// are not preserved.
func RedactFilePathsError(err error, filePaths ...string) error {
	if err == nil {
		return nil
	}
	return std_errors.New(RedactFilePaths(err.Error(), filePaths...))
}

// RedactNetError removes network address information from a "net" package
// error message. Addresses may be domains or IP addresses.
//
// Everything before the first ": " in the message is replaced with
// "[redacted]". A nil error, or an error whose message contains no ": ", is
// returned unchanged.
//
// Limitations: some non-address error context can be lost; this function
// makes assumptions about how the Go "net" package error messages are
// formatted and will fail to redact network addresses if these assumptions
// become untrue.
func RedactNetError(err error) error {

	// Example "net" package error messages:
	//
	// - lookup <domain>: no such host
	// - lookup <domain>: No address associated with hostname
	// - dial tcp <address>: connectex: No connection could be made because the target machine actively refused it
	// - write tcp <address>-><address>: write: connection refused

	if err == nil {
		return err
	}

	errstr := err.Error()
	index := strings.Index(errstr, ": ")
	if index == -1 {
		return err
	}

	return std_errors.New("[redacted]" + errstr[index:])
}