| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- /*
- * Copyright (c) 2022, Psiphon Inc.
- * All rights reserved.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- */
- // Package transforms provides a mechanism to define and apply string data
- // transformations, with the transformations defined by regular expressions
- // to match data to be transformed, and regular expression generators to
- // specify additional or replacement data.
- package transforms
- import (
- "regexp"
- "regexp/syntax"
- "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
- "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
- "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/regen"
- )
- const (
- SCOPE_ANY = "" // Scope name used as the fallback entry in ScopedSpecNames when an input scope has no entry of its own.
- )
- // Spec is a transform spec. A spec is a list of individual transforms to be
- // applied in order. Each transform is defined by two elements: element 0 is a
- // regular expression to be matched against the input; element 1 is a regular
- // expression generator which generates new data. Subgroups from the regular
- // expression may be specified in the regular expression generator, and are
- // populated with the subgroup match, and in this way parts of the original
- // matching data may be retained in the transformed data.
- //
- // For example, with the transform [2]string{"([a-b])", "\\$\\{1\\}c"},
- // substrings consisting of the characters 'a' and 'b' will be
- // transformed into the same substring with a single character 'c' appended.
- type Spec [][2]string
- // Specs is a set of named Specs, keyed by Spec name.
- type Specs map[string]Spec
- // Validate checks that all entries in a set of Specs is well-formed, with
- // valid regular expressions.
- func (specs Specs) Validate(prefixMode bool) error {
- seed, err := prng.NewSeed()
- if err != nil {
- return errors.Trace(err)
- }
- for _, spec := range specs {
- // Call Apply to compile/validate the regular expressions and generators.
- if prefixMode {
- if len(spec) != 1 || len(spec[0]) != 2 {
- return errors.TraceNew("prefix mode requires exactly one transform")
- }
- _, _, err := spec.ApplyPrefix(seed, 0)
- if err != nil {
- return errors.Trace(err)
- }
- } else {
- _, err := spec.ApplyString(seed, "")
- if err != nil {
- return errors.Trace(err)
- }
- }
- }
- return nil
- }
- // ScopedSpecNames maps a scope name to the list of Spec names that are grouped
- // under that scope. The meaning of scope depends on the context in which the
- // transforms are to be used.
- //
- // For example, in the context of DNS request transforms, the scope is the DNS
- // server for which a specific group of transforms is known to be effective.
- //
- // The scope name "" is SCOPE_ANY, and matches any input scope name when there
- // is no specific entry for that scope name in ScopedSpecNames.
- type ScopedSpecNames map[string][]string
- // Validate checks that the ScopedSpecNames is well-formed and referenced Spec
- // names are defined in the corresponding input specs.
- func (scopedSpecs ScopedSpecNames) Validate(specs Specs) error {
- for _, scoped := range scopedSpecs {
- for _, specName := range scoped {
- _, ok := specs[specName]
- if !ok {
- return errors.Tracef("undefined spec name: %s", specName)
- }
- }
- }
- return nil
- }
- // Select picks a Spec from Specs based on the input scope and scoping rules.
- // If the input scope name is defined in scopedSpecs, that match takes
- // precedence. Otherwise SCOPE_ANY is selected, when present.
- //
- // After the scope is resolved, Select randomly selects from the matching Spec
- // list.
- //
- // Select will return "", nil when no selection can be made.
- func (specs Specs) Select(scope string, scopedSpecs ScopedSpecNames) (string, Spec) {
- if scope != SCOPE_ANY {
- scoped, ok := scopedSpecs[scope]
- if ok {
- // If the specific scope is defined but empty, this means select
- // nothing -- don't fall through to SCOPE_ANY.
- if len(scoped) == 0 {
- return "", nil
- }
- specName := scoped[prng.Intn(len(scoped))]
- spec, ok := specs[specName]
- if !ok {
- // specName is not found in specs, which should not happen if
- // Validate passes; select nothing in this case.
- return "", nil
- }
- return specName, spec
- }
- // Fall through to SCOPE_ANY.
- }
- anyScope, ok := scopedSpecs[SCOPE_ANY]
- if !ok || len(anyScope) == 0 {
- // No SCOPE_ANY, or SCOPE_ANY is an empty list.
- return "", nil
- }
- specName := anyScope[prng.Intn(len(anyScope))]
- spec, ok := specs[specName]
- if !ok {
- return "", nil
- }
- return specName, spec
- }
- // ApplyPrefix unlike other Apply methods, does not apply the Spec to an input.
- // It instead generates a sequence of bytes according to the Spec, and returns
- // at least minLength bytes if the Spec generates fewer than minLength bytes.
- //
- // The input seed is used for all random number generation. The same seed can be
- // supplied to produce the same output, for replay.
- func (spec Spec) ApplyPrefix(seed *prng.Seed, minLength int) ([]byte, int, error) {
- if len(spec) != 1 || len(spec[0]) != 2 {
- return nil, 0, errors.TraceNew("prefix mode requires exactly one transform")
- }
- rng := prng.NewPRNGWithSeed(seed)
- args := ®en.GeneratorArgs{
- RngSource: rng,
- ByteMode: true,
- }
- gen, err := regen.NewGenerator(spec[0][1], args)
- if err != nil {
- return nil, 0, errors.Trace(err)
- }
- prefix, err := gen.Generate()
- if err != nil {
- return nil, 0, errors.Trace(err)
- }
- prefixLen := len(prefix)
- if len(prefix) < minLength {
- // Add random padding to fill up to minLength.
- padding := rng.Bytes(minLength - len(prefix))
- prefix = append(prefix, padding...)
- }
- return prefix, prefixLen, nil
- }
- // ApplyString applies the Spec to the input string, producing the output string.
- //
- // The input seed is used for all random generation. The same seed can be
- // supplied to produce the same output, for replay.
- func (spec Spec) ApplyString(seed *prng.Seed, input string) (string, error) {
- value := input
- for _, transform := range spec {
- re, replacement, err := makeRegexAndRepl(seed, transform)
- if err != nil {
- return "", errors.Trace(err)
- }
- value = re.ReplaceAllString(value, string(replacement))
- }
- return value, nil
- }
- // Apply applies the Spec to the input bytes, producing the output bytes.
- //
- // The input seed is used for all random generation. The same seed can be
- // supplied to produce the same output, for replay.
- func (spec Spec) Apply(seed *prng.Seed, input []byte) ([]byte, error) {
- value := input
- for _, transform := range spec {
- re, replacement, err := makeRegexAndRepl(seed, transform)
- if err != nil {
- return nil, errors.Trace(err)
- }
- value = re.ReplaceAll(value, replacement)
- }
- return value, nil
- }
- // makeRegexAndRepl generates the regex and replacement for a given seed and
- // transform. The same seed can be supplied to produce the same output, for
- // replay.
- func makeRegexAndRepl(seed *prng.Seed, transform [2]string) (*regexp.Regexp, []byte, error) {
- // TODO: the compiled regexp and regen could be cached, but the seed is an
- // issue with caching the regen.
- args := ®en.GeneratorArgs{
- RngSource: prng.NewPRNGWithSeed(seed),
- Flags: syntax.OneLine | syntax.NonGreedy,
- }
- rg, err := regen.NewGenerator(transform[1], args)
- if err != nil {
- return nil, nil, errors.Trace(err)
- }
- replacement, err := rg.Generate()
- if err != nil {
- return nil, nil, errors.Trace(err)
- }
- re, err := regexp.Compile(transform[0])
- if err != nil {
- return nil, nil, errors.Trace(err)
- }
- return re, replacement, nil
- }
|