transforms.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. /*
  2. * Copyright (c) 2022, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package transforms provides a mechanism to define and apply string data
  20. // transformations, with the transformations defined by regular expressions
  21. // to match data to be transformed, and regular expression generators to
  22. // specify additional or replacement data.
  23. package transforms
  24. import (
  25. "regexp"
  26. "regexp/syntax"
  27. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  28. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  29. regen "github.com/zach-klippenstein/goregen"
  30. )
const (
	// SCOPE_ANY is the scope name "". Entries stored under SCOPE_ANY in a
	// ScopedSpecNames apply to any input scope that has no entry of its own.
	SCOPE_ANY = ""
)
// Spec is a transform spec. A spec is a list of individual transforms to be
// applied in order. Each transform is defined by two elements: a regular
// expression to be matched against the input; and a regular expression
// generator which generates new data. Subgroups from the regular expression
// may be specified in the regular expression generator, and are populated
// with the subgroup match, and in this way parts of the original matching
// data may be retained in the transformed data.
//
// For example, with the transform [2]string{"([a-b])", "\\$\\{1\\}c"},
// each occurrence of the character 'a' or 'b' will be transformed into that
// same character with a single character 'c' appended.
type Spec [][2]string
// Specs is a set of named Spec values, keyed by Spec name.
type Specs map[string]Spec
  48. // Validate checks that all entries in a set of Specs is well-formed, with
  49. // valid regular expressions.
  50. func (specs Specs) Validate() error {
  51. seed, err := prng.NewSeed()
  52. if err != nil {
  53. return errors.Trace(err)
  54. }
  55. for _, spec := range specs {
  56. // Call Apply to compile/validate the regular expressions and generators.
  57. _, err := spec.ApplyString(seed, "")
  58. if err != nil {
  59. return errors.Trace(err)
  60. }
  61. }
  62. return nil
  63. }
// ScopedSpecNames groups a list of Specs, referenced by their Spec name, with
// the group defined by a scope. The map key is the scope name and the map
// value is the list of Spec names in that scope. The meaning of scope depends
// on the context in which the transforms are to be used.
//
// For example, in the context of DNS request transforms, the scope is the DNS
// server for which a specific group of transforms is known to be effective.
//
// The scope name "" is SCOPE_ANY, and matches any input scope name when there
// is no specific entry for that scope name in ScopedSpecNames.
type ScopedSpecNames map[string][]string
  74. // Validate checks that the ScopedSpecNames is well-formed and referenced Spec
  75. // names are defined in the corresponding input specs.
  76. func (scopedSpecs ScopedSpecNames) Validate(specs Specs) error {
  77. for _, scoped := range scopedSpecs {
  78. for _, specName := range scoped {
  79. _, ok := specs[specName]
  80. if !ok {
  81. return errors.Tracef("undefined spec name: %s", specName)
  82. }
  83. }
  84. }
  85. return nil
  86. }
  87. // Select picks a Spec from Specs based on the input scope and scoping rules.
  88. // If the input scope name is defined in scopedSpecs, that match takes
  89. // precedence. Otherwise SCOPE_ANY is selected, when present.
  90. //
  91. // After the scope is resolved, Select randomly selects from the matching Spec
  92. // list.
  93. //
  94. // Select will return "", nil when no selection can be made.
  95. func (specs Specs) Select(scope string, scopedSpecs ScopedSpecNames) (string, Spec) {
  96. if scope != SCOPE_ANY {
  97. scoped, ok := scopedSpecs[scope]
  98. if ok {
  99. // If the specific scope is defined but empty, this means select
  100. // nothing -- don't fall through to SCOPE_ANY.
  101. if len(scoped) == 0 {
  102. return "", nil
  103. }
  104. specName := scoped[prng.Intn(len(scoped))]
  105. spec, ok := specs[specName]
  106. if !ok {
  107. // specName is not found in specs, which should not happen if
  108. // Validate passes; select nothing in this case.
  109. return "", nil
  110. }
  111. return specName, spec
  112. }
  113. // Fall through to SCOPE_ANY.
  114. }
  115. anyScope, ok := scopedSpecs[SCOPE_ANY]
  116. if !ok || len(anyScope) == 0 {
  117. // No SCOPE_ANY, or SCOPE_ANY is an empty list.
  118. return "", nil
  119. }
  120. specName := anyScope[prng.Intn(len(anyScope))]
  121. spec, ok := specs[specName]
  122. if !ok {
  123. return "", nil
  124. }
  125. return specName, spec
  126. }
  127. // ApplyString applies the Spec to the input string, producing the output string.
  128. //
  129. // The input seed is used for all random generation. The same seed can be
  130. // supplied to produce the same output, for replay.
  131. func (spec Spec) ApplyString(seed *prng.Seed, input string) (string, error) {
  132. value := input
  133. for _, transform := range spec {
  134. re, replacement, err := makeRegexAndRepl(seed, transform)
  135. if err != nil {
  136. return "", errors.Trace(err)
  137. }
  138. value = re.ReplaceAllString(value, replacement)
  139. }
  140. return value, nil
  141. }
  142. // Apply applies the Spec to the input bytes, producing the output bytes.
  143. //
  144. // The input seed is used for all random generation. The same seed can be
  145. // supplied to produce the same output, for replay.
  146. func (spec Spec) Apply(seed *prng.Seed, input []byte) ([]byte, error) {
  147. value := input
  148. for _, transform := range spec {
  149. re, replacement, err := makeRegexAndRepl(seed, transform)
  150. if err != nil {
  151. return nil, errors.Trace(err)
  152. }
  153. value = re.ReplaceAll(value, []byte(replacement))
  154. }
  155. return value, nil
  156. }
  157. // makeRegexAndRepl generates the regex and replacement for a given seed and
  158. // transform. The same seed can be supplied to produce the same output, for
  159. // replay.
  160. func makeRegexAndRepl(seed *prng.Seed, transform [2]string) (*regexp.Regexp, string, error) {
  161. // TODO: the compiled regexp and regen could be cached, but the seed is an
  162. // issue with caching the regen.
  163. args := &regen.GeneratorArgs{
  164. RngSource: prng.NewPRNGWithSeed(seed),
  165. Flags: syntax.OneLine | syntax.NonGreedy,
  166. }
  167. rg, err := regen.NewGenerator(transform[1], args)
  168. if err != nil {
  169. return nil, "", errors.Trace(err)
  170. }
  171. replacement := rg.Generate()
  172. re, err := regexp.Compile(transform[0])
  173. if err != nil {
  174. return nil, "", errors.Trace(err)
  175. }
  176. return re, replacement, nil
  177. }