psinet.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. /*
  2. * Copyright (c) 2016, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package psinet implements psinet database services. The psinet database is a
  20. // JSON-format file containing information about the Psiphon network, including
  21. // sponsors, home pages, stats regexes, available upgrades, and other servers for
  22. // discovery. This package also implements the Psiphon discovery algorithm.
  23. package psinet
  24. import (
  25. "crypto/md5"
  26. "encoding/json"
  27. "math"
  28. "math/rand"
  29. "strconv"
  30. "strings"
  31. "time"
  32. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  33. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  34. )
  35. const (
  36. MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY = 48 * time.Hour
  37. )
  38. // Database serves Psiphon API data requests. It's safe for
  39. // concurrent usage. The Reload function supports hot reloading
  40. // of Psiphon network data while the server is running.
  41. type Database struct {
  42. common.ReloadableFile
  43. Sponsors map[string]*Sponsor `json:"sponsors"`
  44. Versions map[string][]ClientVersion `json:"client_versions"`
  45. DefaultSponsorID string `json:"default_sponsor_id"`
  46. DefaultAlertActionURLs map[string][]string `json:"default_alert_action_urls"`
  47. ValidServerEntryTags map[string]bool `json:"valid_server_entry_tags"`
  48. DiscoveryServers []*DiscoveryServer `json:"discovery_servers"`
  49. fileModTime time.Time
  50. }
  51. type DiscoveryServer struct {
  52. DiscoveryDateRange []time.Time `json:"discovery_date_range"`
  53. EncodedServerEntry string `json:"encoded_server_entry"`
  54. }
  55. type Sponsor struct {
  56. ID string `json:"id"`
  57. HomePages map[string][]HomePage `json:"home_pages"`
  58. MobileHomePages map[string][]HomePage `json:"mobile_home_pages"`
  59. AlertActionURLs map[string][]string `json:"alert_action_urls"`
  60. HttpsRequestRegexes []HttpsRequestRegex `json:"https_request_regexes"`
  61. domainBytesChecksum []byte `json:"-"`
  62. }
  63. type ClientVersion struct {
  64. Version string `json:"version"`
  65. }
  66. type HomePage struct {
  67. Region string `json:"region"`
  68. URL string `json:"url"`
  69. }
  70. type HttpsRequestRegex struct {
  71. Regex string `json:"regex"`
  72. Replace string `json:"replace"`
  73. }
  74. // NewDatabase initializes a Database, calling Reload on the specified
  75. // filename.
  76. func NewDatabase(filename string) (*Database, error) {
  77. database := &Database{}
  78. database.ReloadableFile = common.NewReloadableFile(
  79. filename,
  80. true,
  81. func(fileContent []byte, fileModTime time.Time) error {
  82. var newDatabase *Database
  83. err := json.Unmarshal(fileContent, &newDatabase)
  84. if err != nil {
  85. return errors.Trace(err)
  86. }
  87. // Note: an unmarshal directly into &database would fail
  88. // to reset to zero value fields not present in the JSON.
  89. database.Sponsors = newDatabase.Sponsors
  90. database.Versions = newDatabase.Versions
  91. database.DefaultSponsorID = newDatabase.DefaultSponsorID
  92. database.DefaultAlertActionURLs = newDatabase.DefaultAlertActionURLs
  93. database.ValidServerEntryTags = newDatabase.ValidServerEntryTags
  94. database.DiscoveryServers = newDatabase.DiscoveryServers
  95. database.fileModTime = fileModTime
  96. for _, sponsor := range database.Sponsors {
  97. value, err := json.Marshal(sponsor.HttpsRequestRegexes)
  98. if err != nil {
  99. return errors.Trace(err)
  100. }
  101. // MD5 hash is used solely as a data checksum and not for any
  102. // security purpose.
  103. checksum := md5.Sum(value)
  104. sponsor.domainBytesChecksum = checksum[:]
  105. }
  106. return nil
  107. })
  108. _, err := database.Reload()
  109. if err != nil {
  110. return nil, errors.Trace(err)
  111. }
  112. return database, nil
  113. }
  114. // GetRandomizedHomepages returns a randomly ordered list of home pages
  115. // for the specified sponsor, region, and platform.
  116. func (db *Database) GetRandomizedHomepages(
  117. sponsorID, clientRegion, clientASN string, isMobilePlatform bool) []string {
  118. homepages := db.GetHomepages(sponsorID, clientRegion, clientASN, isMobilePlatform)
  119. if len(homepages) > 1 {
  120. shuffledHomepages := make([]string, len(homepages))
  121. perm := rand.Perm(len(homepages))
  122. for i, v := range perm {
  123. shuffledHomepages[v] = homepages[i]
  124. }
  125. return shuffledHomepages
  126. }
  127. return homepages
  128. }
  129. // GetHomepages returns a list of home pages for the specified sponsor,
  130. // region, and platform.
  131. func (db *Database) GetHomepages(
  132. sponsorID, clientRegion, clientASN string, isMobilePlatform bool) []string {
  133. db.ReloadableFile.RLock()
  134. defer db.ReloadableFile.RUnlock()
  135. sponsorHomePages := make([]string, 0)
  136. // Sponsor id does not exist: fail gracefully
  137. sponsor, ok := db.Sponsors[sponsorID]
  138. if !ok {
  139. sponsor, ok = db.Sponsors[db.DefaultSponsorID]
  140. if !ok {
  141. return sponsorHomePages
  142. }
  143. }
  144. if sponsor == nil {
  145. return sponsorHomePages
  146. }
  147. homePages := sponsor.HomePages
  148. if isMobilePlatform {
  149. if len(sponsor.MobileHomePages) > 0 {
  150. homePages = sponsor.MobileHomePages
  151. }
  152. }
  153. // Case: lookup succeeded and corresponding homepages found for region
  154. homePagesByRegion, ok := homePages[clientRegion]
  155. if ok {
  156. for _, homePage := range homePagesByRegion {
  157. sponsorHomePages = append(
  158. sponsorHomePages, homepageQueryParameterSubstitution(homePage.URL, clientRegion, clientASN))
  159. }
  160. }
  161. // Case: lookup failed or no corresponding homepages found for region --> use default
  162. if len(sponsorHomePages) == 0 {
  163. defaultHomePages, ok := homePages["None"]
  164. if ok {
  165. for _, homePage := range defaultHomePages {
  166. // client_region query parameter substitution
  167. sponsorHomePages = append(
  168. sponsorHomePages, homepageQueryParameterSubstitution(homePage.URL, clientRegion, clientASN))
  169. }
  170. }
  171. }
  172. return sponsorHomePages
  173. }
  174. func homepageQueryParameterSubstitution(
  175. url, clientRegion, clientASN string) string {
  176. return strings.Replace(
  177. strings.Replace(url, "client_region=XX", "client_region="+clientRegion, 1),
  178. "client_asn=XX", "client_asn="+clientASN, 1)
  179. }
  180. // GetAlertActionURLs returns a list of alert action URLs for the specified
  181. // alert reason and sponsor.
  182. func (db *Database) GetAlertActionURLs(
  183. alertReason, sponsorID, clientRegion, clientASN string) []string {
  184. db.ReloadableFile.RLock()
  185. defer db.ReloadableFile.RUnlock()
  186. // Prefer URLs from the Sponsor.AlertActionURLs. When there are no sponsor
  187. // URLs, then select from Database.DefaultAlertActionURLs.
  188. actionURLs := []string{}
  189. sponsor := db.Sponsors[sponsorID]
  190. if sponsor != nil {
  191. for _, URL := range sponsor.AlertActionURLs[alertReason] {
  192. actionURLs = append(
  193. actionURLs, homepageQueryParameterSubstitution(URL, clientRegion, clientASN))
  194. }
  195. }
  196. if len(actionURLs) == 0 {
  197. for _, URL := range db.DefaultAlertActionURLs[alertReason] {
  198. actionURLs = append(
  199. actionURLs, homepageQueryParameterSubstitution(URL, clientRegion, clientASN))
  200. }
  201. }
  202. return actionURLs
  203. }
  204. // GetUpgradeClientVersion returns a new client version when an upgrade is
  205. // indicated for the specified client current version. The result is "" when
  206. // no upgrade is available. Caller should normalize clientPlatform.
  207. func (db *Database) GetUpgradeClientVersion(clientVersion, clientPlatform string) string {
  208. db.ReloadableFile.RLock()
  209. defer db.ReloadableFile.RUnlock()
  210. // Check lastest version number against client version number
  211. clientVersions, ok := db.Versions[clientPlatform]
  212. if !ok {
  213. return ""
  214. }
  215. if len(clientVersions) == 0 {
  216. return ""
  217. }
  218. // NOTE: Assumes versions list is in ascending version order
  219. lastVersion := clientVersions[len(clientVersions)-1].Version
  220. lastVersionInt, err := strconv.Atoi(lastVersion)
  221. if err != nil {
  222. return ""
  223. }
  224. clientVersionInt, err := strconv.Atoi(clientVersion)
  225. if err != nil {
  226. return ""
  227. }
  228. // Return latest version if upgrade needed
  229. if lastVersionInt > clientVersionInt {
  230. return lastVersion
  231. }
  232. return ""
  233. }
  234. // GetHttpsRequestRegexes returns bytes transferred stats regexes and the
  235. // associated checksum for the specified sponsor. The checksum may be nil.
  236. func (db *Database) GetHttpsRequestRegexes(sponsorID string) ([]map[string]string, []byte) {
  237. db.ReloadableFile.RLock()
  238. defer db.ReloadableFile.RUnlock()
  239. regexes := make([]map[string]string, 0)
  240. sponsor, ok := db.Sponsors[sponsorID]
  241. if !ok {
  242. sponsor = db.Sponsors[db.DefaultSponsorID]
  243. }
  244. if sponsor == nil {
  245. return regexes, nil
  246. }
  247. // If neither sponsorID or DefaultSponsorID were found, sponsor will be the
  248. // zero value of the map, an empty Sponsor struct.
  249. for _, sponsorRegex := range sponsor.HttpsRequestRegexes {
  250. regex := make(map[string]string)
  251. regex["replace"] = sponsorRegex.Replace
  252. regex["regex"] = sponsorRegex.Regex
  253. regexes = append(regexes, regex)
  254. }
  255. return regexes, sponsor.domainBytesChecksum
  256. }
  257. // GetDomainBytesChecksum returns the bytes transferred stats regexes
  258. // checksum for the specified sponsor. The checksum may be nil.
  259. func (db *Database) GetDomainBytesChecksum(sponsorID string) []byte {
  260. db.ReloadableFile.RLock()
  261. defer db.ReloadableFile.RUnlock()
  262. sponsor, ok := db.Sponsors[sponsorID]
  263. if !ok {
  264. sponsor = db.Sponsors[db.DefaultSponsorID]
  265. }
  266. if sponsor == nil {
  267. return nil
  268. }
  269. return sponsor.domainBytesChecksum
  270. }
  271. // DiscoverServers selects new encoded server entries to be "discovered" by
  272. // the client, using the discoveryValue -- a function of the client's IP
  273. // address -- as the input into the discovery algorithm.
  274. func (db *Database) DiscoverServers(discoveryValue int) []string {
  275. db.ReloadableFile.RLock()
  276. defer db.ReloadableFile.RUnlock()
  277. var servers []*DiscoveryServer
  278. discoveryDate := time.Now().UTC()
  279. candidateServers := make([]*DiscoveryServer, 0)
  280. for _, server := range db.DiscoveryServers {
  281. // All servers that are discoverable on this day are eligible for discovery
  282. if len(server.DiscoveryDateRange) == 2 &&
  283. discoveryDate.After(server.DiscoveryDateRange[0]) &&
  284. discoveryDate.Before(server.DiscoveryDateRange[1]) {
  285. candidateServers = append(candidateServers, server)
  286. }
  287. }
  288. timeInSeconds := int(discoveryDate.Unix())
  289. servers = selectServers(candidateServers, timeInSeconds, discoveryValue)
  290. encodedServerEntries := make([]string, 0)
  291. for _, server := range servers {
  292. encodedServerEntries = append(encodedServerEntries, server.EncodedServerEntry)
  293. }
  294. return encodedServerEntries
  295. }
  296. // Combine client IP address and time-of-day strategies to give out different
  297. // discovery servers to different clients. The aim is to achieve defense against
  298. // enumerability. We also want to achieve a degree of load balancing clients
  299. // and these strategies are expected to have reasonably random distribution,
  300. // even for a cluster of users coming from the same network.
  301. //
  302. // We only select one server: multiple results makes enumeration easier; the
  303. // strategies have a built-in load balancing effect; and date range discoverability
  304. // means a client will actually learn more servers later even if they happen to
  305. // always pick the same result at this point.
  306. //
  307. // This is a blended strategy: as long as there are enough servers to pick from,
  308. // both aspects determine which server is selected. IP address is given the
  309. // priority: if there are only a couple of servers, for example, IP address alone
  310. // determines the outcome.
  311. func selectServers(
  312. servers []*DiscoveryServer, timeInSeconds, discoveryValue int) []*DiscoveryServer {
  313. TIME_GRANULARITY := 3600
  314. if len(servers) == 0 {
  315. return nil
  316. }
  317. // Time truncated to an hour
  318. timeStrategyValue := timeInSeconds / TIME_GRANULARITY
  319. // Divide servers into buckets. The bucket count is chosen such that the number
  320. // of buckets and the number of items in each bucket are close (using sqrt).
  321. // IP address selects the bucket, time selects the item in the bucket.
  322. // NOTE: this code assumes that the range of possible timeStrategyValues
  323. // and discoveryValues are sufficient to index to all bucket items.
  324. bucketCount := calculateBucketCount(len(servers))
  325. buckets := bucketizeServerList(servers, bucketCount)
  326. if len(buckets) == 0 {
  327. return nil
  328. }
  329. bucket := buckets[discoveryValue%len(buckets)]
  330. if len(bucket) == 0 {
  331. return nil
  332. }
  333. server := bucket[timeStrategyValue%len(bucket)]
  334. serverList := make([]*DiscoveryServer, 1)
  335. serverList[0] = server
  336. return serverList
  337. }
  338. // Number of buckets such that first strategy picks among about the same number
  339. // of choices as the second strategy. Gives an edge to the "outer" strategy.
  340. func calculateBucketCount(length int) int {
  341. return int(math.Ceil(math.Sqrt(float64(length))))
  342. }
  343. // bucketizeServerList creates nearly equal sized slices of the input list.
  344. func bucketizeServerList(servers []*DiscoveryServer, bucketCount int) [][]*DiscoveryServer {
  345. // This code creates the same partitions as legacy servers:
  346. // https://github.com/Psiphon-Inc/psiphon-automation/blob/685f91a85bcdb33a75a200d936eadcb0686eadd7/Automation/psi_ops_discovery.py
  347. //
  348. // Both use the same algorithm from:
  349. // http://stackoverflow.com/questions/2659900/python-slicing-a-list-into-n-nearly-equal-length-partitions
  350. // TODO: this partition is constant for fixed Database content, so it could
  351. // be done once and cached in the Database ReloadableFile reloadAction.
  352. buckets := make([][]*DiscoveryServer, bucketCount)
  353. division := float64(len(servers)) / float64(bucketCount)
  354. for i := 0; i < bucketCount; i++ {
  355. start := int((division * float64(i)) + 0.5)
  356. end := int((division * (float64(i) + 1)) + 0.5)
  357. buckets[i] = servers[start:end]
  358. }
  359. return buckets
  360. }
  361. // IsValidServerEntryTag checks if the specified server entry tag is valid.
  362. func (db *Database) IsValidServerEntryTag(serverEntryTag string) bool {
  363. db.ReloadableFile.RLock()
  364. defer db.ReloadableFile.RUnlock()
  365. // Default to "valid" if the valid list is unexpectedly empty or stale. This
  366. // helps prevent premature client-side server-entry pruning when there is an
  367. // issue with updating the database.
  368. if len(db.ValidServerEntryTags) == 0 ||
  369. db.fileModTime.Add(MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY).Before(time.Now()) {
  370. return true
  371. }
  372. // The tag must be in the map and have the value "true".
  373. return db.ValidServerEntryTags[serverEntryTag]
  374. }