psinet.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577
  1. /*
  2. * Copyright (c) 2016, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package psinet implements psinet database services. The psinet database is a
  20. // JSON-format file containing information about the Psiphon network, including
  21. // sponsors, home pages, stats regexes, available upgrades, and other servers for
  22. // discovery. This package also implements the Psiphon discovery algorithm.
  23. package psinet
  24. import (
  25. "encoding/hex"
  26. "encoding/json"
  27. "fmt"
  28. "math"
  29. "math/rand"
  30. "strconv"
  31. "strings"
  32. "time"
  33. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  34. )
  // MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY is the maximum age of the
  // database file, measured from its modification time, for which
  // IsValidServerEntryTag still treats the list of valid server entry tags
  // as authoritative.
  const MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY = 48 * time.Hour
  38. // Database serves Psiphon API data requests. It's safe for
  39. // concurrent usage. The Reload function supports hot reloading
  40. // of Psiphon network data while the server is running.
  41. type Database struct {
  42. common.ReloadableFile
  43. Hosts map[string]Host `json:"hosts"`
  44. Servers []Server `json:"servers"`
  45. Sponsors map[string]Sponsor `json:"sponsors"`
  46. Versions map[string][]ClientVersion `json:"client_versions"`
  47. DefaultSponsorID string `json:"default_sponsor_id"`
  48. ValidServerEntryTags map[string]bool `json:"valid_server_entry_tags"`
  49. fileModTime time.Time
  50. }
  // Host is a host record of the psinet database, decoded from a value of the
  // "hosts" JSON object. Its region, meek, and tactics fields are copied into
  // the encoded server entries of the servers that reference it.
  type Host struct {
  	DatacenterName string `json:"datacenter_name"`
  	Id             string `json:"id"`
  	IpAddress      string `json:"ip_address"`

  	// IsTCS marks hosts whose servers' web server certificates include PEM
  	// headers and newlines.
  	IsTCS bool `json:"is_TCS"`

  	MeekCookieEncryptionPublicKey string `json:"meek_cookie_encryption_public_key"`
  	MeekServerObfuscatedKey       string `json:"meek_server_obfuscated_key"`
  	MeekServerPort                int    `json:"meek_server_port"`
  	TacticsRequestPublicKey       string `json:"tactics_request_public_key"`
  	TacticsRequestObfuscatedKey   string `json:"tactics_request_obfuscated_key"`
  	Region                        string `json:"region"`
  }
  // Server is a server record of the psinet database, decoded from an element
  // of the "servers" JSON array.
  type Server struct {
  	// AlternateSshObfuscatedPorts holds port numbers as strings.
  	AlternateSshObfuscatedPorts []string `json:"alternate_ssh_obfuscated_ports"`

  	// Capabilities maps a capability name to whether it is enabled.
  	Capabilities map[string]bool `json:"capabilities"`

  	// DiscoveryDateRange holds a start and an end timestamp in the layout
  	// "2006-01-02T15:04:05".
  	DiscoveryDateRange []string `json:"discovery_date_range"`

  	EgressIpAddress string `json:"egress_ip_address"`

  	// HostId is the key of this server's host in Database.Hosts.
  	HostId string `json:"host_id"`

  	Id                string `json:"id"`
  	InternalIpAddress string `json:"internal_ip_address"`
  	IpAddress         string `json:"ip_address"`
  	IsEmbedded        bool   `json:"is_embedded"`
  	IsPermanent       bool   `json:"is_permanent"`

  	// PropogationChannelId is decoded from "propagation_channel_id"; the
  	// field name keeps its historical spelling.
  	PropogationChannelId string `json:"propagation_channel_id"`

  	SshHostKey                string `json:"ssh_host_key"`
  	SshObfuscatedKey          string `json:"ssh_obfuscated_key"`
  	SshObfuscatedPort         int    `json:"ssh_obfuscated_port"`
  	SshObfuscatedQUICPort     int    `json:"ssh_obfuscated_quic_port"`
  	SshObfuscatedTapdancePort int    `json:"ssh_obfuscated_tapdance_port"`
  	SshPassword               string `json:"ssh_password"`

  	// SshPort is a string in the database, unlike the other SSH ports.
  	SshPort string `json:"ssh_port"`

  	SshUsername          string `json:"ssh_username"`
  	WebServerCertificate string `json:"web_server_certificate"`

  	// WebServerPort is a string in the database, not an int.
  	WebServerPort string `json:"web_server_port"`

  	WebServerSecret      string `json:"web_server_secret"`
  	ConfigurationVersion int    `json:"configuration_version"`
  }
  88. type Sponsor struct {
  89. Banner string
  90. HomePages map[string][]HomePage `json:"home_pages"`
  91. HttpsRequestRegexes []HttpsRequestRegex `json:"https_request_regexes"`
  92. Id string `json:"id"`
  93. MobileHomePages map[string][]HomePage `json:"mobile_home_pages"`
  94. Name string `json:"name"`
  95. PageViewRegexes []PageViewRegex `json:"page_view_regexes"`
  96. WebsiteBanner string `json:"website_banner"`
  97. WebsiteBannerLink string `json:"website_banner_link"`
  98. }
  // Leaf record types of the psinet database.
  type (
  	// ClientVersion is one entry of a platform's client version list.
  	ClientVersion struct {
  		Version string `json:"version"`
  	}

  	// HomePage is a sponsor home page URL and its region.
  	HomePage struct {
  		Region string `json:"region"`
  		Url    string `json:"url"`
  	}

  	// HttpsRequestRegex is a regex and its replacement, used for bytes
  	// transferred stats.
  	HttpsRequestRegex struct {
  		Regex   string `json:"regex"`
  		Replace string `json:"replace"`
  	}

  	// MobileHomePage has the same fields as HomePage.
  	MobileHomePage struct {
  		Region string `json:"region"`
  		Url    string `json:"url"`
  	}

  	// PageViewRegex is a page view regex and its replacement.
  	PageViewRegex struct {
  		Regex   string `json:"regex"`
  		Replace string `json:"replace"`
  	}
  )
  118. // NewDatabase initializes a Database, calling Reload on the specified
  119. // filename.
  120. func NewDatabase(filename string) (*Database, error) {
  121. database := &Database{}
  122. database.ReloadableFile = common.NewReloadableFile(
  123. filename,
  124. true,
  125. func(fileContent []byte, fileModTime time.Time) error {
  126. var newDatabase Database
  127. err := json.Unmarshal(fileContent, &newDatabase)
  128. if err != nil {
  129. return common.ContextError(err)
  130. }
  131. // Note: an unmarshal directly into &database would fail
  132. // to reset to zero value fields not present in the JSON.
  133. database.Hosts = newDatabase.Hosts
  134. database.Servers = newDatabase.Servers
  135. database.Sponsors = newDatabase.Sponsors
  136. database.Versions = newDatabase.Versions
  137. database.DefaultSponsorID = newDatabase.DefaultSponsorID
  138. database.ValidServerEntryTags = newDatabase.ValidServerEntryTags
  139. database.fileModTime = fileModTime
  140. return nil
  141. })
  142. _, err := database.Reload()
  143. if err != nil {
  144. return nil, common.ContextError(err)
  145. }
  146. return database, nil
  147. }
  148. // GetRandomizedHomepages returns a randomly ordered list of home pages
  149. // for the specified sponsor, region, and platform.
  150. func (db *Database) GetRandomizedHomepages(sponsorID, clientRegion string, isMobilePlatform bool) []string {
  151. homepages := db.GetHomepages(sponsorID, clientRegion, isMobilePlatform)
  152. if len(homepages) > 1 {
  153. shuffledHomepages := make([]string, len(homepages))
  154. perm := rand.Perm(len(homepages))
  155. for i, v := range perm {
  156. shuffledHomepages[v] = homepages[i]
  157. }
  158. return shuffledHomepages
  159. }
  160. return homepages
  161. }
  162. // GetHomepages returns a list of home pages for the specified sponsor,
  163. // region, and platform.
  164. func (db *Database) GetHomepages(sponsorID, clientRegion string, isMobilePlatform bool) []string {
  165. db.ReloadableFile.RLock()
  166. defer db.ReloadableFile.RUnlock()
  167. sponsorHomePages := make([]string, 0)
  168. // Sponsor id does not exist: fail gracefully
  169. sponsor, ok := db.Sponsors[sponsorID]
  170. if !ok {
  171. sponsor, ok = db.Sponsors[db.DefaultSponsorID]
  172. if !ok {
  173. return sponsorHomePages
  174. }
  175. }
  176. homePages := sponsor.HomePages
  177. if isMobilePlatform {
  178. if len(sponsor.MobileHomePages) > 0 {
  179. homePages = sponsor.MobileHomePages
  180. }
  181. }
  182. // Case: lookup succeeded and corresponding homepages found for region
  183. homePagesByRegion, ok := homePages[clientRegion]
  184. if ok {
  185. for _, homePage := range homePagesByRegion {
  186. sponsorHomePages = append(sponsorHomePages, strings.Replace(homePage.Url, "client_region=XX", "client_region="+clientRegion, 1))
  187. }
  188. }
  189. // Case: lookup failed or no corresponding homepages found for region --> use default
  190. if len(sponsorHomePages) == 0 {
  191. defaultHomePages, ok := homePages["None"]
  192. if ok {
  193. for _, homePage := range defaultHomePages {
  194. // client_region query parameter substitution
  195. sponsorHomePages = append(sponsorHomePages, strings.Replace(homePage.Url, "client_region=XX", "client_region="+clientRegion, 1))
  196. }
  197. }
  198. }
  199. return sponsorHomePages
  200. }
  201. // GetUpgradeClientVersion returns a new client version when an upgrade is
  202. // indicated for the specified client current version. The result is "" when
  203. // no upgrade is available. Caller should normalize clientPlatform.
  204. func (db *Database) GetUpgradeClientVersion(clientVersion, clientPlatform string) string {
  205. db.ReloadableFile.RLock()
  206. defer db.ReloadableFile.RUnlock()
  207. // Check lastest version number against client version number
  208. clientVersions, ok := db.Versions[clientPlatform]
  209. if !ok {
  210. return ""
  211. }
  212. if len(clientVersions) == 0 {
  213. return ""
  214. }
  215. // NOTE: Assumes versions list is in ascending version order
  216. lastVersion := clientVersions[len(clientVersions)-1].Version
  217. lastVersionInt, err := strconv.Atoi(lastVersion)
  218. if err != nil {
  219. return ""
  220. }
  221. clientVersionInt, err := strconv.Atoi(clientVersion)
  222. if err != nil {
  223. return ""
  224. }
  225. // Return latest version if upgrade needed
  226. if lastVersionInt > clientVersionInt {
  227. return lastVersion
  228. }
  229. return ""
  230. }
  231. // GetHttpsRequestRegexes returns bytes transferred stats regexes for the
  232. // specified sponsor.
  233. func (db *Database) GetHttpsRequestRegexes(sponsorID string) []map[string]string {
  234. db.ReloadableFile.RLock()
  235. defer db.ReloadableFile.RUnlock()
  236. regexes := make([]map[string]string, 0)
  237. sponsor, ok := db.Sponsors[sponsorID]
  238. if !ok {
  239. sponsor, _ = db.Sponsors[db.DefaultSponsorID]
  240. }
  241. // If neither sponsorID or DefaultSponsorID were found, sponsor will be the
  242. // zero value of the map, an empty Sponsor struct.
  243. for _, sponsorRegex := range sponsor.HttpsRequestRegexes {
  244. regex := make(map[string]string)
  245. regex["replace"] = sponsorRegex.Replace
  246. regex["regex"] = sponsorRegex.Regex
  247. regexes = append(regexes, regex)
  248. }
  249. return regexes
  250. }
  251. // DiscoverServers selects new encoded server entries to be "discovered" by
  252. // the client, using the discoveryValue -- a function of the client's IP
  253. // address -- as the input into the discovery algorithm.
  254. // The server list (db.Servers) loaded from JSON is stored as an array instead of
  255. // a map to ensure servers are discovered deterministically. Each iteration over a
  256. // map in go is seeded with a random value which causes non-deterministic ordering.
  257. func (db *Database) DiscoverServers(discoveryValue int) []string {
  258. db.ReloadableFile.RLock()
  259. defer db.ReloadableFile.RUnlock()
  260. var servers []Server
  261. discoveryDate := time.Now().UTC()
  262. candidateServers := make([]Server, 0)
  263. for _, server := range db.Servers {
  264. var start time.Time
  265. var end time.Time
  266. var err error
  267. // All servers that are discoverable on this day are eligible for discovery
  268. if len(server.DiscoveryDateRange) != 0 {
  269. start, err = time.Parse("2006-01-02T15:04:05", server.DiscoveryDateRange[0])
  270. if err != nil {
  271. continue
  272. }
  273. end, err = time.Parse("2006-01-02T15:04:05", server.DiscoveryDateRange[1])
  274. if err != nil {
  275. continue
  276. }
  277. if discoveryDate.After(start) && discoveryDate.Before(end) {
  278. candidateServers = append(candidateServers, server)
  279. }
  280. }
  281. }
  282. timeInSeconds := int(discoveryDate.Unix())
  283. servers = selectServers(candidateServers, timeInSeconds, discoveryValue)
  284. encodedServerEntries := make([]string, 0)
  285. for _, server := range servers {
  286. encodedServerEntries = append(encodedServerEntries, db.getEncodedServerEntry(server))
  287. }
  288. return encodedServerEntries
  289. }
  290. // Combine client IP address and time-of-day strategies to give out different
  291. // discovery servers to different clients. The aim is to achieve defense against
  292. // enumerability. We also want to achieve a degree of load balancing clients
  293. // and these strategies are expected to have reasonably random distribution,
  294. // even for a cluster of users coming from the same network.
  295. //
  296. // We only select one server: multiple results makes enumeration easier; the
  297. // strategies have a built-in load balancing effect; and date range discoverability
  298. // means a client will actually learn more servers later even if they happen to
  299. // always pick the same result at this point.
  300. //
  301. // This is a blended strategy: as long as there are enough servers to pick from,
  302. // both aspects determine which server is selected. IP address is given the
  303. // priority: if there are only a couple of servers, for example, IP address alone
  304. // determines the outcome.
  305. func selectServers(servers []Server, timeInSeconds, discoveryValue int) []Server {
  306. TIME_GRANULARITY := 3600
  307. if len(servers) == 0 {
  308. return nil
  309. }
  310. // Time truncated to an hour
  311. timeStrategyValue := timeInSeconds / TIME_GRANULARITY
  312. // Divide servers into buckets. The bucket count is chosen such that the number
  313. // of buckets and the number of items in each bucket are close (using sqrt).
  314. // IP address selects the bucket, time selects the item in the bucket.
  315. // NOTE: this code assumes that the range of possible timeStrategyValues
  316. // and discoveryValues are sufficient to index to all bucket items.
  317. bucketCount := calculateBucketCount(len(servers))
  318. buckets := bucketizeServerList(servers, bucketCount)
  319. if len(buckets) == 0 {
  320. return nil
  321. }
  322. bucket := buckets[discoveryValue%len(buckets)]
  323. if len(bucket) == 0 {
  324. return nil
  325. }
  326. server := bucket[timeStrategyValue%len(bucket)]
  327. serverList := make([]Server, 1)
  328. serverList[0] = server
  329. return serverList
  330. }
  // calculateBucketCount returns the square root of length, rounded up. This
  // is the number of buckets such that the first strategy picks among about
  // the same number of choices as the second strategy, giving an edge to the
  // "outer" strategy.
  func calculateBucketCount(length int) int {
  	root := math.Sqrt(float64(length))
  	return int(math.Ceil(root))
  }
  336. // bucketizeServerList creates nearly equal sized slices of the input list.
  337. func bucketizeServerList(servers []Server, bucketCount int) [][]Server {
  338. // This code creates the same partitions as legacy servers:
  339. // https://bitbucket.org/psiphon/psiphon-circumvention-system/src/03bc1a7e51e7c85a816e370bb3a6c755fd9c6fee/Automation/psi_ops_discovery.py
  340. //
  341. // Both use the same algorithm from:
  342. // http://stackoverflow.com/questions/2659900/python-slicing-a-list-into-n-nearly-equal-length-partitions
  343. // TODO: this partition is constant for fixed Database content, so it could
  344. // be done once and cached in the Database ReloadableFile reloadAction.
  345. buckets := make([][]Server, bucketCount)
  346. division := float64(len(servers)) / float64(bucketCount)
  347. for i := 0; i < bucketCount; i++ {
  348. start := int((division * float64(i)) + 0.5)
  349. end := int((division * (float64(i) + 1)) + 0.5)
  350. buckets[i] = servers[start:end]
  351. }
  352. return buckets
  353. }
  354. // Return hex encoded server entry string for comsumption by client.
  355. // Newer clients ignore the legacy fields and only utilize the extended (new) config.
  356. func (db *Database) getEncodedServerEntry(server Server) string {
  357. host, hostExists := db.Hosts[server.HostId]
  358. if !hostExists {
  359. return ""
  360. }
  361. // TCS web server certificate has PEM headers and newlines, so strip those now
  362. // for legacy format compatibility
  363. webServerCertificate := server.WebServerCertificate
  364. if host.IsTCS {
  365. splitCert := strings.Split(server.WebServerCertificate, "\n")
  366. if len(splitCert) <= 2 {
  367. webServerCertificate = ""
  368. } else {
  369. webServerCertificate = strings.Join(splitCert[1:len(splitCert)-2], "")
  370. }
  371. }
  372. // Double-check that we're not giving our blank server credentials
  373. if len(server.IpAddress) <= 1 || len(server.WebServerPort) <= 1 || len(server.WebServerSecret) <= 1 || len(webServerCertificate) <= 1 {
  374. return ""
  375. }
  376. // Extended (new) entry fields are in a JSON string
  377. var extendedConfig struct {
  378. IpAddress string `json:"ipAddress"`
  379. WebServerPort string `json:"webServerPort"` // not an int
  380. WebServerSecret string `json:"webServerSecret"`
  381. WebServerCertificate string `json:"webServerCertificate"`
  382. SshPort int `json:"sshPort"`
  383. SshUsername string `json:"sshUsername"`
  384. SshPassword string `json:"sshPassword"`
  385. SshHostKey string `json:"sshHostKey"`
  386. SshObfuscatedPort int `json:"sshObfuscatedPort"`
  387. SshObfuscatedQUICPort int `json:"sshObfuscatedQUICPort"`
  388. SshObfuscatedTapdancePort int `json:"sshObfuscatedTapdancePort"`
  389. SshObfuscatedKey string `json:"sshObfuscatedKey"`
  390. Capabilities []string `json:"capabilities"`
  391. Region string `json:"region"`
  392. MeekServerPort int `json:"meekServerPort"`
  393. MeekCookieEncryptionPublicKey string `json:"meekCookieEncryptionPublicKey"`
  394. MeekObfuscatedKey string `json:"meekObfuscatedKey"`
  395. TacticsRequestPublicKey string `json:"tacticsRequestPublicKey"`
  396. TacticsRequestObfuscatedKey string `json:"tacticsRequestObfuscatedKey"`
  397. ConfigurationVersion int `json:"configurationVersion"`
  398. }
  399. // NOTE: also putting original values in extended config for easier parsing by new clients
  400. extendedConfig.IpAddress = server.IpAddress
  401. extendedConfig.WebServerPort = server.WebServerPort
  402. extendedConfig.WebServerSecret = server.WebServerSecret
  403. extendedConfig.WebServerCertificate = webServerCertificate
  404. sshPort, err := strconv.Atoi(server.SshPort)
  405. if err != nil {
  406. extendedConfig.SshPort = 0
  407. } else {
  408. extendedConfig.SshPort = sshPort
  409. }
  410. extendedConfig.SshUsername = server.SshUsername
  411. extendedConfig.SshPassword = server.SshPassword
  412. sshHostKeyType, sshHostKey := parseSshKeyString(server.SshHostKey)
  413. if strings.Compare(sshHostKeyType, "ssh-rsa") == 0 {
  414. extendedConfig.SshHostKey = sshHostKey
  415. } else {
  416. extendedConfig.SshHostKey = ""
  417. }
  418. extendedConfig.SshObfuscatedPort = server.SshObfuscatedPort
  419. // Use the latest alternate port unless tunneling through meek
  420. if len(server.AlternateSshObfuscatedPorts) > 0 && !server.Capabilities["UNFRONTED-MEEK"] {
  421. port, err := strconv.Atoi(server.AlternateSshObfuscatedPorts[len(server.AlternateSshObfuscatedPorts)-1])
  422. if err == nil {
  423. extendedConfig.SshObfuscatedPort = port
  424. }
  425. }
  426. extendedConfig.SshObfuscatedQUICPort = server.SshObfuscatedQUICPort
  427. extendedConfig.SshObfuscatedTapdancePort = server.SshObfuscatedTapdancePort
  428. extendedConfig.SshObfuscatedKey = server.SshObfuscatedKey
  429. extendedConfig.Region = host.Region
  430. extendedConfig.MeekCookieEncryptionPublicKey = host.MeekCookieEncryptionPublicKey
  431. extendedConfig.MeekServerPort = host.MeekServerPort
  432. extendedConfig.MeekObfuscatedKey = host.MeekServerObfuscatedKey
  433. extendedConfig.TacticsRequestPublicKey = host.TacticsRequestPublicKey
  434. extendedConfig.TacticsRequestObfuscatedKey = host.TacticsRequestObfuscatedKey
  435. serverCapabilities := make(map[string]bool, 0)
  436. for capability, enabled := range server.Capabilities {
  437. serverCapabilities[capability] = enabled
  438. }
  439. if serverCapabilities["UNFRONTED-MEEK"] && host.MeekServerPort == 443 {
  440. serverCapabilities["UNFRONTED-MEEK"] = false
  441. serverCapabilities["UNFRONTED-MEEK-HTTPS"] = true
  442. }
  443. for capability, enabled := range serverCapabilities {
  444. if enabled == true {
  445. extendedConfig.Capabilities = append(extendedConfig.Capabilities, capability)
  446. }
  447. }
  448. extendedConfig.ConfigurationVersion = server.ConfigurationVersion
  449. jsonDump, err := json.Marshal(extendedConfig)
  450. if err != nil {
  451. return ""
  452. }
  453. // Legacy format + extended (new) config
  454. prefixString := fmt.Sprintf("%s %s %s %s ", server.IpAddress, server.WebServerPort, server.WebServerSecret, webServerCertificate)
  455. return hex.EncodeToString(append([]byte(prefixString)[:], []byte(jsonDump)[:]...))
  456. }
  // parseSshKeyString splits a string of format "ssh-key-type ssh-key" into
  // its key type and key. Both results are "" unless splitting the input on
  // single spaces gives exactly two parts.
  func parseSshKeyString(sshKeyString string) (keyType string, key string) {
  	parts := strings.Split(sshKeyString, " ")
  	if len(parts) == 2 {
  		return parts[0], parts[1]
  	}
  	return "", ""
  }
  465. // IsValidServerEntryTag checks if the specified server entry tag is valid.
  466. func (db *Database) IsValidServerEntryTag(serverEntryTag string) bool {
  467. db.ReloadableFile.RLock()
  468. defer db.ReloadableFile.RUnlock()
  469. // Default to "valid" if the valid list is unexpectedly empty or stale. This
  470. // helps prevent premature client-side server-entry pruning when there is an
  471. // issue with updating the database.
  472. if len(db.ValidServerEntryTags) == 0 ||
  473. db.fileModTime.Add(MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY).Before(time.Now()) {
  474. return true
  475. }
  476. // The tag must be in the map and have the value "true".
  477. return db.ValidServerEntryTags[serverEntryTag]
  478. }