dataStore_bolt.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. //go:build !PSIPHON_USE_BADGER_DB && !PSIPHON_USE_FILES_DB
  2. // +build !PSIPHON_USE_BADGER_DB,!PSIPHON_USE_FILES_DB
  3. /*
  4. * Copyright (c) 2018, Psiphon Inc.
  5. * All rights reserved.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. */
  21. package psiphon
  22. import (
  23. std_errors "errors"
  24. "fmt"
  25. "os"
  26. "path/filepath"
  27. "runtime/debug"
  28. "sync/atomic"
  29. "time"
  30. "github.com/Psiphon-Labs/bolt"
  31. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  32. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  33. )
  // OPEN_DB_RETRIES is the number of additional open attempts that
  // datastoreOpenDB makes, beyond the initial attempt, when it is called with
  // retryAndReset set.
  const OPEN_DB_RETRIES = 2
  // datastoreDB wraps an open bolt database and tracks whether the datastore
  // has entered the failed state after a recovered panic.
  type datastoreDB struct {
  	// boltDB is the underlying open bolt database handle.
  	boltDB *bolt.DB
  	// filename is the path of the datastore file; it is used to report the
  	// file size in getDataStoreMetrics.
  	filename string
  	// isFailed is read and written atomically; it is set to 1 by
  	// setDatastoreFailed and checked by isDatastoreFailed.
  	isFailed int32
  }
  // datastoreTx wraps a bolt transaction together with the datastoreDB that
  // created it, so that transaction operations can check and set the
  // datastore failed state.
  type datastoreTx struct {
  	// db is the owning datastore.
  	db *datastoreDB
  	// boltTx is the underlying bolt transaction.
  	boltTx *bolt.Tx
  }
  // datastoreBucket wraps a bolt bucket together with the owning datastoreDB,
  // so that bucket operations can check and set the datastore failed state.
  type datastoreBucket struct {
  	// db is the owning datastore.
  	db *datastoreDB
  	// boltBucket is the underlying bolt bucket. datastoreTx.bucket sets this
  	// to nil when the datastore is in the failed state.
  	boltBucket *bolt.Bucket
  }
  // datastoreCursor wraps a bolt cursor together with the owning datastoreDB,
  // so that cursor operations can check and set the datastore failed state.
  type datastoreCursor struct {
  	// db is the owning datastore.
  	db *datastoreDB
  	// boltCursor is the underlying bolt cursor. datastoreBucket.cursor sets
  	// this to nil when the datastore is in the failed state.
  	boltCursor *bolt.Cursor
  }
  54. func datastoreOpenDB(
  55. rootDataDirectory string, retryAndReset bool) (*datastoreDB, error) {
  56. var db *datastoreDB
  57. var err error
  58. attempts := 1
  59. if retryAndReset {
  60. attempts += OPEN_DB_RETRIES
  61. }
  62. reset := false
  63. for attempt := 0; attempt < attempts; attempt++ {
  64. db, err = tryDatastoreOpenDB(rootDataDirectory, reset)
  65. if err == nil {
  66. break
  67. }
  68. NoticeWarning("tryDatastoreOpenDB failed: %s", err)
  69. // The datastore file may be corrupt, so, in subsequent iterations,
  70. // set the "reset" flag and attempt to delete the file and try again.
  71. //
  72. // Don't reset the datastore when open failed due to timeout obtaining
  73. // the file lock, as the datastore is simply locked by another
  74. // process and not corrupt. As the file lock is advisory, deleting
  75. // the file would succeed despite the lock. In this case, still retry
  76. // in case the the lock is released.
  77. reset = !std_errors.Is(err, bolt.ErrTimeout)
  78. }
  79. return db, err
  80. }
  81. func tryDatastoreOpenDB(
  82. rootDataDirectory string, reset bool) (retdb *datastoreDB, reterr error) {
  83. // Testing indicates that the bolt Check function can raise SIGSEGV due to
  84. // invalid mmap buffer accesses in cases such as opening a valid but
  85. // truncated datastore file.
  86. //
  87. // To handle this, we temporarily set SetPanicOnFault in order to treat the
  88. // fault as a panic, recover any panic, and return an error which will result
  89. // in a retry with reset.
  90. //
  91. // Limitation: another potential crash case is "fatal error: out of
  92. // memory" due to bolt.freelist.read attempting to allocate a slice using
  93. // a corrupted size value on disk. There is no way to recover from this
  94. // fatal.
  95. // Begin recovery preamble
  96. panicOnFault := debug.SetPanicOnFault(true)
  97. defer debug.SetPanicOnFault(panicOnFault)
  98. defer func() {
  99. if r := recover(); r != nil {
  100. retdb = nil
  101. reterr = errors.Tracef("panic: %v", r)
  102. }
  103. }()
  104. // End recovery preamble
  105. filename := filepath.Join(rootDataDirectory, "psiphon.boltdb")
  106. if reset {
  107. NoticeWarning("tryDatastoreOpenDB: reset")
  108. os.Remove(filename)
  109. }
  110. // A typical Psiphon datastore will not have a large, fragmented freelist.
  111. // For this reason, we're not setting FreelistType to FreelistMapType or
  112. // enabling NoFreelistSync. The latter option has a trade-off of slower
  113. // start up time.
  114. //
  115. // Monitor freelist stats in DataStoreMetrics in diagnostics and consider
  116. // setting these options if necessary.
  117. newDB, err := bolt.Open(filename, 0600, &bolt.Options{Timeout: 1 * time.Second})
  118. if err != nil {
  119. return nil, errors.Trace(err)
  120. }
  121. // Run consistency checks on datastore and emit errors for diagnostics
  122. // purposes. We assume this will complete quickly for typical size Psiphon
  123. // datastores and wait for the check to complete before proceeding.
  124. err = newDB.View(func(tx *bolt.Tx) error {
  125. return tx.SynchronousCheck()
  126. })
  127. if err != nil {
  128. return nil, errors.Trace(err)
  129. }
  130. err = newDB.Update(func(tx *bolt.Tx) error {
  131. requiredBuckets := [][]byte{
  132. datastoreServerEntriesBucket,
  133. datastoreServerEntryTagsBucket,
  134. datastoreServerEntryTombstoneTagsBucket,
  135. datastoreUrlETagsBucket,
  136. datastoreKeyValueBucket,
  137. datastoreRemoteServerListStatsBucket,
  138. datastoreFailedTunnelStatsBucket,
  139. datastoreSLOKsBucket,
  140. datastoreTacticsBucket,
  141. datastoreSpeedTestSamplesBucket,
  142. datastoreDialParametersBucket,
  143. datastoreNetworkReplayParametersBucket,
  144. }
  145. for _, bucket := range requiredBuckets {
  146. _, err := tx.CreateBucketIfNotExists(bucket)
  147. if err != nil {
  148. return err
  149. }
  150. }
  151. return nil
  152. })
  153. if err != nil {
  154. return nil, errors.Trace(err)
  155. }
  156. // Cleanup obsolete buckets
  157. err = newDB.Update(func(tx *bolt.Tx) error {
  158. obsoleteBuckets := [][]byte{
  159. []byte("tunnelStats"),
  160. []byte("rankedServerEntries"),
  161. []byte("splitTunnelRouteETags"),
  162. []byte("splitTunnelRouteData"),
  163. }
  164. for _, obsoleteBucket := range obsoleteBuckets {
  165. if tx.Bucket(obsoleteBucket) != nil {
  166. err := tx.DeleteBucket(obsoleteBucket)
  167. if err != nil {
  168. NoticeWarning("DeleteBucket %s error: %s", obsoleteBucket, err)
  169. // Continue, since this is not fatal
  170. }
  171. }
  172. }
  173. return nil
  174. })
  175. if err != nil {
  176. return nil, errors.Trace(err)
  177. }
  178. return &datastoreDB{
  179. boltDB: newDB,
  180. filename: filename,
  181. }, nil
  182. }
  // errDatastoreFailed is returned by datastore operations when the datastore
  // is in the failed state, or enters it due to a recovered panic.
  var errDatastoreFailed = std_errors.New("datastore has failed")
  184. func (db *datastoreDB) isDatastoreFailed() bool {
  185. return atomic.LoadInt32(&db.isFailed) == 1
  186. }
  187. func (db *datastoreDB) setDatastoreFailed(r interface{}) {
  188. atomic.StoreInt32(&db.isFailed, 1)
  189. NoticeWarning("Datastore failed: %s", errors.Tracef("panic: %v", r))
  190. }
  191. func (db *datastoreDB) close() error {
  192. // Limitation: there is no panic recover in this case. We assume boltDB.Close
  193. // does not make mmap accesses and prefer to not continue with the datastore
  194. // file in a locked or open state. We also assume that any locks aquired by
  195. // boltDB.Close, held by transactions, will be released even if the
  196. // transaction panics and the database is in the failed state.
  197. return db.boltDB.Close()
  198. }
  199. func (db *datastoreDB) getDataStoreMetrics() string {
  200. fileSize := int64(0)
  201. fileInfo, err := os.Stat(db.filename)
  202. if err == nil {
  203. fileSize = fileInfo.Size()
  204. }
  205. stats := db.boltDB.Stats()
  206. return fmt.Sprintf("filesize %s | freepages %d | freealloc %s | txcount %d | writes %d | writetime %s",
  207. common.FormatByteCount(uint64(fileSize)),
  208. stats.FreePageN,
  209. common.FormatByteCount(uint64(stats.FreeAlloc)),
  210. stats.TxN,
  211. stats.TxStats.Write,
  212. stats.TxStats.WriteTime)
  213. }
  214. func (db *datastoreDB) view(fn func(tx *datastoreTx) error) (reterr error) {
  215. // Any bolt function that performs mmap buffer accesses can raise SIGBUS due
  216. // to underlying storage changes, such as a truncation of the datastore file
  217. // or removal or network attached storage, etc.
  218. //
  219. // To handle this, we temporarily set SetPanicOnFault in order to treat the
  220. // fault as a panic, recover any panic to avoid crashing the process, and
  221. // putting this datastoreDB instance into a failed state. All subsequent
  222. // calls to this datastoreDBinstance or its related datastoreTx and
  223. // datastoreBucket instances will fail.
  224. // Begin recovery preamble
  225. if db.isDatastoreFailed() {
  226. return errDatastoreFailed
  227. }
  228. panicOnFault := debug.SetPanicOnFault(true)
  229. defer debug.SetPanicOnFault(panicOnFault)
  230. defer func() {
  231. if r := recover(); r != nil {
  232. db.setDatastoreFailed(r)
  233. reterr = errDatastoreFailed
  234. }
  235. }()
  236. // End recovery preamble
  237. return db.boltDB.View(
  238. func(tx *bolt.Tx) error {
  239. err := fn(&datastoreTx{db: db, boltTx: tx})
  240. if err != nil {
  241. return errors.Trace(err)
  242. }
  243. return nil
  244. })
  245. }
  246. func (db *datastoreDB) update(fn func(tx *datastoreTx) error) (reterr error) {
  247. // Begin recovery preamble
  248. if db.isDatastoreFailed() {
  249. return errDatastoreFailed
  250. }
  251. panicOnFault := debug.SetPanicOnFault(true)
  252. defer debug.SetPanicOnFault(panicOnFault)
  253. defer func() {
  254. if r := recover(); r != nil {
  255. db.setDatastoreFailed(r)
  256. reterr = errDatastoreFailed
  257. }
  258. }()
  259. // End recovery preamble
  260. return db.boltDB.Update(
  261. func(tx *bolt.Tx) error {
  262. err := fn(&datastoreTx{db: db, boltTx: tx})
  263. if err != nil {
  264. return errors.Trace(err)
  265. }
  266. return nil
  267. })
  268. }
  269. func (tx *datastoreTx) bucket(name []byte) (retbucket *datastoreBucket) {
  270. // Begin recovery preamble
  271. if tx.db.isDatastoreFailed() {
  272. return &datastoreBucket{db: tx.db, boltBucket: nil}
  273. }
  274. panicOnFault := debug.SetPanicOnFault(true)
  275. defer debug.SetPanicOnFault(panicOnFault)
  276. defer func() {
  277. if r := recover(); r != nil {
  278. tx.db.setDatastoreFailed(r)
  279. retbucket = &datastoreBucket{db: tx.db, boltBucket: nil}
  280. }
  281. }()
  282. // End recovery preamble
  283. return &datastoreBucket{db: tx.db, boltBucket: tx.boltTx.Bucket(name)}
  284. }
  285. func (tx *datastoreTx) clearBucket(name []byte) (reterr error) {
  286. // Begin recovery preamble
  287. if tx.db.isDatastoreFailed() {
  288. return errDatastoreFailed
  289. }
  290. panicOnFault := debug.SetPanicOnFault(true)
  291. defer debug.SetPanicOnFault(panicOnFault)
  292. defer func() {
  293. if r := recover(); r != nil {
  294. tx.db.setDatastoreFailed(r)
  295. reterr = errDatastoreFailed
  296. }
  297. }()
  298. // End recovery preamble
  299. err := tx.boltTx.DeleteBucket(name)
  300. if err != nil {
  301. return errors.Trace(err)
  302. }
  303. _, err = tx.boltTx.CreateBucket(name)
  304. if err != nil {
  305. return errors.Trace(err)
  306. }
  307. return nil
  308. }
  309. func (b *datastoreBucket) get(key []byte) (retvalue []byte) {
  310. // Begin recovery preamble
  311. if b.db.isDatastoreFailed() {
  312. return nil
  313. }
  314. panicOnFault := debug.SetPanicOnFault(true)
  315. defer debug.SetPanicOnFault(panicOnFault)
  316. defer func() {
  317. if r := recover(); r != nil {
  318. b.db.setDatastoreFailed(r)
  319. retvalue = nil
  320. }
  321. }()
  322. // End recovery preamble
  323. return b.boltBucket.Get(key)
  324. }
  325. func (b *datastoreBucket) put(key, value []byte) (reterr error) {
  326. // Begin recovery preamble
  327. if b.db.isDatastoreFailed() {
  328. return errDatastoreFailed
  329. }
  330. panicOnFault := debug.SetPanicOnFault(true)
  331. defer debug.SetPanicOnFault(panicOnFault)
  332. defer func() {
  333. if r := recover(); r != nil {
  334. b.db.setDatastoreFailed(r)
  335. reterr = errDatastoreFailed
  336. }
  337. }()
  338. // End recovery preamble
  339. err := b.boltBucket.Put(key, value)
  340. if err != nil {
  341. return errors.Trace(err)
  342. }
  343. return nil
  344. }
  345. func (b *datastoreBucket) delete(key []byte) (reterr error) {
  346. // Begin recovery preamble
  347. if b.db.isDatastoreFailed() {
  348. return errDatastoreFailed
  349. }
  350. panicOnFault := debug.SetPanicOnFault(true)
  351. defer debug.SetPanicOnFault(panicOnFault)
  352. defer func() {
  353. if r := recover(); r != nil {
  354. b.db.setDatastoreFailed(r)
  355. reterr = errDatastoreFailed
  356. }
  357. }()
  358. // End recovery preamble
  359. err := b.boltBucket.Delete(key)
  360. if err != nil {
  361. return errors.Trace(err)
  362. }
  363. return nil
  364. }
  365. func (b *datastoreBucket) cursor() (retcursor datastoreCursor) {
  366. // Begin recovery preamble
  367. if b.db.isDatastoreFailed() {
  368. return datastoreCursor{db: b.db, boltCursor: nil}
  369. }
  370. panicOnFault := debug.SetPanicOnFault(true)
  371. defer debug.SetPanicOnFault(panicOnFault)
  372. defer func() {
  373. if r := recover(); r != nil {
  374. b.db.setDatastoreFailed(r)
  375. retcursor = datastoreCursor{db: b.db, boltCursor: nil}
  376. }
  377. }()
  378. // End recovery preamble
  379. return datastoreCursor{db: b.db, boltCursor: b.boltBucket.Cursor()}
  380. }
  381. func (c *datastoreCursor) firstKey() (retkey []byte) {
  382. // Begin recovery preamble
  383. if c.db.isDatastoreFailed() {
  384. return nil
  385. }
  386. panicOnFault := debug.SetPanicOnFault(true)
  387. defer debug.SetPanicOnFault(panicOnFault)
  388. defer func() {
  389. if r := recover(); r != nil {
  390. c.db.setDatastoreFailed(r)
  391. retkey = nil
  392. }
  393. }()
  394. // End recovery preamble
  395. key, _ := c.boltCursor.First()
  396. return key
  397. }
  398. func (c *datastoreCursor) nextKey() (retkey []byte) {
  399. // Begin recovery preamble
  400. if c.db.isDatastoreFailed() {
  401. return nil
  402. }
  403. panicOnFault := debug.SetPanicOnFault(true)
  404. defer debug.SetPanicOnFault(panicOnFault)
  405. defer func() {
  406. if r := recover(); r != nil {
  407. c.db.setDatastoreFailed(r)
  408. retkey = nil
  409. }
  410. }()
  411. // End recovery preamble
  412. key, _ := c.boltCursor.Next()
  413. return key
  414. }
  415. func (c *datastoreCursor) first() (retkey, retvalue []byte) {
  416. // Begin recovery preamble
  417. if c.db.isDatastoreFailed() {
  418. return nil, nil
  419. }
  420. panicOnFault := debug.SetPanicOnFault(true)
  421. defer debug.SetPanicOnFault(panicOnFault)
  422. defer func() {
  423. if r := recover(); r != nil {
  424. c.db.setDatastoreFailed(r)
  425. retkey = nil
  426. retvalue = nil
  427. }
  428. }()
  429. // End recovery preamble
  430. return c.boltCursor.First()
  431. }
  432. func (c *datastoreCursor) next() (retkey, retvalue []byte) {
  433. // Begin recovery preamble
  434. if c.db.isDatastoreFailed() {
  435. return nil, nil
  436. }
  437. panicOnFault := debug.SetPanicOnFault(true)
  438. defer debug.SetPanicOnFault(panicOnFault)
  439. defer func() {
  440. if r := recover(); r != nil {
  441. c.db.setDatastoreFailed(r)
  442. retkey = nil
  443. retvalue = nil
  444. }
  445. }()
  446. // End recovery preamble
  447. return c.boltCursor.Next()
  448. }
  // close is a no-op for the bolt-backed cursor.
  func (c *datastoreCursor) close() {
  	// BoltDB doesn't close cursors.
  }