dataStore_bolt.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. //go:build !PSIPHON_USE_BADGER_DB && !PSIPHON_USE_FILES_DB
  2. // +build !PSIPHON_USE_BADGER_DB,!PSIPHON_USE_FILES_DB
  3. /*
  4. * Copyright (c) 2018, Psiphon Inc.
  5. * All rights reserved.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. */
  21. package psiphon
  22. import (
  23. std_errors "errors"
  24. "fmt"
  25. "os"
  26. "path/filepath"
  27. "runtime/debug"
  28. "sync/atomic"
  29. "time"
  30. "github.com/Psiphon-Labs/bolt"
  31. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  32. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  33. )
  34. const (
  35. OPEN_DB_RETRIES = 2
  36. )
  37. type datastoreDB struct {
  38. boltDB *bolt.DB
  39. filename string
  40. isFailed int32
  41. }
  42. type datastoreTx struct {
  43. db *datastoreDB
  44. boltTx *bolt.Tx
  45. }
  46. type datastoreBucket struct {
  47. db *datastoreDB
  48. boltBucket *bolt.Bucket
  49. }
  50. type datastoreCursor struct {
  51. db *datastoreDB
  52. boltCursor *bolt.Cursor
  53. }
  54. func datastoreOpenDB(
  55. rootDataDirectory string, retryAndReset bool) (*datastoreDB, error) {
  56. var db *datastoreDB
  57. var err error
  58. attempts := 1
  59. if retryAndReset {
  60. attempts += OPEN_DB_RETRIES
  61. }
  62. reset := false
  63. for attempt := 0; attempt < attempts; attempt++ {
  64. db, err = tryDatastoreOpenDB(rootDataDirectory, reset)
  65. if err == nil {
  66. break
  67. }
  68. NoticeWarning("tryDatastoreOpenDB failed: %s", err)
  69. // The datastore file may be corrupt, so, in subsequent iterations,
  70. // set the "reset" flag and attempt to delete the file and try again.
  71. //
  72. // Don't reset the datastore when open failed due to timeout obtaining
  73. // the file lock, as the datastore is simply locked by another
  74. // process and not corrupt. As the file lock is advisory, deleting
  75. // the file would succeed despite the lock. In this case, still retry
  76. // in case the the lock is released.
  77. reset = !std_errors.Is(err, bolt.ErrTimeout)
  78. }
  79. return db, err
  80. }
  81. func tryDatastoreOpenDB(
  82. rootDataDirectory string, reset bool) (retdb *datastoreDB, reterr error) {
  83. // Testing indicates that the bolt Check function can raise SIGSEGV due to
  84. // invalid mmap buffer accesses in cases such as opening a valid but
  85. // truncated datastore file.
  86. //
  87. // To handle this, we temporarily set SetPanicOnFault in order to treat the
  88. // fault as a panic, recover any panic, and return an error which will result
  89. // in a retry with reset.
  90. //
  91. // Limitation: another potential crash case is "fatal error: out of
  92. // memory" due to bolt.freelist.read attempting to allocate a slice using
  93. // a corrupted size value on disk. There is no way to recover from this
  94. // fatal.
  95. // Begin recovery preamble
  96. panicOnFault := debug.SetPanicOnFault(true)
  97. defer debug.SetPanicOnFault(panicOnFault)
  98. defer func() {
  99. if r := recover(); r != nil {
  100. retdb = nil
  101. reterr = errors.Tracef("panic: %v", r)
  102. }
  103. }()
  104. // End recovery preamble
  105. filename := filepath.Join(rootDataDirectory, "psiphon.boltdb")
  106. if reset {
  107. NoticeWarning("tryDatastoreOpenDB: reset")
  108. os.Remove(filename)
  109. }
  110. // A typical Psiphon datastore will not have a large, fragmented freelist.
  111. // For this reason, we're not setting FreelistType to FreelistMapType or
  112. // enabling NoFreelistSync. The latter option has a trade-off of slower
  113. // start up time.
  114. //
  115. // Monitor freelist stats in DataStoreMetrics in diagnostics and consider
  116. // setting these options if necessary.
  117. newDB, err := bolt.Open(filename, 0600, &bolt.Options{Timeout: 1 * time.Second})
  118. if err != nil {
  119. return nil, errors.Trace(err)
  120. }
  121. // Run consistency checks on datastore and emit errors for diagnostics
  122. // purposes. We assume this will complete quickly for typical size Psiphon
  123. // datastores and wait for the check to complete before proceeding.
  124. err = newDB.View(func(tx *bolt.Tx) error {
  125. return tx.SynchronousCheck()
  126. })
  127. if err != nil {
  128. return nil, errors.Trace(err)
  129. }
  130. err = newDB.Update(func(tx *bolt.Tx) error {
  131. requiredBuckets := [][]byte{
  132. datastoreServerEntriesBucket,
  133. datastoreServerEntryTagsBucket,
  134. datastoreServerEntryTombstoneTagsBucket,
  135. datastoreUrlETagsBucket,
  136. datastoreKeyValueBucket,
  137. datastoreRemoteServerListStatsBucket,
  138. datastoreFailedTunnelStatsBucket,
  139. datastoreSLOKsBucket,
  140. datastoreTacticsBucket,
  141. datastoreSpeedTestSamplesBucket,
  142. datastoreDialParametersBucket,
  143. datastoreNetworkReplayParametersBucket,
  144. datastoreDSLOSLStatesBucket,
  145. }
  146. for _, bucket := range requiredBuckets {
  147. _, err := tx.CreateBucketIfNotExists(bucket)
  148. if err != nil {
  149. return err
  150. }
  151. }
  152. return nil
  153. })
  154. if err != nil {
  155. return nil, errors.Trace(err)
  156. }
  157. // Cleanup obsolete buckets
  158. err = newDB.Update(func(tx *bolt.Tx) error {
  159. obsoleteBuckets := [][]byte{
  160. []byte("tunnelStats"),
  161. []byte("rankedServerEntries"),
  162. []byte("splitTunnelRouteETags"),
  163. []byte("splitTunnelRouteData"),
  164. }
  165. for _, obsoleteBucket := range obsoleteBuckets {
  166. if tx.Bucket(obsoleteBucket) != nil {
  167. err := tx.DeleteBucket(obsoleteBucket)
  168. if err != nil {
  169. NoticeWarning("DeleteBucket %s error: %s", obsoleteBucket, err)
  170. // Continue, since this is not fatal
  171. }
  172. }
  173. }
  174. return nil
  175. })
  176. if err != nil {
  177. return nil, errors.Trace(err)
  178. }
  179. return &datastoreDB{
  180. boltDB: newDB,
  181. filename: filename,
  182. }, nil
  183. }
  184. var errDatastoreFailed = std_errors.New("datastore has failed")
  185. func (db *datastoreDB) isDatastoreFailed() bool {
  186. return atomic.LoadInt32(&db.isFailed) == 1
  187. }
  188. func (db *datastoreDB) setDatastoreFailed(r interface{}) {
  189. atomic.StoreInt32(&db.isFailed, 1)
  190. NoticeWarning("Datastore failed: %s", errors.Tracef("panic: %v", r))
  191. }
  192. func (db *datastoreDB) close() error {
  193. // Limitation: there is no panic recover in this case. We assume boltDB.Close
  194. // does not make mmap accesses and prefer to not continue with the datastore
  195. // file in a locked or open state. We also assume that any locks aquired by
  196. // boltDB.Close, held by transactions, will be released even if the
  197. // transaction panics and the database is in the failed state.
  198. return db.boltDB.Close()
  199. }
  200. func (db *datastoreDB) getDataStoreMetrics() string {
  201. fileSize := int64(0)
  202. fileInfo, err := os.Stat(db.filename)
  203. if err == nil {
  204. fileSize = fileInfo.Size()
  205. }
  206. stats := db.boltDB.Stats()
  207. return fmt.Sprintf("filesize %s | freepages %d | freealloc %s | txcount %d | writes %d | writetime %s",
  208. common.FormatByteCount(uint64(fileSize)),
  209. stats.FreePageN,
  210. common.FormatByteCount(uint64(stats.FreeAlloc)),
  211. stats.TxN,
  212. stats.TxStats.Write,
  213. stats.TxStats.WriteTime)
  214. }
  215. func (db *datastoreDB) view(fn func(tx *datastoreTx) error) (reterr error) {
  216. // Any bolt function that performs mmap buffer accesses can raise SIGBUS due
  217. // to underlying storage changes, such as a truncation of the datastore file
  218. // or removal or network attached storage, etc.
  219. //
  220. // To handle this, we temporarily set SetPanicOnFault in order to treat the
  221. // fault as a panic, recover any panic to avoid crashing the process, and
  222. // putting this datastoreDB instance into a failed state. All subsequent
  223. // calls to this datastoreDBinstance or its related datastoreTx and
  224. // datastoreBucket instances will fail.
  225. // Begin recovery preamble
  226. if db.isDatastoreFailed() {
  227. return errDatastoreFailed
  228. }
  229. panicOnFault := debug.SetPanicOnFault(true)
  230. defer debug.SetPanicOnFault(panicOnFault)
  231. defer func() {
  232. if r := recover(); r != nil {
  233. db.setDatastoreFailed(r)
  234. reterr = errDatastoreFailed
  235. }
  236. }()
  237. // End recovery preamble
  238. return db.boltDB.View(
  239. func(tx *bolt.Tx) error {
  240. err := fn(&datastoreTx{db: db, boltTx: tx})
  241. if err != nil {
  242. return errors.Trace(err)
  243. }
  244. return nil
  245. })
  246. }
  247. func (db *datastoreDB) update(fn func(tx *datastoreTx) error) (reterr error) {
  248. // Begin recovery preamble
  249. if db.isDatastoreFailed() {
  250. return errDatastoreFailed
  251. }
  252. panicOnFault := debug.SetPanicOnFault(true)
  253. defer debug.SetPanicOnFault(panicOnFault)
  254. defer func() {
  255. if r := recover(); r != nil {
  256. db.setDatastoreFailed(r)
  257. reterr = errDatastoreFailed
  258. }
  259. }()
  260. // End recovery preamble
  261. return db.boltDB.Update(
  262. func(tx *bolt.Tx) error {
  263. err := fn(&datastoreTx{db: db, boltTx: tx})
  264. if err != nil {
  265. return errors.Trace(err)
  266. }
  267. return nil
  268. })
  269. }
  270. func (tx *datastoreTx) bucket(name []byte) (retbucket *datastoreBucket) {
  271. // Begin recovery preamble
  272. if tx.db.isDatastoreFailed() {
  273. return &datastoreBucket{db: tx.db, boltBucket: nil}
  274. }
  275. panicOnFault := debug.SetPanicOnFault(true)
  276. defer debug.SetPanicOnFault(panicOnFault)
  277. defer func() {
  278. if r := recover(); r != nil {
  279. tx.db.setDatastoreFailed(r)
  280. retbucket = &datastoreBucket{db: tx.db, boltBucket: nil}
  281. }
  282. }()
  283. // End recovery preamble
  284. return &datastoreBucket{db: tx.db, boltBucket: tx.boltTx.Bucket(name)}
  285. }
  286. func (tx *datastoreTx) clearBucket(name []byte) (reterr error) {
  287. // Begin recovery preamble
  288. if tx.db.isDatastoreFailed() {
  289. return errDatastoreFailed
  290. }
  291. panicOnFault := debug.SetPanicOnFault(true)
  292. defer debug.SetPanicOnFault(panicOnFault)
  293. defer func() {
  294. if r := recover(); r != nil {
  295. tx.db.setDatastoreFailed(r)
  296. reterr = errDatastoreFailed
  297. }
  298. }()
  299. // End recovery preamble
  300. err := tx.boltTx.DeleteBucket(name)
  301. if err != nil {
  302. return errors.Trace(err)
  303. }
  304. _, err = tx.boltTx.CreateBucket(name)
  305. if err != nil {
  306. return errors.Trace(err)
  307. }
  308. return nil
  309. }
  310. func (b *datastoreBucket) get(key []byte) (retvalue []byte) {
  311. // Begin recovery preamble
  312. if b.db.isDatastoreFailed() {
  313. return nil
  314. }
  315. panicOnFault := debug.SetPanicOnFault(true)
  316. defer debug.SetPanicOnFault(panicOnFault)
  317. defer func() {
  318. if r := recover(); r != nil {
  319. b.db.setDatastoreFailed(r)
  320. retvalue = nil
  321. }
  322. }()
  323. // End recovery preamble
  324. return b.boltBucket.Get(key)
  325. }
  326. func (b *datastoreBucket) put(key, value []byte) (reterr error) {
  327. // Begin recovery preamble
  328. if b.db.isDatastoreFailed() {
  329. return errDatastoreFailed
  330. }
  331. panicOnFault := debug.SetPanicOnFault(true)
  332. defer debug.SetPanicOnFault(panicOnFault)
  333. defer func() {
  334. if r := recover(); r != nil {
  335. b.db.setDatastoreFailed(r)
  336. reterr = errDatastoreFailed
  337. }
  338. }()
  339. // End recovery preamble
  340. err := b.boltBucket.Put(key, value)
  341. if err != nil {
  342. return errors.Trace(err)
  343. }
  344. return nil
  345. }
  346. func (b *datastoreBucket) delete(key []byte) (reterr error) {
  347. // Begin recovery preamble
  348. if b.db.isDatastoreFailed() {
  349. return errDatastoreFailed
  350. }
  351. panicOnFault := debug.SetPanicOnFault(true)
  352. defer debug.SetPanicOnFault(panicOnFault)
  353. defer func() {
  354. if r := recover(); r != nil {
  355. b.db.setDatastoreFailed(r)
  356. reterr = errDatastoreFailed
  357. }
  358. }()
  359. // End recovery preamble
  360. err := b.boltBucket.Delete(key)
  361. if err != nil {
  362. return errors.Trace(err)
  363. }
  364. return nil
  365. }
  366. func (b *datastoreBucket) cursor() (retcursor datastoreCursor) {
  367. // Begin recovery preamble
  368. if b.db.isDatastoreFailed() {
  369. return datastoreCursor{db: b.db, boltCursor: nil}
  370. }
  371. panicOnFault := debug.SetPanicOnFault(true)
  372. defer debug.SetPanicOnFault(panicOnFault)
  373. defer func() {
  374. if r := recover(); r != nil {
  375. b.db.setDatastoreFailed(r)
  376. retcursor = datastoreCursor{db: b.db, boltCursor: nil}
  377. }
  378. }()
  379. // End recovery preamble
  380. return datastoreCursor{db: b.db, boltCursor: b.boltBucket.Cursor()}
  381. }
  382. func (c *datastoreCursor) firstKey() (retkey []byte) {
  383. // Begin recovery preamble
  384. if c.db.isDatastoreFailed() {
  385. return nil
  386. }
  387. panicOnFault := debug.SetPanicOnFault(true)
  388. defer debug.SetPanicOnFault(panicOnFault)
  389. defer func() {
  390. if r := recover(); r != nil {
  391. c.db.setDatastoreFailed(r)
  392. retkey = nil
  393. }
  394. }()
  395. // End recovery preamble
  396. key, _ := c.boltCursor.First()
  397. return key
  398. }
  399. func (c *datastoreCursor) nextKey() (retkey []byte) {
  400. // Begin recovery preamble
  401. if c.db.isDatastoreFailed() {
  402. return nil
  403. }
  404. panicOnFault := debug.SetPanicOnFault(true)
  405. defer debug.SetPanicOnFault(panicOnFault)
  406. defer func() {
  407. if r := recover(); r != nil {
  408. c.db.setDatastoreFailed(r)
  409. retkey = nil
  410. }
  411. }()
  412. // End recovery preamble
  413. key, _ := c.boltCursor.Next()
  414. return key
  415. }
  416. func (c *datastoreCursor) first() (retkey, retvalue []byte) {
  417. // Begin recovery preamble
  418. if c.db.isDatastoreFailed() {
  419. return nil, nil
  420. }
  421. panicOnFault := debug.SetPanicOnFault(true)
  422. defer debug.SetPanicOnFault(panicOnFault)
  423. defer func() {
  424. if r := recover(); r != nil {
  425. c.db.setDatastoreFailed(r)
  426. retkey = nil
  427. retvalue = nil
  428. }
  429. }()
  430. // End recovery preamble
  431. return c.boltCursor.First()
  432. }
  433. func (c *datastoreCursor) next() (retkey, retvalue []byte) {
  434. // Begin recovery preamble
  435. if c.db.isDatastoreFailed() {
  436. return nil, nil
  437. }
  438. panicOnFault := debug.SetPanicOnFault(true)
  439. defer debug.SetPanicOnFault(panicOnFault)
  440. defer func() {
  441. if r := recover(); r != nil {
  442. c.db.setDatastoreFailed(r)
  443. retkey = nil
  444. retvalue = nil
  445. }
  446. }()
  447. // End recovery preamble
  448. return c.boltCursor.Next()
  449. }
  450. func (c *datastoreCursor) close() {
  451. // BoltDB doesn't close cursors.
  452. }