1
0
Fork 0
forked from External/ergo

initial persistent history implementation

This commit is contained in:
Shivaram Lingamneni 2020-02-18 19:38:42 -05:00
parent 0d5a4fd584
commit 33dac4c0ba
34 changed files with 2229 additions and 595 deletions

View file

@ -6,7 +6,6 @@ package history
import (
"github.com/oragono/oragono/irc/utils"
"sync"
"sync/atomic"
"time"
)
@ -43,9 +42,10 @@ type Item struct {
// this is the uncasefolded account name, if there's no account it should be set to "*"
AccountName string
// for non-privmsg items, we may stuff some other data in here
Message utils.SplitMessage
Tags map[string]string
Params [1]string
Message utils.SplitMessage
Tags map[string]string
Params [1]string
CfCorrespondent string
}
// HasMsgid tests whether a message has the message id `msgid`.
@ -53,20 +53,30 @@ func (item *Item) HasMsgid(msgid string) bool {
return item.Message.Msgid == msgid
}
func (item *Item) isStorable() bool {
if item.Type == Tagmsg {
func (item *Item) IsStorable() bool {
switch item.Type {
case Tagmsg:
for name := range item.Tags {
if !transientTags[name] {
return true
}
}
return false // all tags were blacklisted
} else {
case Privmsg, Notice:
// don't store CTCP other than ACTION
return !item.Message.IsRestrictedCTCPMessage()
default:
return true
}
}
type Predicate func(item Item) (matches bool)
type Predicate func(item *Item) (matches bool)
// Reverse flips the order of the elements of results, in place.
func Reverse(results []Item) {
	n := len(results)
	// swap each element of the first half with its mirror in the second half;
	// for odd lengths the middle element stays where it is
	for lo := 0; lo < n/2; lo++ {
		hi := n - 1 - lo
		results[lo], results[hi] = results[hi], results[lo]
	}
}
// Buffer is a ring buffer holding message/event history for a channel or user
type Buffer struct {
@ -81,8 +91,6 @@ type Buffer struct {
lastDiscarded time.Time
enabled uint32
nowFunc func() time.Time
}
@ -99,8 +107,6 @@ func (hist *Buffer) Initialize(size int, window time.Duration) {
hist.window = window
hist.maximumSize = size
hist.nowFunc = time.Now
hist.setEnabled(size)
}
// compute the initial size for the buffer, taking into account autoresize
@ -115,31 +121,8 @@ func (hist *Buffer) initialSize(size int, window time.Duration) (result int) {
return
}
func (hist *Buffer) setEnabled(size int) {
var enabled uint32
if size != 0 {
enabled = 1
}
atomic.StoreUint32(&hist.enabled, enabled)
}
// Enabled returns whether the buffer is currently storing messages
// (a disabled buffer blackholes everything it sees)
func (list *Buffer) Enabled() bool {
return atomic.LoadUint32(&list.enabled) != 0
}
// Add adds a history item to the buffer
func (list *Buffer) Add(item Item) {
// fast path without a lock acquisition for when we are not storing history
if !list.Enabled() {
return
}
if !item.isStorable() {
return
}
if item.Message.Time.IsZero() {
item.Message.Time = time.Now().UTC()
}
@ -147,6 +130,10 @@ func (list *Buffer) Add(item Item) {
list.Lock()
defer list.Unlock()
if len(list.buffer) == 0 {
return
}
list.maybeExpand()
var pos int
@ -170,55 +157,100 @@ func (list *Buffer) Add(item Item) {
list.buffer[pos] = item
}
// Reverse reverses an []Item, in-place.
func Reverse(results []Item) {
for i, j := 0, len(results)-1; i < j; i, j = i+1, j-1 {
results[i], results[j] = results[j], results[i]
// lookup searches the buffer for an item whose message id is msgid and
// returns the first match, with found reporting whether there was one.
// It takes no lock itself and goes through matchInternal, so the caller
// is expected to be holding the buffer's read lock.
func (list *Buffer) lookup(msgid string) (result Item, found bool) {
	hasID := func(candidate *Item) bool {
		return candidate.HasMsgid(msgid)
	}
	// non-ascending search, stopping after a single match
	if hits := list.matchInternal(hasID, false, 1); len(hits) > 0 {
		result, found = hits[0], true
	}
	return result, found
}
// Between returns all history items with a time `after` <= time <= `before`,
// with an indication of whether the results are complete or are missing items
// because some of that period was discarded. A zero value of `before` is considered
// higher than all other times.
func (list *Buffer) Between(after, before time.Time, ascending bool, limit int) (results []Item, complete bool) {
if !list.Enabled() {
return
}
func (list *Buffer) betweenHelper(start, end Selector, cutoff time.Time, pred Predicate, limit int) (results []Item, complete bool, err error) {
var ascending bool
defer func() {
if !ascending {
Reverse(results)
}
}()
list.RLock()
defer list.RUnlock()
if len(list.buffer) == 0 {
return
}
after := start.Time
if start.Msgid != "" {
item, found := list.lookup(start.Msgid)
if !found {
return
}
after = item.Message.Time
}
before := end.Time
if end.Msgid != "" {
item, found := list.lookup(end.Msgid)
if !found {
return
}
before = item.Message.Time
}
after, before, ascending = MinMaxAsc(after, before, cutoff)
complete = after.Equal(list.lastDiscarded) || after.After(list.lastDiscarded)
satisfies := func(item Item) bool {
return (after.IsZero() || item.Message.Time.After(after)) && (before.IsZero() || item.Message.Time.Before(before))
satisfies := func(item *Item) bool {
return (after.IsZero() || item.Message.Time.After(after)) &&
(before.IsZero() || item.Message.Time.Before(before)) &&
(pred == nil || pred(item))
}
return list.matchInternal(satisfies, ascending, limit), complete
return list.matchInternal(satisfies, ascending, limit), complete, nil
}
// Match returns all history items such that `predicate` returns true for them.
// Items are considered in reverse insertion order if `ascending` is false, or
// in insertion order if `ascending` is true, up to a total of `limit` matches
// if `limit` > 0 (unlimited otherwise).
// `predicate` MAY be a closure that maintains its own state across invocations;
// it MUST NOT acquire any locks or otherwise do anything weird.
// Results are always returned in insertion order.
func (list *Buffer) Match(predicate Predicate, ascending bool, limit int) (results []Item) {
if !list.Enabled() {
return
// bufferSequence implements history.Sequence, emulating a single history buffer
// (for a channel, a single user's DMs, or a DM conversation)
type bufferSequence struct {
// list is the in-memory buffer that queries are run against
list *Buffer
// pred is an optional extra filter applied to each item; nil means no filtering
pred Predicate
// cutoff is handed to betweenHelper on every query, where it acts as a
// lower bound on the time range
cutoff time.Time
}
// MakeSequence wraps the buffer in a Sequence whose queries are bounded below
// by cutoff. If correspondent is nonempty, the sequence only yields items whose
// CfCorrespondent equals it; an empty correspondent installs no filter.
func (list *Buffer) MakeSequence(correspondent string, cutoff time.Time) Sequence {
	seq := &bufferSequence{
		list:   list,
		cutoff: cutoff,
	}
	if correspondent == "" {
		// no correspondent restriction: leave the predicate nil
		return seq
	}
	seq.pred = func(item *Item) bool {
		return item.CfCorrespondent == correspondent
	}
	return seq
}
list.RLock()
defer list.RUnlock()
// Between implements Sequence by running the query against the underlying
// buffer, applying this sequence's cutoff time and predicate.
func (seq *bufferSequence) Between(start, end Selector, limit int) (results []Item, complete bool, err error) {
return seq.list.betweenHelper(start, end, seq.cutoff, seq.pred, limit)
}
return list.matchInternal(predicate, ascending, limit)
// Around implements Sequence by delegating to GenericAround, which builds
// the answer out of calls to this sequence's Between.
func (seq *bufferSequence) Around(start Selector, limit int) (results []Item, err error) {
return GenericAround(seq, start, limit)
}
// you must be holding the read lock to call this
func (list *Buffer) matchInternal(predicate Predicate, ascending bool, limit int) (results []Item) {
if list.start == -1 {
if list.start == -1 || len(list.buffer) == 0 {
return
}
@ -232,7 +264,7 @@ func (list *Buffer) matchInternal(predicate Predicate, ascending bool, limit int
}
for {
if predicate(list.buffer[pos]) {
if predicate(&list.buffer[pos]) {
results = append(results, list.buffer[pos])
}
if pos == stop || (limit != 0 && len(results) == limit) {
@ -245,18 +277,14 @@ func (list *Buffer) matchInternal(predicate Predicate, ascending bool, limit int
}
}
// TODO sort by time instead?
if !ascending {
Reverse(results)
}
return
}
// Latest returns the items most recently added, up to `limit`. If `limit` is 0,
// latest returns the items most recently added, up to `limit`. If `limit` is 0,
// it returns all items.
func (list *Buffer) Latest(limit int) (results []Item) {
matchAll := func(item Item) bool { return true }
return list.Match(matchAll, false, limit)
func (list *Buffer) latest(limit int) (results []Item) {
results, _, _ = list.betweenHelper(Selector{}, Selector{}, time.Time{}, nil, limit)
return
}
// LastDiscarded returns the latest time of any entry that was evicted
@ -355,8 +383,6 @@ func (list *Buffer) Resize(maximumSize int, window time.Duration) {
func (list *Buffer) resize(size int) {
newbuffer := make([]Item, size)
list.setEnabled(size)
if list.start == -1 {
// indices are already correct and nothing needs to be copied
} else if size == 0 {

View file

@ -14,19 +14,21 @@ const (
timeFormat = "2006-01-02 15:04:05Z"
)
// betweenTimestamps is a test helper: it queries buf for the items between two
// timestamps, with no cutoff and no predicate, returning at most limit results
// (0 for no limit).
func betweenTimestamps(buf *Buffer, start, end time.Time, limit int) (result []Item, complete bool) {
	from, to := Selector{Time: start}, Selector{Time: end}
	// the error is dropped on purpose: these tests only inspect the items and
	// the completeness flag
	result, complete, _ = buf.betweenHelper(from, to, time.Time{}, nil, limit)
	return result, complete
}
func TestEmptyBuffer(t *testing.T) {
pastTime := easyParse(timeFormat)
buf := NewHistoryBuffer(0, 0)
if buf.Enabled() {
t.Error("the buffer of size 0 must be considered disabled")
}
buf.Add(Item{
Nick: "testnick",
})
since, complete := buf.Between(pastTime, time.Now(), false, 0)
since, complete := betweenTimestamps(buf, pastTime, time.Now(), 0)
if len(since) != 0 {
t.Error("shouldn't be able to add to disabled buf")
}
@ -35,16 +37,13 @@ func TestEmptyBuffer(t *testing.T) {
}
buf.Resize(1, 0)
if !buf.Enabled() {
t.Error("the buffer of size 1 must be considered enabled")
}
since, complete = buf.Between(pastTime, time.Now(), false, 0)
since, complete = betweenTimestamps(buf, pastTime, time.Now(), 0)
assertEqual(complete, true, t)
assertEqual(len(since), 0, t)
buf.Add(Item{
Nick: "testnick",
})
since, complete = buf.Between(pastTime, time.Now(), false, 0)
since, complete = betweenTimestamps(buf, pastTime, time.Now(), 0)
if len(since) != 1 {
t.Error("should be able to store items in a nonempty buffer")
}
@ -58,7 +57,7 @@ func TestEmptyBuffer(t *testing.T) {
buf.Add(Item{
Nick: "testnick2",
})
since, complete = buf.Between(pastTime, time.Now(), false, 0)
since, complete = betweenTimestamps(buf, pastTime, time.Now(), 0)
if len(since) != 1 {
t.Error("expect exactly 1 item")
}
@ -68,8 +67,7 @@ func TestEmptyBuffer(t *testing.T) {
if since[0].Nick != "testnick2" {
t.Error("retrieved junk data")
}
matchAll := func(item Item) bool { return true }
assertEqual(toNicks(buf.Match(matchAll, false, 0)), []string{"testnick2"}, t)
assertEqual(toNicks(buf.latest(0)), []string{"testnick2"}, t)
}
func toNicks(items []Item) (result []string) {
@ -110,27 +108,27 @@ func TestBuffer(t *testing.T) {
buf.Add(easyItem("testnick2", "2006-01-03 15:04:05Z"))
since, complete := buf.Between(start, time.Now(), false, 0)
since, complete := betweenTimestamps(buf, start, time.Now(), 0)
assertEqual(complete, true, t)
assertEqual(toNicks(since), []string{"testnick0", "testnick1", "testnick2"}, t)
// add another item, evicting the first
buf.Add(easyItem("testnick3", "2006-01-04 15:04:05Z"))
since, complete = buf.Between(start, time.Now(), false, 0)
since, complete = betweenTimestamps(buf, start, time.Now(), 0)
assertEqual(complete, false, t)
assertEqual(toNicks(since), []string{"testnick1", "testnick2", "testnick3"}, t)
// now exclude the time of the discarded entry; results should be complete again
since, complete = buf.Between(easyParse("2006-01-02 00:00:00Z"), time.Now(), false, 0)
since, complete = betweenTimestamps(buf, easyParse("2006-01-02 00:00:00Z"), time.Now(), 0)
assertEqual(complete, true, t)
assertEqual(toNicks(since), []string{"testnick1", "testnick2", "testnick3"}, t)
since, complete = buf.Between(easyParse("2006-01-02 00:00:00Z"), easyParse("2006-01-03 00:00:00Z"), false, 0)
since, complete = betweenTimestamps(buf, easyParse("2006-01-02 00:00:00Z"), easyParse("2006-01-03 00:00:00Z"), 0)
assertEqual(complete, true, t)
assertEqual(toNicks(since), []string{"testnick1"}, t)
// shrink the buffer, cutting off testnick1
buf.Resize(2, 0)
since, complete = buf.Between(easyParse("2006-01-02 00:00:00Z"), time.Now(), false, 0)
since, complete = betweenTimestamps(buf, easyParse("2006-01-02 00:00:00Z"), time.Now(), 0)
assertEqual(complete, false, t)
assertEqual(toNicks(since), []string{"testnick2", "testnick3"}, t)
@ -138,18 +136,19 @@ func TestBuffer(t *testing.T) {
buf.Add(easyItem("testnick4", "2006-01-05 15:04:05Z"))
buf.Add(easyItem("testnick5", "2006-01-06 15:04:05Z"))
buf.Add(easyItem("testnick6", "2006-01-07 15:04:05Z"))
since, complete = buf.Between(easyParse("2006-01-03 00:00:00Z"), time.Now(), false, 0)
since, complete = betweenTimestamps(buf, easyParse("2006-01-03 00:00:00Z"), time.Now(), 0)
assertEqual(complete, true, t)
assertEqual(toNicks(since), []string{"testnick2", "testnick3", "testnick4", "testnick5", "testnick6"}, t)
// test ascending order
since, _ = buf.Between(easyParse("2006-01-03 00:00:00Z"), time.Now(), true, 2)
since, _ = betweenTimestamps(buf, easyParse("2006-01-03 00:00:00Z"), time.Time{}, 2)
assertEqual(toNicks(since), []string{"testnick2", "testnick3"}, t)
}
// autoItem builds a test Item stamped with time t, using the decimal form of
// id as both its nick and its message id.
func autoItem(id int, t time.Time) (result Item) {
	label := strconv.Itoa(id)
	result.Nick = label
	result.Message.Msgid = label
	result.Message.Time = t
	return result
}
@ -181,7 +180,7 @@ func TestAutoresize(t *testing.T) {
now = now.Add(time.Minute * 10)
id += 1
}
items := buf.Latest(0)
items := buf.latest(0)
assertEqual(len(items), initialAutoSize, t)
assertEqual(atoi(items[0].Nick), 40, t)
assertEqual(atoi(items[len(items)-1].Nick), 71, t)
@ -195,7 +194,7 @@ func TestAutoresize(t *testing.T) {
// ok, 5 items from the first batch are still in the 1-hour window;
// we should overwrite until only those 5 are left, then start expanding
// the buffer so that it retains those 5 and the 100 new items
items = buf.Latest(0)
items = buf.latest(0)
assertEqual(len(items), 105, t)
assertEqual(atoi(items[0].Nick), 67, t)
assertEqual(atoi(items[len(items)-1].Nick), 171, t)
@ -207,7 +206,7 @@ func TestAutoresize(t *testing.T) {
id += 1
}
// should fill up to the maximum size of 128 and start overwriting
items = buf.Latest(0)
items = buf.latest(0)
assertEqual(len(items), 128, t)
assertEqual(atoi(items[0].Nick), 144, t)
assertEqual(atoi(items[len(items)-1].Nick), 271, t)
@ -222,7 +221,7 @@ func TestEnabledByResize(t *testing.T) {
buf.Resize(128, time.Hour)
// add an item and test that it is stored and retrievable
buf.Add(autoItem(0, now))
items := buf.Latest(0)
items := buf.latest(0)
assertEqual(len(items), 1, t)
assertEqual(atoi(items[0].Nick), 0, t)
}
@ -232,13 +231,13 @@ func TestDisabledByResize(t *testing.T) {
// enabled autoresizing buffer
buf := NewHistoryBuffer(128, time.Hour)
buf.Add(autoItem(0, now))
items := buf.Latest(0)
items := buf.latest(0)
assertEqual(len(items), 1, t)
assertEqual(atoi(items[0].Nick), 0, t)
// disable as during a rehash, confirm that nothing can be retrieved
buf.Resize(0, time.Hour)
items = buf.Latest(0)
items = buf.latest(0)
assertEqual(len(items), 0, t)
}
@ -252,3 +251,25 @@ func TestRoundUp(t *testing.T) {
assertEqual(roundUpToPowerOfTwo(1025), 2048, t)
assertEqual(roundUpToPowerOfTwo(269435457), 536870912, t)
}
// BenchmarkInsert measures the cost of adding a zero-valued Item to a buffer
// created with NewHistoryBuffer(1024, 0).
func BenchmarkInsert(b *testing.B) {
	buf := NewHistoryBuffer(1024, 0)
	// Add takes its argument by value, so the same zero Item can be reused
	var blank Item
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		buf.Add(blank)
	}
}
// BenchmarkMatch measures a msgid lookup in a buffer that has been fed 1024
// items whose message ids are "0" through "1023"; the id searched for is "512".
func BenchmarkMatch(b *testing.B) {
	const count = 1024
	buf := NewHistoryBuffer(count, 0)
	// timestamps start from the zero time and advance one second per item
	stamp := time.Time{}
	for id := 0; id < count; id++ {
		buf.Add(autoItem(id, stamp))
		stamp = stamp.Add(time.Second)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		buf.lookup("512")
	}
}

71
irc/history/queries.go Normal file
View file

@ -0,0 +1,71 @@
// Copyright (c) 2020 Shivaram Lingamneni <slingamn@cs.stanford.edu>
// released under the MIT license
package history
import (
"time"
)
// Selector represents a parameter to a CHATHISTORY command;
// at most one of Msgid or Time may be nonzero.
// A Selector with both fields zero leaves that side of a query unspecified.
type Selector struct {
// Msgid identifies a position by the message id of a stored message
Msgid string
// Time identifies a position by timestamp
Time time.Time
}
// Sequence is an abstract sequence of history entries that can be queried;
// it encapsulates restrictions such as registration time cutoffs, or
// only looking at a single "query buffer" (DMs with a particular correspondent)
type Sequence interface {
// Between queries the entries lying between the start and end selectors,
// returning at most limit of them.
// NOTE(review): the exact meaning of a zero limit and of `complete` is up to
// the implementation — confirm against each implementer.
Between(start, end Selector, limit int) (results []Item, complete bool, err error)
// Around queries the entries surrounding the start selector, returning
// at most limit of them.
Around(start Selector, limit int) (results []Item, err error)
}
// GenericAround is a bad, slow implementation of CHATHISTORY AROUND using the
// BETWEEN semantics of seq. It issues two queries: the first fetches up to half
// of limit (rounded up) ending at start, and the second fetches up to limit
// entries beginning from the time of the first entry the first query returned.
// If the first query fails its error is returned; if it comes back empty,
// no results are returned.
func GenericAround(seq Sequence, start Selector, limit int) (results []Item, err error) {
	preceding, _, err := seq.Between(Selector{}, start, (limit+1)/2)
	if err != nil {
		return nil, err
	}
	if len(preceding) == 0 {
		// TODO: this fails if we're doing an AROUND on the first message in the buffer
		// would be nice to fix this but whatever
		return nil, nil
	}
	// restart the query from the time of the first entry found before start
	anchor := Selector{Time: preceding[0].Message.Time}
	results, _, err = seq.Between(anchor, Selector{}, limit)
	return results, err
}
// MinMaxAsc converts CHATHISTORY arguments into a time interval plus a
// direction. The general case (BETWEEN, going forwards or backwards) is handled
// natively; AFTER, BEFORE and LATEST fall out as the cases where one or both of
// the endpoints are the zero time. The returned lower bound is raised to cutoff
// whenever it is zero or earlier than cutoff.
func MinMaxAsc(after, before, cutoff time.Time) (min, max time.Time, ascending bool) {
	hasStart, hasEnd := !after.IsZero(), !before.IsZero()
	switch {
	case hasStart && hasEnd:
		if before.Before(after) {
			// BETWEEN going backwards: normalize so that after <= before
			after, before = before, after
		} else {
			// BETWEEN going forwards
			ascending = true
		}
	case hasStart:
		// AFTER
		ascending = true
	default:
		// BEFORE (only the end is set) and LATEST (neither is set)
		// both go backwards, so ascending stays false
	}

	if after.IsZero() || after.Before(cutoff) {
		// this may result in an impossible query, which is fine
		after = cutoff
	}
	return after, before, ascending
}