Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

internal/grpcsync: Provide an internal-only pub-sub type API #6167

Merged
merged 17 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions internal/grpcsync/pubsub.go
Original file line number Diff line number Diff line change
@@ -0,0 1,136 @@
/*
*
* Copyright 2023 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package grpcsync

import (
"context"
"sync"
)

// Subscriber represents an entity that is subscribed to messages published on
// a PubSub. It wraps the callback to be invoked by the PubSub when a new
// message is published.
type Subscriber interface {
// OnMessage is invoked when a new message is published. Implementations
// must not block in this method.
OnMessage(msg interface{})
}

// PubSub is a simple one-to-many publish-subscribe system that supports
// messages of arbitrary type. It guarantees that messages are delivered in
// the same order in which they were published.
//
// Publisher invokes the Publish() method to publish new messages, while
// subscribers interested in receiving these messages register a callback
// via the Subscribe() method.
//
// Once a PubSub is stopped, no more messages can be published, and
// it is guaranteed that no more subscriber callback will be invoked.
type PubSub struct {
cs *CallbackSerializer
cancel context.CancelFunc

// Access to the below fields are guarded by this mutex.
mu sync.Mutex
msg interface{}
subscribers map[Subscriber]bool
stopped bool
}

// NewPubSub returns a new PubSub instance.
func NewPubSub() *PubSub {
ctx, cancel := context.WithCancel(context.Background())
return &PubSub{
cs: NewCallbackSerializer(ctx),
cancel: cancel,
subscribers: map[Subscriber]bool{},
}
}

// Subscribe registers the provided Subscriber to the PubSub.
//
// If the PubSub contains a previously published message, the Subscriber's
// OnMessage() callback will be invoked asynchronously with the existing
// message to begin with, and subsequently for every newly published message.
//
// The caller is responsible for invoking the returned cancel function to
// unsubscribe itself from the PubSub.
func (ps *PubSub) Subscribe(sub Subscriber) (cancel func()) {
ps.mu.Lock()
defer ps.mu.Unlock()

if ps.stopped {
return func() {}
}

ps.subscribers[sub] = true

if ps.msg != nil {
msg := ps.msg
ps.cs.Schedule(func(context.Context) {
ps.mu.Lock()
defer ps.mu.Unlock()
if !ps.subscribers[sub] {
return
}
sub.OnMessage(msg)
})
}

return func() {
ps.mu.Lock()
defer ps.mu.Unlock()
delete(ps.subscribers, sub)
}
}

// Publish publishes the provided message to the PubSub, and invokes
// callbacks registered by subscribers asynchronously.
func (ps *PubSub) Publish(msg interface{}) {
ps.mu.Lock()
defer ps.mu.Unlock()

if ps.stopped {
return
}

ps.msg = msg
for sub := range ps.subscribers {
s := sub
ps.cs.Schedule(func(context.Context) {
ps.mu.Lock()
defer ps.mu.Unlock()
if !ps.subscribers[s] {
return
}
s.OnMessage(msg)
})
}
}

// Stop shuts down the PubSub and releases any resources allocated by it.
// It is guaranteed that no subscriber callbacks would be invoked once this
// method returns.
func (ps *PubSub) Stop() {
ps.mu.Lock()
defer ps.mu.Unlock()
ps.stopped = true

ps.cancel()
}
211 changes: 211 additions & 0 deletions internal/grpcsync/pubsub_test.go
Original file line number Diff line number Diff line change
@@ -0,0 1,211 @@
/*
*
* Copyright 2023 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package grpcsync

import (
"fmt"
"sync"
"testing"
"time"

"github.com/google/go-cmp/cmp"
)

type testSubscriber struct {
mu sync.Mutex
msgs []int
onMsgCh chan struct{}
}

func newTestSubscriber(chSize int) *testSubscriber {
return &testSubscriber{onMsgCh: make(chan struct{}, chSize)}
}

func (ts *testSubscriber) OnMessage(msg interface{}) {
ts.mu.Lock()
defer ts.mu.Unlock()
ts.msgs = append(ts.msgs, msg.(int))
select {
case ts.onMsgCh <- struct{}{}:
default:
}
}

func (ts *testSubscriber) receivedMsgs() []int {
ts.mu.Lock()
defer ts.mu.Unlock()

msgs := make([]int, len(ts.msgs))
copy(msgs, ts.msgs)

return msgs
}

easwars marked this conversation as resolved.
Show resolved Hide resolved
func (s) TestPubSub_PublishNoMsg(t *testing.T) {
pubsub := NewPubSub()
defer pubsub.Stop()

ts := newTestSubscriber(1)
pubsub.Subscribe(ts)

select {
case <-ts.onMsgCh:
t.Fatalf("Subscriber callback invoked when no message was published")
case <-time.After(defaultTestShortTimeout):
}
}

func (s) TestPubSub_PublishMsgs_RegisterSubs_And_Stop(t *testing.T) {
pubsub := NewPubSub()

const numPublished = 10

easwars marked this conversation as resolved.
Show resolved Hide resolved
ts1 := newTestSubscriber(numPublished)
pubsub.Subscribe(ts1)
wantMsgs1 := []int{}

easwars marked this conversation as resolved.
Show resolved Hide resolved
var wg sync.WaitGroup
wg.Add(2)
// Publish ten messages on the pubsub and ensure that they are received in order by the subscriber.
go func() {
for i := 0; i < numPublished; i {
pubsub.Publish(i)
wantMsgs1 = append(wantMsgs1, i)
}
wg.Done()
}()

isTimeout := false
go func() {
for i := 0; i < numPublished; i {
select {
case <-ts1.onMsgCh:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, we are trying to check the subscriber receieves the messages in the order they are published. And we know the message that is published in each iteration (which is i := range numPublished). Why dont we just check for the message received here and fail if its not the one that is expected?

that way we can nix wantMsg1 and gotMsg1 variables and also fail faster.

I usually like the idea of always failing fast as possible in test assertions. Here lets say that the first message itself came out of order, we would have to wait until all the messages are received to fail the test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, the one sub goroutine is used to call Publish() and the other is used to check to see if ts1.OnMessage() function was called the correct number of times.
Both are processed asynchronously.

Also, if we tried to fail faster, it would be more complicated than before.
(I haven't come up with the idea to do yet.)

case <-time.After(defaultTestTimeout):
isTimeout = true
}
}
wg.Done()
}()

wg.Wait()
if isTimeout {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we fail later in the case of a timeout? I believe its better to t.Fatal whenever the reader goroutine times out and fail fast.

Consider the worst case where onMessage() callback is never invoked. We would have to wait for 10 * 10 ms for the test to fail, where as it could have failed in the first run.

Let me know if there was a different consideration for why we need the isTimeout boolean to check this.


Alternatively, you could use a t.Errorf() when a reader loop times out and then do this after the wg.Wait()

if t.Failed() {
	t.FailNow()
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whatever we choose, there are trade-offs.

t.Fatal
If we invoke t.Fatal() in the goroutine which is called by the main goroutine, the main isn't cancelled.
So, we can't use this in sub goroutine.

t.Errof & t.Failed()
It is more like Golang than using boolean flag. But many error messages would be displayed if there is a timeout in loops and it would make logs difficult to read.

Could you please share your opinion about that🙇‍♂️

t.Fatalf("Timeout when expecting the onMessage() callback to be invoked")
}
if gotMsgs1 := ts1.receivedMsgs(); !cmp.Equal(gotMsgs1, wantMsgs1) {
t.Fatalf("Received messages is %v, want %v", gotMsgs1, wantMsgs1)
}

// Register another subscriber and ensure that it receives the last published message.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

receives the last published message

-- which we know in this case if numPublished - 1 right? If you go by my comment above, we could also think about nixing wantMsgs2 right?

ts2 := newTestSubscriber(numPublished)
pubsub.Subscribe(ts2)
wantMsgs2 := wantMsgs1[len(wantMsgs1)-1:]

select {
case <-ts2.onMsgCh:
case <-time.After(defaultTestShortTimeout):
t.Fatalf("Timeout when expecting the onMessage() callback to be invoked")
}
if gotMsgs2 := ts2.receivedMsgs(); !cmp.Equal(gotMsgs2, wantMsgs2) {
t.Fatalf("Received messages is %v, want %v", gotMsgs2, wantMsgs2)
}

wg.Add(3)
// Publish ten messages on the pubsub and ensure that they are received in order by the subscribers.
go func() {
easwars marked this conversation as resolved.
Show resolved Hide resolved
for i := 0; i < numPublished; i {
pubsub.Publish(i)
wantMsgs1 = append(wantMsgs1, i)
wantMsgs2 = append(wantMsgs2, i)
}
wg.Done()
}()
errCh := make(chan error, 1)
go func() {
for i := 0; i < numPublished; i {
select {
case <-ts1.onMsgCh:
case <-time.After(defaultTestTimeout):
errCh <- fmt.Errorf("")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since errCh is initialized with size 1, Wouldn't this be a blocking write if there are multiple timeouts since the channel is only read after wg.Wait()?

I would recommend not using an errCh but instead use t.Error() or t.Fatal() in this case? using t.Error might help you check if both subscribers failed in this case.

Copy link
Contributor Author

@my4-dev my4-dev Jun 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this be a blocking write if there are multiple timeouts since the channel is only read after wg.Wait()?

This is correct! So we have to fix this problem.

But, as I said the above comment, I'm hesitating using t.Errorf() or t.Fatalf() .

}
}
wg.Done()
}()
go func() {
for i := 0; i < numPublished; i {
select {
case <-ts2.onMsgCh:
case <-time.After(defaultTestTimeout):
errCh <- fmt.Errorf("")
}
}
wg.Done()
}()
wg.Wait()
select {
case <-errCh:
t.Fatalf("Timeout when expecting the onMessage() callback to be invoked")
default:
}
if gotMsgs1 := ts1.receivedMsgs(); !cmp.Equal(gotMsgs1, wantMsgs1) {
t.Fatalf("Received messages is %v, want %v", gotMsgs1, wantMsgs1)
}
if gotMsgs2 := ts2.receivedMsgs(); !cmp.Equal(gotMsgs2, wantMsgs2) {
t.Fatalf("Received messages is %v, want %v", gotMsgs2, wantMsgs2)
}

pubsub.Stop()

go func() {
pubsub.Publish(99)
}()
// Ensure that the subscriber callback is not invoked as instantiated
// pubsub has already closed.
select {
case <-ts1.onMsgCh:
t.Fatalf("The callback was invoked after pubsub being stopped")
case <-ts2.onMsgCh:
t.Fatalf("The callback was invoked after pubsub being stopped")
case <-time.After(defaultTestShortTimeout):
}
}

func (s) TestPubSub_PublishMsgs_BeforeRegisterSub(t *testing.T) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this not already tests in the previous test case here? Maybe remove that check from the previous test case if you would like to have separate test cases for both

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They have different preconditions.
Previous one is tested by calling pubsub.Subscribe() at the status that some test subsribers have already added to pubsub.
The other one is tested at the status that no subscriber has added to pubsub.

Considering above, should we remove either?

pubsub := NewPubSub()
defer pubsub.Stop()

const numPublished = 3
for i := 0; i < numPublished; i {
pubsub.Publish(i)
}

ts := newTestSubscriber(numPublished)
pubsub.Subscribe(ts)

wantMsgs := []int{numPublished - 1}
// Ensure that the subscriber callback is invoked with a previously
// published message.
select {
case <-ts.onMsgCh:
if gotMsgs := ts.receivedMsgs(); !cmp.Equal(gotMsgs, wantMsgs) {
t.Fatalf("Received messages is %v, want %v", gotMsgs, wantMsgs)
}
case <-time.After(defaultTestShortTimeout):
t.Fatalf("Timeout when expecting the onMessage() callback to be invoked")
}
}