Small improvements
saracen committed Jul 28, 2023
1 parent 7f16bb1 commit 626c435
Showing 2 changed files with 15 additions and 9 deletions.
README.md (3 changes: 2 additions & 1 deletion)
@@ -4,8 +4,9 @@ This is a Go port of [llama2.c](https://github.com/karpathy/llama2.c).

 ## Performance
 
+
 | system | model | llama2.c | llama2.go
-| ----------------------------------------- | --------------- | ---------------- |
+| ------------------------------------------| --------------- | ---------------- | ----------------
 | MacBook Pro, Apple M1 Max, 10-Core, 32 GB | stories15M.bin  | 676.392573 tok/s | 230.144629 tok/s
 | MacBook Pro, Apple M1 Max, 10-Core, 32 GB | stories42M.bin  | 267.295597 tok/s | 94.539509 tok/s
 | MacBook Pro, Apple M1 Max, 10-Core, 32 GB | stories110M.bin | 100.671141 tok/s | 42.359789 tok/s
llama2.go (21 changes: 13 additions & 8 deletions)
@@ -327,14 +327,15 @@ func Transformer(token, pos int, p *Config, s *RunState, w *TransformerWeights)
 	for h := 0; h < int(p.NHeads); h++ {
 		h := h
 		go func() {
+			hhs := h * headSize
 			// get the query vector for this head
-			q := s.Q[h*headSize:]
+			q := s.Q[hhs:]
 			// attention scores for this head
 			att := s.Att[h*int(p.SeqLen):]
 			// iterate over all timesteps, including the current one
 			for t := 0; t <= pos; t++ {
 				// get the key vector for this head and at this timestep
-				k := s.KeyCache[loff+t*dim+h*headSize:]
+				k := s.KeyCache[loff+t*dim+hhs:]
 				// calculate the attention score as the dot product of q and k
 				var score float32
 				for i := 0; i < headSize; i++ {
@@ -349,12 +350,16 @@
 			Softmax(att[:pos+1])
 
 			// weighted sum of the values, store back into xb
-			for i := 0; i < headSize; i++ {
-				var val float32
-				for t := 0; t <= pos; t++ {
-					val += att[t] * s.ValueCache[loff+t*dim+h*headSize+i] // note bad locality
+			xb := s.Xb[hhs : hhs+headSize]
+			for i := range xb {
+				xb[i] = 0.0
+			}
+			for t := 0; t <= pos; t++ {
+				v := s.ValueCache[loff+t*dim+hhs : loff+t*dim+hhs+headSize]
+				a := att[t]
+				for i := range v {
+					xb[i] += a * v[i]
 				}
-				s.Xb[h*headSize+i] = val
 			}
 			wg.Done()
 		}()
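
The hunk above is the substantive change: the old inner loop walked s.ValueCache with a stride of dim floats between consecutive reads (hence the original "// note bad locality" comment), while the rewrite fixes a timestep t and accumulates a contiguous headSize-wide slice into xb, so each pass touches adjacent memory. Below is a minimal, runnable sketch of the two access patterns under toy sizes; the function names and the flat per-timestep cache layout are illustrative assumptions, not code from this repository.

// Sketch (not from the commit) of the access-pattern change above: the
// strided version jumps dim floats between reads for each output element,
// while the contiguous version reads headSize consecutive floats per timestep.
package main

import "fmt"

func weightedSumStrided(xb, att, valueCache []float32, pos, dim, headSize, loff, hhs int) {
	// old pattern: inner loop strides through valueCache ("bad locality")
	for i := 0; i < headSize; i++ {
		var val float32
		for t := 0; t <= pos; t++ {
			val += att[t] * valueCache[loff+t*dim+hhs+i]
		}
		xb[i] = val
	}
}

func weightedSumContiguous(xb, att, valueCache []float32, pos, dim, headSize, loff, hhs int) {
	// new pattern: per timestep, accumulate one contiguous headSize slice
	for i := range xb {
		xb[i] = 0
	}
	for t := 0; t <= pos; t++ {
		v := valueCache[loff+t*dim+hhs : loff+t*dim+hhs+headSize]
		a := att[t]
		for i := range v {
			xb[i] += a * v[i]
		}
	}
}

func main() {
	const dim, headSize, pos = 8, 4, 2
	vc := make([]float32, (pos+1)*dim)
	for i := range vc {
		vc[i] = float32(i)
	}
	att := []float32{0.5, 0.3, 0.2}
	a := make([]float32, headSize)
	b := make([]float32, headSize)
	weightedSumStrided(a, att, vc, pos, dim, headSize, 0, 0)
	weightedSumContiguous(b, att, vc, pos, dim, headSize, 0, 0)
	fmt.Println(a, b) // both traversal orders compute the same weighted sum
}
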
@@ -481,7 +486,7 @@ func matmul(xout, x, w []float32, d int) {

 func accum(a, b []float32) {
 	_ = a[len(a)-1]
-	_ = b[len(a)-1]
+	_ = b[len(a)-1] // bce
 	for i := range a {
 		a[i] += b[i]
 	}
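
The "// bce" comment added above refers to Go's bounds-check elimination: reading b[len(a)-1] once before the loop lets the compiler prove b is at least as long as a, so the a[i] += b[i] body needs no per-iteration bounds check on b (the range loop already bounds a[i]). Here is a standalone sketch of the idiom; inspecting the compiler's output with go build -gcflags='-d=ssa/check_bce/debug=1' is one way to confirm which checks were removed.

// Standalone sketch (not from the commit) of the bounds-check hint.
package main

import "fmt"

func accum(a, b []float32) {
	_ = a[len(a)-1] // panics immediately if a is empty
	_ = b[len(a)-1] // bce hint: proves len(b) >= len(a) for the loop below
	for i := range a {
		a[i] += b[i] // no per-iteration bounds check on b[i] after the hint
	}
}

func main() {
	a := []float32{1, 2, 3}
	b := []float32{10, 20, 30}
	accum(a, b)
	fmt.Println(a) // [11 22 33]
}
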
