Small improvements
saracen committed Jul 28, 2023
1 parent 7f16bb1 commit 626c435
Showing 2 changed files with 15 additions and 9 deletions.
README.md (3 changes: 2 additions & 1 deletion)
@@ -4,8 +4,9 @@ This is a Go port of [llama2.c](https://github.com/karpathy/llama2.c).

 ## Performance
 
+
 | system | model | llama2.c | llama2.go
-| ----------------------------------------- | --------------- | ---------------- |
+| ------------------------------------------| --------------- | ---------------- | ----------------
 | MacBook Pro, Apple M1 Max, 10-Core, 32 GB | stories15M.bin  | 676.392573 tok/s | 230.144629 tok/s
 | MacBook Pro, Apple M1 Max, 10-Core, 32 GB | stories42M.bin  | 267.295597 tok/s | 94.539509 tok/s
 | MacBook Pro, Apple M1 Max, 10-Core, 32 GB | stories110M.bin | 100.671141 tok/s | 42.359789 tok/s
llama2.go (21 changes: 13 additions & 8 deletions)
@@ -327,14 +327,15 @@ func Transformer(token, pos int, p *Config, s *RunState, w *TransformerWeights)
 	for h := 0; h < int(p.NHeads); h++ {
 		h := h
 		go func() {
+			hhs := h * headSize
 			// get the query vector for this head
-			q := s.Q[h*headSize:]
+			q := s.Q[hhs:]
 			// attention scores for this head
 			att := s.Att[h*int(p.SeqLen):]
 			// iterate over all timesteps, including the current one
 			for t := 0; t <= pos; t++ {
 				// get the key vector for this head and at this timestep
-				k := s.KeyCache[loff+t*dim+h*headSize:]
+				k := s.KeyCache[loff+t*dim+hhs:]
 				// calculate the attention score as the dot product of q and k
 				var score float32
 				for i := 0; i < headSize; i++ {
@@ -349,12 +350,16 @@
 			Softmax(att[:pos+1])
 
 			// weighted sum of the values, store back into xb
-			for i := 0; i < headSize; i++ {
-				var val float32
-				for t := 0; t <= pos; t++ {
-					val += att[t] * s.ValueCache[loff+t*dim+h*headSize+i] // note bad locality
+			xb := s.Xb[hhs : hhs+headSize]
+			for i := range xb {
+				xb[i] = 0.0
+			}
+			for t := 0; t <= pos; t++ {
+				v := s.ValueCache[loff+t*dim+hhs : loff+t*dim+hhs+headSize]
+				a := att[t]
+				for i := range v {
+					xb[i] += a * v[i]
 				}
-				s.Xb[h*headSize+i] = val
 			}
 			wg.Done()
 		}()
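
The hunk above is the substantive change: the old inner loop walked s.ValueCache with a stride of dim floats between consecutive reads (hence the original "// note bad locality" comment), while the rewrite fixes a timestep t and accumulates a contiguous headSize-wide slice into xb, so each pass touches adjacent memory. Below is a minimal, runnable sketch of the two access patterns under toy sizes; the function names and the flat per-timestep cache layout are illustrative assumptions, not code from this repository.

// Sketch (not from the commit) of the access-pattern change above: the
// strided version jumps dim floats between reads for each output element,
// while the contiguous version reads headSize consecutive floats per timestep.
package main

import "fmt"

func weightedSumStrided(xb, att, valueCache []float32, pos, dim, headSize, loff, hhs int) {
	// old pattern: inner loop strides through valueCache ("bad locality")
	for i := 0; i < headSize; i++ {
		var val float32
		for t := 0; t <= pos; t++ {
			val += att[t] * valueCache[loff+t*dim+hhs+i]
		}
		xb[i] = val
	}
}

func weightedSumContiguous(xb, att, valueCache []float32, pos, dim, headSize, loff, hhs int) {
	// new pattern: per timestep, accumulate one contiguous headSize slice
	for i := range xb {
		xb[i] = 0
	}
	for t := 0; t <= pos; t++ {
		v := valueCache[loff+t*dim+hhs : loff+t*dim+hhs+headSize]
		a := att[t]
		for i := range v {
			xb[i] += a * v[i]
		}
	}
}

func main() {
	const dim, headSize, pos = 8, 4, 2
	vc := make([]float32, (pos+1)*dim)
	for i := range vc {
		vc[i] = float32(i)
	}
	att := []float32{0.5, 0.3, 0.2}
	a := make([]float32, headSize)
	b := make([]float32, headSize)
	weightedSumStrided(a, att, vc, pos, dim, headSize, 0, 0)
	weightedSumContiguous(b, att, vc, pos, dim, headSize, 0, 0)
	fmt.Println(a, b) // both traversal orders compute the same weighted sum
}
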
@@ -481,7 +486,7 @@ func matmul(xout, x, w []float32, d int) {

 func accum(a, b []float32) {
 	_ = a[len(a)-1]
-	_ = b[len(a)-1]
+	_ = b[len(a)-1] // bce
 	for i := range a {
 		a[i] += b[i]
 	}
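
The "// bce" comment added above refers to Go's bounds-check elimination: reading b[len(a)-1] once before the loop lets the compiler prove b is at least as long as a, so the a[i] += b[i] body needs no per-iteration bounds check on b (the range loop already bounds a[i]). Here is a standalone sketch of the idiom; inspecting the compiler's output with go build -gcflags='-d=ssa/check_bce/debug=1' is one way to confirm which checks were removed.

// Standalone sketch (not from the commit) of the bounds-check hint.
package main

import "fmt"

func accum(a, b []float32) {
	_ = a[len(a)-1] // panics immediately if a is empty
	_ = b[len(a)-1] // bce hint: proves len(b) >= len(a) for the loop below
	for i := range a {
		a[i] += b[i] // no per-iteration bounds check on b[i] after the hint
	}
}

func main() {
	a := []float32{1, 2, 3}
	b := []float32{10, 20, 30}
	accum(a, b)
	fmt.Println(a) // [11 22 33]
}
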
