Documentation
Index
Constants
This section is empty.
Variables
This section is empty.
Functions
func Attention
func Attention(ctx ml.Context, query, key, value ml.Tensor, scale float64, cache kvcache.Cache) ml.Tensor
Attention implements scaled dot-product attention for transformer models:

	Attention(Q, K, V) = softmax(QK^T/√d_k)V
Parameters:
- ctx: Context for tensor operations
- query: Query tensor (Q) with shape [d_k, heads, seq_len_q]
- key: Key tensor (K) with shape [d_k, kv_heads, seq_len_k]; may be nil to read from the cache only
- value: Value tensor (V) with shape [d_v, kv_heads, seq_len_k]; may be nil to read from the cache only
- scale: Scaling factor, typically 1/√d_k where d_k is the key dimension
- cache: KV cache used to store the new key/value and retrieve past history; may be nil to use only the provided key/value
Returns:
Attention output with shape [d_v, heads, seq_len_q]
Types
type LinearBatch
Source Files