ggml

package
v0.0.0-...-6e6905b Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 22, 2025 License: MIT Imports: 17 Imported by: 0

Documentation

Index

Constants

View Source
// Magic numbers used to recognize model file formats. Each value is the
// four-character ASCII tag of the format packed into a 32-bit integer.
const (
	// Magic constant for `ggml` files (unversioned).
	// Spells "ggml" in ASCII, most significant byte first.
	FILE_MAGIC_GGML = 0x67676d6c
	// Magic constant for `ggml` files (versioned, ggmf).
	// Spells "ggmf" in ASCII, most significant byte first.
	FILE_MAGIC_GGMF = 0x67676d66
	// Magic constant for `ggml` files (versioned, ggjt).
	// Spells "ggjt" in ASCII, most significant byte first.
	FILE_MAGIC_GGJT = 0x67676a74
	// Magic constant for `ggla` files (LoRA adapter).
	// Spells "ggla" in ASCII, most significant byte first.
	FILE_MAGIC_GGLA = 0x67676C61
	// Magic constant for `gguf` files (versioned, gguf): the ASCII bytes
	// "GGUF" interpreted as a little-endian 32-bit integer.
	FILE_MAGIC_GGUF_LE = 0x46554747
	// Magic constant for `gguf` files (versioned, gguf): the ASCII bytes
	// "GGUF" interpreted as a big-endian 32-bit integer.
	FILE_MAGIC_GGUF_BE = 0x47475546
)

Variables

View Source
var ErrUnsupportedFormat = errors.New("unsupported model format")

Functions

func DetectContentType

func DetectContentType(b []byte) string

func WriteGGUF

func WriteGGUF(f *os.File, kv KV, ts []*Tensor) error

Types

type FileType

type FileType uint32

FileType is the Go equivalent of llama_ftype, used for GGUF file typing.

// File-level types, numbered with iota starting at FileTypeF32 = 0.
//
// NOTE(review): the blank lines in this listing presumably mark unexported
// constants that the documentation view filters out, so the numeric value of
// a name cannot be derived by counting the visible entries — confirm against
// the package source.
const (
	FileTypeF32 FileType = iota
	FileTypeF16

	FileTypeQ8_0

	FileTypeQ4_K_S
	FileTypeQ4_K_M

	FileTypeBF16

	// FileTypeUnknown is assigned 1024 explicitly instead of continuing the
	// iota sequence. It is declared without a type, so it is an untyped
	// constant rather than a FileType.
	FileTypeUnknown = 1024
)

func ParseFileType

func ParseFileType(s string) (FileType, error)

ParseFileType parses the provided GGUF file type. Only Ollama-supported types are considered valid.

func (FileType) String

func (t FileType) String() string

func (FileType) ToTensorType

func (ftype FileType) ToTensorType() TensorType

func (FileType) Value

func (t FileType) Value() uint32

type GGML

// GGML is the decoded model returned by Decode.
type GGML struct {
	// NOTE(review): presumably the length in bytes of the decoded input —
	// confirm against Decode.
	Length int64
	// contains filtered or unexported fields
}

func Decode

func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, error)

Decode decodes a GGML model from the given reader.

It collects array values for arrays with a size less than or equal to maxArraySize. If maxArraySize is negative, all arrays are collected.

func (GGML) FlashAttention

func (f GGML) FlashAttention() bool

FlashAttention checks if the model should enable flash attention

func (GGML) GraphSize

func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType string, useFlashAttention bool) (kv []uint64, partialOffload, fullOffload uint64)

func (GGML) SupportsFlashAttention

func (f GGML) SupportsFlashAttention() bool

SupportsFlashAttention checks if the model supports flash attention

func (GGML) SupportsKVCacheType

func (f GGML) SupportsKVCacheType(cacheType string) bool

SupportsKVCacheType checks if the requested cache type is supported

func (GGML) VisionGraphSize

func (llm GGML) VisionGraphSize() (weights, graphSize uint64)

type KV

type KV map[string]any

func (KV) Architecture

func (kv KV) Architecture() string

func (KV) BlockCount

func (kv KV) BlockCount() uint64

func (KV) Bool

func (kv KV) Bool(key string, defaultValue ...bool) bool

func (KV) Bools

func (kv KV) Bools(key string, defaultValue ...[]bool) []bool

func (KV) ChatTemplate

func (kv KV) ChatTemplate() string

func (KV) ContextLength

func (kv KV) ContextLength() uint64

func (KV) EmbeddingHeadCountK

func (kv KV) EmbeddingHeadCountK() uint64

func (KV) EmbeddingHeadCountMax

func (kv KV) EmbeddingHeadCountMax() uint64

func (KV) EmbeddingHeadCountV

func (kv KV) EmbeddingHeadCountV() uint64

func (KV) EmbeddingLength

func (kv KV) EmbeddingLength() uint64

func (KV) FileType

func (kv KV) FileType() FileType

func (KV) Float

func (kv KV) Float(key string, defaultValue ...float32) float32

func (KV) Floats

func (kv KV) Floats(key string, defaultValue ...[]float32) []float32

func (KV) HeadCount

func (kv KV) HeadCount() []uint64

func (KV) HeadCountKV

func (kv KV) HeadCountKV() []uint64

func (KV) HeadCountKVMax

func (kv KV) HeadCountKVMax() uint64

func (KV) HeadCountKVMin

func (kv KV) HeadCountKVMin() uint64

func (KV) HeadCountMax

func (kv KV) HeadCountMax() uint64

func (KV) HeadCountMin

func (kv KV) HeadCountMin() uint64

func (KV) Ints

func (kv KV) Ints(key string, defaultValue ...[]int32) []int32

func (KV) Kind

func (kv KV) Kind() string

func (KV) OllamaEngineRequired

func (kv KV) OllamaEngineRequired() bool

func (KV) ParameterCount

func (kv KV) ParameterCount() uint64

func (KV) SSMConvKernel

func (kv KV) SSMConvKernel() uint64

func (KV) SSMGroupCount

func (kv KV) SSMGroupCount() uint64

func (KV) SSMInnerSize

func (kv KV) SSMInnerSize() uint64

func (KV) SSMStateSize

func (kv KV) SSMStateSize() uint64

func (KV) String

func (kv KV) String(key string, defaultValue ...string) string

func (KV) Strings

func (kv KV) Strings(key string, defaultValue ...[]string) []string

func (KV) Uint

func (kv KV) Uint(key string, defaultValue ...uint32) uint32

func (KV) UintOrArrayValue

func (kv KV) UintOrArrayValue(key string, defaultValue uint32) (uint32, uint32)

func (KV) UintOrArrayValueAsArray

func (kv KV) UintOrArrayValueAsArray(key string, defaultValue uint32) []uint32

func (KV) UintOrMaxArrayValue

func (kv KV) UintOrMaxArrayValue(key string, defaultValue uint32) uint32

func (KV) UintOrMinArrayValue

func (kv KV) UintOrMinArrayValue(key string, defaultValue uint32) uint32

func (KV) Uints

func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32

type Layer

type Layer map[string]*Tensor

func (Layer) Size

func (l Layer) Size() (size uint64)

type Tensor

// Tensor describes a single tensor: its name, kind, offset, and shape.
// Only Name, Kind, and Shape take part in JSON encoding.
type Tensor struct {
	// Name is encoded to JSON under the key "name".
	Name   string `json:"name"`
	// Kind is encoded to JSON under the key "kind".
	// NOTE(review): presumably holds a TensorType value — confirm.
	Kind   uint32 `json:"kind"`
	// Offset is excluded from JSON encoding.
	Offset uint64 `json:"-"`

	// Shape is the number of elements in each dimension
	Shape []uint64 `json:"shape"`

	// The embedded io.WriterTo is excluded from JSON encoding.
	io.WriterTo `json:"-"`
}

func (Tensor) Elements

func (t Tensor) Elements() uint64

func (Tensor) Size

func (t Tensor) Size() uint64

func (Tensor) Type

func (t Tensor) Type() string

type TensorType

type TensorType uint32

TensorType is equivalent to ggml_type for individual tensor types. Note: these are not the same as FileType.

// Per-tensor types, numbered with iota starting at TensorTypeF32 = 0.
//
// NOTE(review): the blank lines in this listing presumably mark unexported
// constants that the documentation view filters out, so the numeric value of
// a name cannot be derived by counting the visible entries — confirm against
// the package source.
const (
	TensorTypeF32 TensorType = iota
	TensorTypeF16
	TensorTypeQ4_0
	TensorTypeQ4_1

	TensorTypeQ5_0
	TensorTypeQ5_1
	TensorTypeQ8_0
	TensorTypeQ8_1
	TensorTypeQ2_K
	TensorTypeQ3_K
	TensorTypeQ4_K
	TensorTypeQ5_K
	TensorTypeQ6_K
	TensorTypeQ8_K

	TensorTypeI8
	TensorTypeI16
	TensorTypeI32
	TensorTypeI64
	TensorTypeF64

	TensorTypeBF16

	TensorTypeMXFP4
)

func ParseTensorType

func ParseTensorType(s string) (TensorType, error)

ParseTensorType parses the provided GGUF tensor type. Only Ollama-supported types are considered valid.

func (TensorType) BlockSize

func (t TensorType) BlockSize() uint64

func (TensorType) IsQuantized

func (t TensorType) IsQuantized() bool

func (TensorType) RowSize

func (t TensorType) RowSize(ne uint64) uint64

func (TensorType) String

func (t TensorType) String() string

func (TensorType) TypeSize

func (t TensorType) TypeSize() uint64

type Tensors

// Tensors is a collection of tensors, accessed through its Items and
// GroupLayers methods.
type Tensors struct {
	// NOTE(review): presumably the position at which tensor data begins in
	// the file — confirm against the decoder.
	Offset uint64
	// contains filtered or unexported fields
}

func (Tensors) GroupLayers

func (ts Tensors) GroupLayers() map[string]Layer

func (Tensors) Items

func (s Tensors) Items(prefix ...string) []*Tensor

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL