Documentation
¶
Index ¶
- Constants
- Variables
- func DetectContentType(b []byte) string
- func WriteGGUF(f *os.File, kv KV, ts []*Tensor) error
- type FileType
- type GGML
- func (f GGML) FlashAttention() bool
- func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType string, ...) (kv []uint64, partialOffload, fullOffload uint64)
- func (f GGML) SupportsFlashAttention() bool
- func (f GGML) SupportsKVCacheType(cacheType string) bool
- func (llm GGML) VisionGraphSize() (weights, graphSize uint64)
- type KV
- func (kv KV) Architecture() string
- func (kv KV) BlockCount() uint64
- func (kv KV) Bool(key string, defaultValue ...bool) bool
- func (kv KV) Bools(key string, defaultValue ...[]bool) []bool
- func (kv KV) ChatTemplate() string
- func (kv KV) ContextLength() uint64
- func (kv KV) EmbeddingHeadCountK() uint64
- func (kv KV) EmbeddingHeadCountMax() uint64
- func (kv KV) EmbeddingHeadCountV() uint64
- func (kv KV) EmbeddingLength() uint64
- func (kv KV) FileType() FileType
- func (kv KV) Float(key string, defaultValue ...float32) float32
- func (kv KV) Floats(key string, defaultValue ...[]float32) []float32
- func (kv KV) HeadCount() []uint64
- func (kv KV) HeadCountKV() []uint64
- func (kv KV) HeadCountKVMax() uint64
- func (kv KV) HeadCountKVMin() uint64
- func (kv KV) HeadCountMax() uint64
- func (kv KV) HeadCountMin() uint64
- func (kv KV) Ints(key string, defaultValue ...[]int32) []int32
- func (kv KV) Kind() string
- func (kv KV) OllamaEngineRequired() bool
- func (kv KV) ParameterCount() uint64
- func (kv KV) SSMConvKernel() uint64
- func (kv KV) SSMGroupCount() uint64
- func (kv KV) SSMInnerSize() uint64
- func (kv KV) SSMStateSize() uint64
- func (kv KV) String(key string, defaultValue ...string) string
- func (kv KV) Strings(key string, defaultValue ...[]string) []string
- func (kv KV) Uint(key string, defaultValue ...uint32) uint32
- func (kv KV) UintOrArrayValue(key string, defaultValue uint32) (uint32, uint32)
- func (kv KV) UintOrArrayValueAsArray(key string, defaultValue uint32) []uint32
- func (kv KV) UintOrMaxArrayValue(key string, defaultValue uint32) uint32
- func (kv KV) UintOrMinArrayValue(key string, defaultValue uint32) uint32
- func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32
- type Layer
- type Tensor
- type TensorType
- type Tensors
Constants ¶
const ( // Magic constant for `ggml` files (unversioned). FILE_MAGIC_GGML = 0x67676d6c // Magic constant for `ggml` files (versioned, ggmf). FILE_MAGIC_GGMF = 0x67676d66 // Magic constant for `ggml` files (versioned, ggjt). FILE_MAGIC_GGJT = 0x67676a74 // Magic constant for `ggla` files (LoRA adapter). FILE_MAGIC_GGLA = 0x67676C61 // Magic constant for `gguf` files (versioned, gguf) FILE_MAGIC_GGUF_LE = 0x46554747 FILE_MAGIC_GGUF_BE = 0x47475546 )
Variables ¶
var ErrUnsupportedFormat = errors.New("unsupported model format")
Functions ¶
func DetectContentType ¶
Types ¶
type FileType ¶
type FileType uint32
FileType is the Go equivalent to llama_ftype used for gguf file typing
func ParseFileType ¶
ParseFileType parses the provided GGUF file type. Only Ollama-supported types are considered valid.
func (FileType) ToTensorType ¶
func (ftype FileType) ToTensorType() TensorType
type GGML ¶
type GGML struct {
Length int64
// contains filtered or unexported fields
}
func Decode ¶
func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, error)
Decode decodes a GGML model from the given reader.
It collects array values for arrays with a size less than or equal to maxArraySize. If the maxArraySize is negative, all arrays are collected.
func (GGML) FlashAttention ¶
FlashAttention checks if the model should enable flash attention
func (GGML) SupportsFlashAttention ¶
SupportsFlashAttention checks if the model supports flash attention
func (GGML) SupportsKVCacheType ¶
SupportsKVCacheType checks if the requested cache type is supported
func (GGML) VisionGraphSize ¶
type KV ¶
func (KV) Architecture ¶
func (KV) BlockCount ¶
func (KV) ChatTemplate ¶
func (KV) ContextLength ¶
func (KV) EmbeddingHeadCountK ¶
func (KV) EmbeddingHeadCountMax ¶
func (KV) EmbeddingHeadCountV ¶
func (KV) EmbeddingLength ¶
func (KV) HeadCountKV ¶
func (KV) HeadCountKVMax ¶
func (KV) HeadCountKVMin ¶
func (KV) HeadCountMax ¶
func (KV) HeadCountMin ¶
func (KV) OllamaEngineRequired ¶
func (KV) ParameterCount ¶
func (KV) SSMConvKernel ¶
func (KV) SSMGroupCount ¶
func (KV) SSMInnerSize ¶
func (KV) SSMStateSize ¶
func (KV) UintOrArrayValue ¶
func (KV) UintOrArrayValueAsArray ¶
func (KV) UintOrMaxArrayValue ¶
func (KV) UintOrMinArrayValue ¶
type Tensor ¶
type TensorType ¶
type TensorType uint32
TensorType is equivalent to ggml_type for individual tensor types. Note: these are not the same as FileType.
const ( TensorTypeF32 TensorType = iota TensorTypeF16 TensorTypeQ4_0 TensorTypeQ4_1 TensorTypeQ5_0 TensorTypeQ5_1 TensorTypeQ8_0 TensorTypeQ8_1 TensorTypeQ2_K TensorTypeQ3_K TensorTypeQ4_K TensorTypeQ5_K TensorTypeQ6_K TensorTypeQ8_K TensorTypeI8 TensorTypeI16 TensorTypeI32 TensorTypeI64 TensorTypeF64 TensorTypeBF16 TensorTypeMXFP4 )
func ParseTensorType ¶
func ParseTensorType(s string) (TensorType, error)
ParseTensorType parses the provided GGUF tensor type. Only Ollama-supported types are considered valid.
func (TensorType) BlockSize ¶
func (t TensorType) BlockSize() uint64
func (TensorType) IsQuantized ¶
func (t TensorType) IsQuantized() bool
func (TensorType) RowSize ¶
func (t TensorType) RowSize(ne uint64) uint64
func (TensorType) String ¶
func (t TensorType) String() string
func (TensorType) TypeSize ¶
func (t TensorType) TypeSize() uint64