-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbackend_mlx.go
More file actions
38 lines (30 loc) · 1.07 KB
/
backend_mlx.go
File metadata and controls
38 lines (30 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// SPDX-License-Identifier: EUPL-1.2
//go:build darwin && arm64
package ml
import (
"log/slog"
coreerr "dappco.re/go/core/log"
"forge.lthn.ai/core/go-inference"
_ "forge.lthn.ai/core/go-mlx" // registers "metal" backend via init()
)
// NewMLXBackend opens the model at modelPath through go-inference and returns
// it wrapped in an InferenceAdapter labelled "mlx", for use as
// ml.Backend/StreamingBackend.
//
// This file blank-imports go-mlx, whose init() registers the "metal" backend;
// that registration is what lets inference.LoadModel() pick Metal on Apple
// Silicon without any further configuration here.
//
// Any loadOpts supplied (context length, etc.) are handed to
// inference.LoadModel unchanged. When loading fails, the adapter is nil and
// the error is wrapped by coreerr.E under the "ml.NewMLXBackend" operation.
func NewMLXBackend(modelPath string, loadOpts ...inference.LoadOption) (*InferenceAdapter, error) {
	slog.Info("mlx: loading model via go-inference", "path", modelPath)

	model, loadErr := inference.LoadModel(modelPath, loadOpts...)
	if loadErr != nil {
		return nil, coreerr.E("ml.NewMLXBackend", "mlx", loadErr)
	}

	// Report the headline model properties once the load has succeeded.
	meta := model.Info()
	slog.Info(
		"mlx: model loaded",
		"arch", meta.Architecture,
		"layers", meta.NumLayers,
		"quant", meta.QuantBits,
	)

	return NewInferenceAdapter(model, "mlx"), nil
}