--- /srv/rebuilderd/tmp/rebuilderdGqhwAt/inputs/llama.cpp-examples_9071+dfsg-1_arm64.deb +++ /srv/rebuilderd/tmp/rebuilderdGqhwAt/out/llama.cpp-examples_9071+dfsg-1_arm64.deb ├── file list │ @@ -1,3 +1,3 @@ │ -rw-r--r-- 0 0 0 4 2026-05-08 10:29:16.000000 debian-binary │ -rw-r--r-- 0 0 0 2396 2026-05-08 10:29:16.000000 control.tar.xz │ --rw-r--r-- 0 0 0 314696 2026-05-08 10:29:16.000000 data.tar.xz │ +-rw-r--r-- 0 0 0 322516 2026-05-08 10:29:16.000000 data.tar.xz ├── control.tar.xz │ ├── control.tar │ │ ├── ./control │ │ │ @@ -1,13 +1,13 @@ │ │ │ Package: llama.cpp-examples │ │ │ Source: llama.cpp │ │ │ Version: 9071+dfsg-1 │ │ │ Architecture: arm64 │ │ │ Maintainer: Debian Deep Learning Team │ │ │ -Installed-Size: 1822 │ │ │ +Installed-Size: 1820 │ │ │ Depends: llama.cpp-tools (= 9071+dfsg-1), libc6 (>= 2.38), libgcc-s1 (>= 3.0), libggml0 (>= 0.11.0), libllama0 (>= 9071+dfsg), libstdc++6 (>= 13.1) │ │ │ Breaks: llama.cpp (<< 5882+dfsg-3~exp1) │ │ │ Replaces: llama.cpp (<< 5882+dfsg-3~exp1) │ │ │ Section: science │ │ │ Priority: optional │ │ │ Multi-Arch: foreign │ │ │ Homepage: https://github.com/ggml-org/llama.cpp/ │ │ ├── ./md5sums │ │ │ ├── ./md5sums │ │ │ │┄ Files differ ├── data.tar.xz │ ├── data.tar │ │ ├── file list │ │ │ @@ -30,29 +30,29 @@ │ │ │ -rw-r--r-- 0 root (0) root (0) 3522 2026-05-08 10:29:16.000000 ./usr/share/doc/llama.cpp-examples/changelog.Debian.gz │ │ │ -rw-r--r-- 0 root (0) root (0) 13361 2026-05-08 10:29:16.000000 ./usr/share/doc/llama.cpp-examples/copyright │ │ │ drwxr-xr-x 0 root (0) root (0) 0 2026-05-08 10:29:16.000000 ./usr/share/lintian/ │ │ │ drwxr-xr-x 0 root (0) root (0) 0 2026-05-08 10:29:16.000000 ./usr/share/lintian/overrides/ │ │ │ -rw-r--r-- 0 root (0) root (0) 177 2026-05-08 10:29:16.000000 ./usr/share/lintian/overrides/llama.cpp-examples │ │ │ drwxr-xr-x 0 root (0) root (0) 0 2026-05-08 10:29:16.000000 ./usr/share/man/ │ │ │ drwxr-xr-x 0 root (0) root (0) 0 2026-05-08 10:29:16.000000 ./usr/share/man/man1/ │ │ │ --rw-r--r-- 0 root (0) root (0) 6225 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-batched.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6557 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-debug.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6458 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-diffusion-cli.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6546 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-embedding.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6059 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-eval-callback.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6436 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-finetune.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6226 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-batched.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6370 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-debug.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6383 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-diffusion-cli.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6540 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-embedding.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6057 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-eval-callback.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6369 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-finetune.1.gz │ │ │ -rw-r--r-- 0 root (0) root (0) 395 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-gguf-hash.1.gz │ │ │ -rw-r--r-- 0 root (0) root (0) 248 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-gguf.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6101 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-idle.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6027 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-idle.1.gz │ │ │ -rw-r--r-- 0 root (0) root (0) 6058 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-lookahead.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6369 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-lookup-create.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6287 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-lookup-create.1.gz │ │ │ -rw-r--r-- 0 root (0) root (0) 272 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-lookup-merge.1.gz │ │ │ -rw-r--r-- 0 root (0) root (0) 6367 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-lookup-stats.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6357 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-lookup.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6250 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-parallel.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6248 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-passkey.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6315 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-retrieval.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 6067 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-save-load-state.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6352 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-lookup.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6247 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-parallel.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6243 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-passkey.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 6403 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-retrieval.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 5979 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-save-load-state.1.gz │ │ │ -rw-r--r-- 0 root (0) root (0) 267 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-simple-chat.1.gz │ │ │ -rw-r--r-- 0 root (0) root (0) 258 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-simple.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 7186 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-speculative-simple.1.gz │ │ │ --rw-r--r-- 0 root (0) root (0) 7176 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-speculative.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 7109 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-speculative-simple.1.gz │ │ │ +-rw-r--r-- 0 root (0) root (0) 7099 2026-05-08 10:29:16.000000 ./usr/share/man/man1/llama-speculative.1.gz │ │ ├── ./usr/share/man/man1/llama-batched.1.gz │ │ │ ├── llama-batched.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-BATCHED "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-batched \- llama-batched │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ │ │ @@ -413,16 +412,17 @@ │ │ │ │ context size) │ │ │ │ .PP │ │ │ │ \fB\-\-dry\-sequence\-breaker\fR STRING add sequence breaker for DRY sampling, clearing out default breakers │ │ │ │ .TP │ │ │ │ ('\en', ':', '"', '*') in the process; use "none" to not use any │ │ │ │ sequence breakers │ │ │ │ .PP │ │ │ │ -\fB\-\-adaptive\-target\fR N adaptive\-p: select tokens near this probability (valid range 0.0 to │ │ │ │ +\fB\-\-adaptive\-target\fR Nload_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ .TP │ │ │ │ +adaptive\-p: select tokens near this probability (valid range 0.0 to │ │ │ │ 1.0; negative = disabled) (default: \fB\-1\fR.00) │ │ │ │ [(more info)](https://github.com/ggml\-org/llama.cpp/pull/17927) │ │ │ │ .PP │ │ │ │ \fB\-\-adaptive\-decay\fR N adaptive\-p: decay rate for target adaptation over time. lower values │ │ │ │ .TP │ │ │ │ are more reactive, higher values are more stable. │ │ │ │ (valid range 0.0 to 0.99) (default: 0.90) │ │ ├── ./usr/share/man/man1/llama-debug.1.gz │ │ │ ├── llama-debug.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-DEBUG "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-debug \- llama-debug │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ │ │ @@ -481,21 +480,7 @@ │ │ │ │ (env: LLAMA_ARG_EMBEDDINGS) │ │ │ │ .PP │ │ │ │ \fB\-\-save\-logits\fR save final logits to files for verification (default: false) │ │ │ │ \fB\-\-logits\-output\-dir\fR PATH directory for saving logits output files (default: data) │ │ │ │ \fB\-\-tensor\-filter\fR REGEX filter tensor names for debug output (regex pattern, can be specified │ │ │ │ .IP │ │ │ │ multiple times) │ │ │ │ -.PP │ │ │ │ -example usage: │ │ │ │ -.IP │ │ │ │ -Print tensors: │ │ │ │ -.IP │ │ │ │ -obj\-aarch64\-linux\-gnu/bin/llama\-debug \fB\-m\fR model.gguf \fB\-p\fR "Hello my name is" \fB\-\-verbose\fR │ │ │ │ -.IP │ │ │ │ -The tensors to be printed can be filtered with \fB\-\-tensor\-filter\fR option. │ │ │ │ -.IP │ │ │ │ -Save logits/embeddings: │ │ │ │ -.IP │ │ │ │ -obj\-aarch64\-linux\-gnu/bin/llama\-debug \fB\-m\fR model.gguf \fB\-p\fR "Hello my name is" \fB\-\-save\-logits\fR │ │ │ │ -.IP │ │ │ │ -Add \fB\-\-embedding\fR to save embeddings │ │ ├── ./usr/share/man/man1/llama-diffusion-cli.1.gz │ │ │ ├── llama-diffusion-cli.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-DIFFUSION-CLI "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-diffusion-cli \- llama-diffusion-cli │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ ├── ./usr/share/man/man1/llama-embedding.1.gz │ │ │ ├── llama-embedding.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-EMBEDDING "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-embedding \- llama-embedding │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ │ │ @@ -81,14 +80,16 @@ │ │ │ │ \fB\-e\fR, \fB\-\-escape\fR, \fB\-\-no\-escape\fR whether to process escapes sequences (\en, \er, \et, \e', \e", \e\e) │ │ │ │ .IP │ │ │ │ (default: true) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-scaling\fR {none,linear,yarn} RoPE frequency scaling method, defaults to linear unless specified by │ │ │ │ .TP │ │ │ │ the model │ │ │ │ +load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ +.IP │ │ │ │ (env: LLAMA_ARG_ROPE_SCALING_TYPE) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-scale\fR N RoPE context scaling factor, expands context by a factor of N │ │ │ │ .IP │ │ │ │ (env: LLAMA_ARG_ROPE_SCALE) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-freq\-base\fR N RoPE base frequency, used by NTK\-aware scaling (default: loaded from │ │ ├── ./usr/share/man/man1/llama-eval-callback.1.gz │ │ │ ├── llama-eval-callback.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-EVAL-CALLBACK "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-eval-callback \- llama-eval-callback │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-\-\-\-\-\fR sampling params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-\-samplers\fR SAMPLERS samplers that will be used for generation in the order, separated by │ │ │ │ .TP │ │ │ │ \&';' │ │ │ │ @@ -461,7 +460,9 @@ │ │ │ │ .PP │ │ │ │ \fB\-\-spec\-draft\-type\-v\fR, \fB\-ctvd\fR, \fB\-\-cache\-type\-v\-draft\fR TYPE │ │ │ │ .TP │ │ │ │ KV cache data type for V for the draft model │ │ │ │ allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1 │ │ │ │ (default: f16) │ │ │ │ (env: LLAMA_ARG_SPEC_DRAFT_CACHE_TYPE_V) │ │ │ │ +.PP │ │ │ │ +load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ ├── ./usr/share/man/man1/llama-finetune.1.gz │ │ │ ├── llama-finetune.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-FINETUNE "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-finetune \- llama-finetune │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ ├── ./usr/share/man/man1/llama-idle.1.gz │ │ │ ├── llama-idle.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-IDLE "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-idle \- llama-idle │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-\-\-\-\-\fR sampling params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-\-samplers\fR SAMPLERS samplers that will be used for generation in the order, separated by │ │ │ │ .TP │ │ │ │ \&';' │ │ ├── ./usr/share/man/man1/llama-lookup-create.1.gz │ │ │ ├── llama-lookup-create.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-LOOKUP-CREATE "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-lookup-create \- llama-lookup-create │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ ├── ./usr/share/man/man1/llama-lookup.1.gz │ │ │ ├── llama-lookup.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-LOOKUP "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-lookup \- llama-lookup │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ │ │ @@ -81,14 +80,16 @@ │ │ │ │ \fB\-e\fR, \fB\-\-escape\fR, \fB\-\-no\-escape\fR whether to process escapes sequences (\en, \er, \et, \e', \e", \e\e) │ │ │ │ .IP │ │ │ │ (default: true) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-scaling\fR {none,linear,yarn} RoPE frequency scaling method, defaults to linear unless specified by │ │ │ │ .TP │ │ │ │ the model │ │ │ │ +load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ +.IP │ │ │ │ (env: LLAMA_ARG_ROPE_SCALING_TYPE) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-scale\fR N RoPE context scaling factor, expands context by a factor of N │ │ │ │ .IP │ │ │ │ (env: LLAMA_ARG_ROPE_SCALE) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-freq\-base\fR N RoPE base frequency, used by NTK\-aware scaling (default: loaded from │ │ ├── ./usr/share/man/man1/llama-parallel.1.gz │ │ │ ├── llama-parallel.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-PARALLEL "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-parallel \- llama-parallel │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ │ │ @@ -81,14 +80,16 @@ │ │ │ │ \fB\-e\fR, \fB\-\-escape\fR, \fB\-\-no\-escape\fR whether to process escapes sequences (\en, \er, \et, \e', \e", \e\e) │ │ │ │ .IP │ │ │ │ (default: true) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-scaling\fR {none,linear,yarn} RoPE frequency scaling method, defaults to linear unless specified by │ │ │ │ .TP │ │ │ │ the model │ │ │ │ +load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ +.IP │ │ │ │ (env: LLAMA_ARG_ROPE_SCALING_TYPE) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-scale\fR N RoPE context scaling factor, expands context by a factor of N │ │ │ │ .IP │ │ │ │ (env: LLAMA_ARG_ROPE_SCALE) │ │ │ │ .PP │ │ │ │ \fB\-\-rope\-freq\-base\fR N RoPE base frequency, used by NTK\-aware scaling (default: loaded from │ │ ├── ./usr/share/man/man1/llama-passkey.1.gz │ │ │ ├── llama-passkey.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-PASSKEY "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-passkey \- llama-passkey │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ │ │ @@ -413,15 +412,16 @@ │ │ │ │ context size) │ │ │ │ .PP │ │ │ │ \fB\-\-dry\-sequence\-breaker\fR STRING add sequence breaker for DRY sampling, clearing out default breakers │ │ │ │ .TP │ │ │ │ ('\en', ':', '"', '*') in the process; use "none" to not use any │ │ │ │ sequence breakers │ │ │ │ .PP │ │ │ │ -\fB\-\-adaptive\-target\fR N adaptive\-p: select tokens near this probability (valid range 0.0 to │ │ │ │ +\fB\-\-adaptive\-target\fR load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ +N adaptive\-p: select tokens near this probability (valid range 0.0 to │ │ │ │ .TP │ │ │ │ 1.0; negative = disabled) (default: \fB\-1\fR.00) │ │ │ │ [(more info)](https://github.com/ggml\-org/llama.cpp/pull/17927) │ │ │ │ .PP │ │ │ │ \fB\-\-adaptive\-decay\fR N adaptive\-p: decay rate for target adaptation over time. lower values │ │ │ │ .TP │ │ │ │ are more reactive, higher values are more stable. │ │ ├── ./usr/share/man/man1/llama-retrieval.1.gz │ │ │ ├── llama-retrieval.1 │ │ │ │ @@ -477,7 +477,11 @@ │ │ │ │ .IP │ │ │ │ multiple files) │ │ │ │ .PP │ │ │ │ \fB\-\-chunk\-size\fR N minimum length of embedded text chunks (default: 64) │ │ │ │ \fB\-\-chunk\-separator\fR STRING separator between chunks (default: ' │ │ │ │ .IP │ │ │ │ \&') │ │ │ │ +.PP │ │ │ │ +example usage: │ │ │ │ +.IP │ │ │ │ +obj\-aarch64\-linux\-gnu/bin/llama\-retrieval \fB\-\-model\fR ./models/bge\-base\-en\-v1.5\-f16.gguf \fB\-\-top\-k\fR 3 \fB\-\-context\-file\fR README.md \fB\-\-context\-file\fR License \fB\-\-chunk\-size\fR 100 \fB\-\-chunk\-separator\fR . │ │ ├── ./usr/share/man/man1/llama-save-load-state.1.gz │ │ │ ├── llama-save-load-state.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-SAVE-LOAD-STATE "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-save-load-state \- llama-save-load-state │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-\-\-\-\-\fR sampling params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-\-samplers\fR SAMPLERS samplers that will be used for generation in the order, separated by │ │ │ │ .TP │ │ │ │ \&';' │ │ ├── ./usr/share/man/man1/llama-speculative-simple.1.gz │ │ │ ├── llama-speculative-simple.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-SPECULATIVE-SIMPLE "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-speculative-simple \- llama-speculative-simple │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp │ │ ├── ./usr/share/man/man1/llama-speculative.1.gz │ │ │ ├── llama-speculative.1 │ │ │ │ @@ -1,13 +1,12 @@ │ │ │ │ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. │ │ │ │ .TH LLAMA-SPECULATIVE "1" "May 2026" "debian" "User Commands" │ │ │ │ .SH NAME │ │ │ │ llama-speculative \- llama-speculative │ │ │ │ .SH DESCRIPTION │ │ │ │ -load_backend: loaded CPU backend from \fI\,/usr/lib/aarch64\-linux\-gnu/ggml/backends0/libggml\-cpu\-armv8.2_2.so\/\fP │ │ │ │ \fB\-\-\-\-\-\fR common params \fB\-\-\-\-\-\fR │ │ │ │ .PP │ │ │ │ \fB\-h\fR, \fB\-\-help\fR, \fB\-\-usage\fR print usage and exit │ │ │ │ \fB\-\-version\fR show version and build info │ │ │ │ \fB\-\-license\fR show source code license and dependencies │ │ │ │ \fB\-cl\fR, \fB\-\-cache\-list\fR show list of models in cache │ │ │ │ \fB\-\-completion\-bash\fR print source\-able bash completion script for llama.cpp