Currently, for sampling we are using an internal interface from the llama.cpp examples, which tends to change from release to release. This is the only such interface used for text models, though llava and clip are also used for image processing. This switches to the stable interfaces, reducing the amount of work needed for future llama.cpp bumps. It also significantly reduces the amount of code that we need to vendor (much of it is unused but is a dependency). The sampling logic is the same as it is now for the parameters that we support, and is done at the CGo layer. However, if in the future there are benefits to reconfiguring it, we can expose the primitives to native Go code.
179 lines
7.1 KiB
Makefile
179 lines
7.1 KiB
Makefile
# Helpers for managing our vendored llama.cpp repo and patch set
|
|
|
|
# Locate this makefile on disk. Both paths are computed immediately (:=) and
# before the include below, because each include appends to MAKEFILE_LIST and
# would change what $(lastword $(MAKEFILE_LIST)) returns.
#
# DST_DIR   - parent of the directory that holds this makefile (ends in "/")
# REPO_ROOT - parent of DST_DIR (ends in "/")
DST_DIR   := $(dir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))))
REPO_ROOT := $(dir $(patsubst %/,%,$(dir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))))))

# NOTE(review): presumably this is where LLAMACPP_BASE_COMMIT is set; it is
# used below but never assigned in this file - confirm.
include $(REPO_ROOT)llama/vendoring

# Tracking clone of upstream llama.cpp that the patch targets operate on.
LLAMACPP_REPO      := $(REPO_ROOT)llama/vendor/

# Directory the patch set is read from (apply-patches) and written to (create-patches).
LLAMACPP_PATCH_DIR := $(DST_DIR)patches/
|
|
|
|
|
|
# help-sync: print a short description of the vendoring targets.
# The indented lines use printf rather than echo: whether echo expands "\t"
# depends on which shell /bin/sh is, so echo could print a literal
# backslash-t on some systems.
help-sync:
	@echo "The following make targets will help you update llama.cpp to a new base commit, or work on new features/fixes"
	@echo ""
	@printf '\t%s\n' "make apply-patches # Establish the tracking repo if not already present, reset to the base commit, and apply our patch set"
	@printf '\t%s\n' "make sync # Vendor llama.cpp and ggml from the tracking repo working tree"
	@printf '\t%s\n' "make create-patches # Generate the patch set based on the current commits in the tracking repo since the base commit"
	@echo ""
	@echo "For more details on the workflow, see the Vendoring section in ../docs/development.md"
|
|
|
|
# apply-patches: check out the base commit in the tracking repo and apply the
# patch set from $(LLAMACPP_PATCH_DIR) on top of it.
#   - The $(LLAMACPP_REPO) prerequisite clones the repo when it is missing.
#   - Aborts if the tracking repo has uncommitted changes.
#   - Exits non-zero when `git am` stops on a conflict, so the closing
#     "all patches applied" message is only printed when that is true.
apply-patches: $(LLAMACPP_REPO)
	@if ! git -C $(LLAMACPP_REPO) --no-pager diff --exit-code ; then \
		echo "ERROR: Your llama.cpp repo is dirty. The apply-patches target requires a clean working tree"; \
		echo "To clobber: git -C $(LLAMACPP_REPO) reset --hard HEAD" ; \
		exit 1; \
	fi
	@echo "Checking out $(LLAMACPP_BASE_COMMIT)"
# Fetch and retry only when the first checkout fails; the braces keep the
# retry from running again after a successful first checkout.
	@git -C $(LLAMACPP_REPO) checkout -q $(LLAMACPP_BASE_COMMIT) || \
		{ git -C $(LLAMACPP_REPO) fetch --all && git -C $(LLAMACPP_REPO) checkout -q $(LLAMACPP_BASE_COMMIT) ; }
	@echo "Applying ollama patches..."
# On conflict: print the recovery hint, then fail the recipe.
	@git -c 'user.name=nobody' -c 'user.email=<>' -C $(LLAMACPP_REPO) am -3 $(LLAMACPP_PATCH_DIR)/*.patch || \
		{ echo "Please resolve the conflicts in $(LLAMACPP_REPO), and run 'git am --continue' to continue applying subsequent patches" ; exit 1 ; }
	@echo ""
	@echo "The tracking repo $(LLAMACPP_REPO) is now in a detached state with all patches applied."
	@echo "Don't forget to commit any changes you make and run 'make create-patches' "
|
|
|
|
# Create the tracking repo by cloning upstream llama.cpp. The rule has no
# prerequisites, so it only runs while the directory does not exist yet.
$(LLAMACPP_REPO):
	@echo "Cloning llama.cpp to $(LLAMACPP_REPO)"
	git clone https://github.com/ggerganov/llama.cpp.git $@
|
|
|
|
# create-patches: write one patch file per commit made on top of
# $(LLAMACPP_BASE_COMMIT) into $(LLAMACPP_PATCH_DIR).
# Refuses to run while the tracking repo has uncommitted changes.
create-patches: $(LLAMACPP_REPO)
	@git -C $(LLAMACPP_REPO) --no-pager diff --exit-code || { \
		echo "ERROR: Your llama.cpp repo is dirty. You must commit any pending changes for format-patch to generate patches"; \
		exit 1; \
	}
	git -C $(LLAMACPP_REPO) format-patch --no-signature --no-numbered --zero-commit -o $(LLAMACPP_PATCH_DIR) $(LLAMACPP_BASE_COMMIT)
|
|
|
|
# Vendoring template logic

# Basenames that are copied verbatim, i.e. without the generated
# "llama.cpp - commit ..." license header.
EXCLUDED_FILES=sgemm.cpp sgemm.h stb_image.h json.hpp llama_darwin.c base64.hpp

# Files under $(DST_DIR) that remove-stale-files must keep even though no
# vendoring rule produces them.
OLLAMA_NATIVE_FILES=mllama.cpp mllama.h llama_darwin.c

# vendor_file: rule template, used as $(eval $(call vendor_file,SRC,DSTDIR)).
#   $(1)  source path relative to $(LLAMACPP_REPO)
#   $(2)  destination directory, including the trailing slash
# Generates a rule that copies SRC to DSTDIR/<basename of SRC> and records the
# destination in VENDORED_FILES. Unless the basename is in EXCLUDED_FILES, the
# copy is prefixed with a comment block holding the base commit and the
# llama.cpp LICENSE text.
# Escaping: $$ survives $(call) and is expanded by $(eval) / at recipe time;
# $$$$ reaches the shell as a single "$" (end-of-line anchor in sed).
define vendor_file
$(strip $(addprefix $(2),$(notdir $1))) : $(addprefix $(LLAMACPP_REPO),$(1))
	@echo "vendoring $1"
	@mkdir -p $$(dir $$@)
ifeq ($$(filter-out $(EXCLUDED_FILES),$(notdir $1)),)
	@cat $$< > $$@
else
	@{ echo "/**" && \
		echo " * llama.cpp - commit $$(LLAMACPP_BASE_COMMIT) - do not edit this file" && \
		echo " *" && \
		sed 's/^/ * /' <$(LLAMACPP_REPO)/LICENSE | sed 's/ *$$$$//' && \
		echo " */" && \
		echo "" && \
		cat $$< ; } > $$@
endif
VENDORED_FILES += $(strip $(addprefix $(2),$(notdir $1)))
endef
|
|
|
|
# llama.cpp files -> llama/
# Paths are relative to $(LLAMACPP_REPO); each one becomes a vendor_file rule
# whose destination is $(DST_DIR)<basename>.
LLAMACPP_FILES = \
	$(addprefix src/, unicode.cpp unicode.h unicode-data.cpp unicode-data.h) \
	$(addprefix src/, llama.cpp llama-impl.h) \
	$(addprefix src/, llama-vocab.cpp llama-vocab.h) \
	$(addprefix src/, llama-grammar.cpp llama-grammar.h) \
	$(addprefix src/, llama-sampling.cpp llama-sampling.h) \
	include/llama.h \
	$(addprefix ggml/src/llamafile/, sgemm.cpp sgemm.h)
$(foreach src,$(LLAMACPP_FILES),$(eval $(call vendor_file,$(src),$(DST_DIR))))
|
|
|
|
# llama.cpp files -> llama/llamafile
# sgemm.h is vendored a second time here, under $(DST_DIR)llamafile/.
LLAMAFILE_FILES = ggml/src/llamafile/sgemm.h
$(foreach src,$(LLAMAFILE_FILES),$(eval $(call vendor_file,$(src),$(DST_DIR)llamafile/)))
|
|
|
|
# ggml files -> llama/
# Paths are relative to $(LLAMACPP_REPO). The list is built up in groups and
# the overall order of entries is unchanged.
GGML_FILES  = ggml/src/ggml.c ggml/include/ggml.h
GGML_FILES += ggml/src/ggml-quants.c ggml/src/ggml-quants.h
GGML_FILES += ggml/src/ggml-metal.metal ggml/include/ggml-metal.h
GGML_FILES += ggml/src/ggml-impl.h
GGML_FILES += ggml/include/ggml-cuda.h ggml/src/ggml-cuda.cu
GGML_FILES += ggml/src/ggml-common.h
GGML_FILES += ggml/include/ggml-backend.h ggml/src/ggml-backend.c ggml/src/ggml-backend-impl.h
GGML_FILES += ggml/include/ggml-alloc.h ggml/src/ggml-alloc.c
GGML_FILES += ggml/src/ggml-aarch64.h ggml/src/ggml-aarch64.c
GGML_FILES += ggml/src/ggml-cpu-impl.h
GGML_FILES += ggml/include/ggml-blas.h ggml/src/ggml-blas.cpp
$(foreach src,$(GGML_FILES),$(eval $(call vendor_file,$(src),$(DST_DIR))))
|
|
|
|
# TODO generalize renaming pattern if we have more of these
# ggml-metal.m is vendored under a different name, so it cannot go through
# vendor_file (which always keeps the source basename). The recipe writes the
# same commit/LICENSE header and then appends the source file.
$(DST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal.m
	@echo "vendoring $(subst $(LLAMACPP_REPO),,$<)"
	@mkdir -p $(dir $@)
	@{ echo "/**" && \
		echo " * llama.cpp - commit $(LLAMACPP_BASE_COMMIT) - do not edit this file" && \
		echo " *" && \
		sed 's/^/ * /' <$(LLAMACPP_REPO)/LICENSE | sed 's/ *$$//' && \
		echo " */" && \
		echo "" && \
		cat $< ; } > $@
VENDORED_FILES += $(DST_DIR)ggml-metal_darwin_arm64.m
|
|
|
|
# ggml-cuda -> llama/ggml-cuda/
# Each *_EXPANDED list globs inside $(LLAMACPP_REPO) and strips that prefix
# again, leaving repo-relative paths for vendor_file.
# NOTE(review): the globs are evaluated when the $(foreach ...) lines below
# are parsed, so only files already present in $(LLAMACPP_REPO) at that
# moment get a rule.
GGML_CUDA_FILES = ggml/src/ggml-cuda/*.cu ggml/src/ggml-cuda/*.cuh
GGML_CUDA_FILES_EXPANDED = $(patsubst $(LLAMACPP_REPO)%,%,$(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_CUDA_FILES))))
$(foreach src,$(GGML_CUDA_FILES_EXPANDED),$(eval $(call vendor_file,$(src),$(DST_DIR)ggml-cuda/)))

# ggml-cuda/template-instances -> llama/ggml-cuda/template-instances/
GGML_TEMPLATE_FILES = ggml/src/ggml-cuda/template-instances/*.cu
GGML_TEMPLATE_FILES_EXPANDED = $(patsubst $(LLAMACPP_REPO)%,%,$(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_TEMPLATE_FILES))))
$(foreach src,$(GGML_TEMPLATE_FILES_EXPANDED),$(eval $(call vendor_file,$(src),$(DST_DIR)ggml-cuda/template-instances/)))

# ggml-cuda/vendors -> llama/ggml-cuda/vendors/
GGML_VENDOR_FILES = ggml/src/ggml-cuda/vendors/*.h
GGML_VENDOR_FILES_EXPANDED = $(patsubst $(LLAMACPP_REPO)%,%,$(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_VENDOR_FILES))))
$(foreach src,$(GGML_VENDOR_FILES_EXPANDED),$(eval $(call vendor_file,$(src),$(DST_DIR)ggml-cuda/vendors/)))
|
|
|
|
# llava -> llama/
# Paths are relative to $(LLAMACPP_REPO). stb_image.h is listed in
# EXCLUDED_FILES, so it is copied without the generated header.
LAVA_FILES = \
	$(addprefix examples/llava/, clip.cpp clip.h llava.cpp llava.h) \
	common/stb_image.h
$(foreach src,$(LAVA_FILES),$(eval $(call vendor_file,$(src),$(DST_DIR))))
|
|
|
|
# Generate build-info.cpp, which records the vendored base commit.
# The file depends on the included vendoring makefile so that it is rewritten
# when that file changes; with no prerequisite it would never be regenerated
# once it exists, leaving LLAMA_COMMIT stale after a base-commit bump.
# NOTE(review): assumes $(REPO_ROOT)llama/vendoring is where
# LLAMACPP_BASE_COMMIT is defined (it is the only file included here) - confirm.
$(DST_DIR)build-info.cpp: $(REPO_ROOT)llama/vendoring
	@echo "Generating $@"
	@echo "int LLAMA_BUILD_NUMBER = 0;" > $@
	@echo "char const *LLAMA_COMMIT = \"$(LLAMACPP_BASE_COMMIT)\";" >> $@
	@echo "char const *LLAMA_COMPILER = \"\";" >> $@
	@echo "char const *LLAMA_BUILD_TARGET = \"\";" >> $@
VENDORED_FILES += $(DST_DIR)build-info.cpp
|
|
|
|
|
|
# sync: make sure the tracking repo exists, (re)generate every vendored file,
# then delete leftovers. .WAIT (GNU Make 4.4+) keeps the three stages in
# order under -j.
# NOTE(review): .WAIT is also listed in .PHONY in this file, presumably so
# older make versions treat it as a harmless phony target - confirm.
sync: \
    $(LLAMACPP_REPO) .WAIT \
    $(VENDORED_FILES) .WAIT \
    remove-stale-files
|
|
|
|
# Stale-file cleanup: anything matching PATS in the vendoring destination
# directories that is neither produced by a vendoring rule nor listed in
# OLLAMA_NATIVE_FILES gets deleted.
# These stay recursively expanded (=) so the wildcard is evaluated when the
# recipe below is expanded rather than when this file is parsed.
PATS = *.c *.h *.cpp *.m *.metal *.cu *.cuh
NATIVE_DIRS = \
	$(DST_DIR) \
	$(addprefix $(DST_DIR),llamafile/ ggml-cuda/ ggml-cuda/template-instances/ ggml-cuda/vendors/)
ALL_NATIVE_FILES = $(foreach d,$(NATIVE_DIRS),$(wildcard $(addprefix $(d),$(PATS))))
EXTRA_NATIVE_FILES = $(filter-out $(VENDORED_FILES) $(addprefix $(DST_DIR),$(OLLAMA_NATIVE_FILES)),$(ALL_NATIVE_FILES))

remove-stale-files:
	@rm -f $(EXTRA_NATIVE_FILES)
|
|
|
|
# Command-style targets that never correspond to a file on disk.
# (Previously listed "remove-stale-fails", which left remove-stale-files
# undeclared: a file of that name would have silently disabled the cleanup.)
.PHONY: help-sync apply-patches sync create-patches remove-stale-files .WAIT
|
|
|
|
|
|
# Handy debugging for make variables
# Usage: `make print-NAME` prints NAME=<expanded value of NAME>.
print-%: ; @echo '$*=$($*)'
|