From ba0ff1c46a72109b2a492cd42f9710d3f9c02348 Mon Sep 17 00:00:00 2001 From: jmorganca Date: Sun, 9 Jun 2024 19:49:22 -0400 Subject: [PATCH] fix cuda and rocm builds --- llama/.gitignore | 1 + llama/base64.hpp | 784 +- llama/build-info.cpp | 2 +- llama/build_cuda.sh | 15 +- llama/build_hipblas.sh | 92 +- llama/clip.cpp | 2 +- llama/clip.h | 2 +- llama/common.cpp | 32 +- llama/common.h | 5 +- llama/ggml-alloc.c | 2 +- llama/ggml-alloc.h | 154 +- llama/ggml-backend-impl.h | 284 +- llama/ggml-backend.c | 2 +- llama/ggml-backend.h | 2 +- llama/ggml-common.h | 2 +- llama/ggml-cuda.cu | 90 +- llama/ggml-cuda.h | 2 +- llama/ggml-cuda/acc.cu | 94 +- llama/ggml-cuda/acc.cuh | 10 +- llama/ggml-cuda/arange.cu | 68 +- llama/ggml-cuda/arange.cuh | 10 +- llama/ggml-cuda/argsort.cu | 206 +- llama/ggml-cuda/argsort.cuh | 6 +- llama/ggml-cuda/binbcast.cu | 560 +- llama/ggml-cuda/binbcast.cuh | 12 +- llama/ggml-cuda/clamp.cuh | 10 +- llama/ggml-cuda/concat.cuh | 10 +- llama/ggml-cuda/convert.cuh | 26 +- llama/ggml-cuda/dequantize.cuh | 206 +- llama/ggml-cuda/diagmask.cu | 80 +- llama/ggml-cuda/diagmask.cuh | 10 +- llama/ggml-cuda/dmmv.cuh | 36 +- llama/ggml-cuda/fattn-vec-f16.cu | 326 - llama/ggml-cuda/fattn-vec-f32.cu | 275 - llama/ggml-cuda/getrows.cu | 356 +- llama/ggml-cuda/getrows.cuh | 10 +- llama/ggml-cuda/im2col.cu | 208 +- llama/ggml-cuda/im2col.cuh | 10 +- llama/ggml-cuda/mmq.cu | 3 +- llama/ggml-cuda/mmq.cuh | 236 +- llama/ggml-cuda/mmvq.cuh | 14 +- llama/ggml-cuda/norm.cuh | 14 +- llama/ggml-cuda/pad.cu | 98 +- llama/ggml-cuda/pad.cuh | 10 +- llama/ggml-cuda/pool2d.cu | 188 +- llama/ggml-cuda/pool2d.cuh | 10 +- llama/ggml-cuda/quantize.cu | 89 +- llama/ggml-cuda/quantize.cuh | 17 +- llama/ggml-cuda/rope.cuh | 10 +- llama/ggml-cuda/scale.cuh | 10 +- llama/ggml-cuda/softmax.cuh | 10 +- llama/ggml-cuda/sumrows.cu | 80 +- llama/ggml-cuda/sumrows.cuh | 6 +- .../fattn-vec-f16-instance-hs128-f16-f16.cu | 5 + .../fattn-vec-f16-instance-hs128-f16-q4_0.cu | 5 + 
.../fattn-vec-f16-instance-hs128-f16-q4_1.cu | 5 + .../fattn-vec-f16-instance-hs128-f16-q5_0.cu | 5 + .../fattn-vec-f16-instance-hs128-f16-q5_1.cu | 5 + .../fattn-vec-f16-instance-hs128-f16-q8_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_0-f16.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_0-q4_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_0-q4_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_0-q5_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_0-q5_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_0-q8_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_1-f16.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_1-q4_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_1-q4_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_1-q5_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_1-q5_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q4_1-q8_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_0-f16.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_0-q4_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_0-q4_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_0-q5_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_0-q5_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_0-q8_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_1-f16.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_1-q4_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_1-q4_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_1-q5_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_1-q5_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q5_1-q8_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q8_0-f16.cu | 5 + .../fattn-vec-f16-instance-hs128-q8_0-q4_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q8_0-q4_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q8_0-q5_0.cu | 5 + .../fattn-vec-f16-instance-hs128-q8_0-q5_1.cu | 5 + .../fattn-vec-f16-instance-hs128-q8_0-q8_0.cu | 5 + .../fattn-vec-f16-instance-hs256-f16-f16.cu | 5 + .../fattn-vec-f16-instance-hs64-f16-f16.cu | 5 + .../fattn-vec-f16-instance-hs64-f16-q4_0.cu | 5 + .../fattn-vec-f16-instance-hs64-f16-q4_1.cu | 5 + 
.../fattn-vec-f16-instance-hs64-f16-q5_0.cu | 5 + .../fattn-vec-f16-instance-hs64-f16-q5_1.cu | 5 + .../fattn-vec-f16-instance-hs64-f16-q8_0.cu | 5 + .../fattn-vec-f32-instance-hs128-f16-f16.cu | 5 + .../fattn-vec-f32-instance-hs128-f16-q4_0.cu | 5 + .../fattn-vec-f32-instance-hs128-f16-q4_1.cu | 5 + .../fattn-vec-f32-instance-hs128-f16-q5_0.cu | 5 + .../fattn-vec-f32-instance-hs128-f16-q5_1.cu | 5 + .../fattn-vec-f32-instance-hs128-f16-q8_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_0-f16.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_0-q4_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_0-q4_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_0-q5_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_0-q5_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_0-q8_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_1-f16.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_1-q4_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_1-q4_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_1-q5_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_1-q5_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q4_1-q8_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_0-f16.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_0-q4_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_0-q4_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_0-q5_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_0-q5_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_0-q8_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_1-f16.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_1-q4_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_1-q4_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_1-q5_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_1-q5_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q5_1-q8_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q8_0-f16.cu | 5 + .../fattn-vec-f32-instance-hs128-q8_0-q4_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q8_0-q4_1.cu | 5 + .../fattn-vec-f32-instance-hs128-q8_0-q5_0.cu | 5 + .../fattn-vec-f32-instance-hs128-q8_0-q5_1.cu | 5 + 
.../fattn-vec-f32-instance-hs128-q8_0-q8_0.cu | 5 + .../fattn-vec-f32-instance-hs256-f16-f16.cu | 5 + .../fattn-vec-f32-instance-hs64-f16-f16.cu | 5 + .../fattn-vec-f32-instance-hs64-f16-q4_0.cu | 5 + .../fattn-vec-f32-instance-hs64-f16-q4_1.cu | 5 + .../fattn-vec-f32-instance-hs64-f16-q5_0.cu | 5 + .../fattn-vec-f32-instance-hs64-f16-q5_1.cu | 5 + .../fattn-vec-f32-instance-hs64-f16-q8_0.cu | 5 + .../fattn-wmma-f16-instance-kqfloat-cpb16.cu | 10 + .../fattn-wmma-f16-instance-kqfloat-cpb32.cu | 9 + .../fattn-wmma-f16-instance-kqhalf-cpb16.cu | 10 + .../fattn-wmma-f16-instance-kqhalf-cpb32.cu | 10 + .../fattn-wmma-f16-instance-kqhalf-cpb8.cu | 8 + .../template-instances/mmq-instance-q2_k.cu | 5 + .../template-instances/mmq-instance-q3_k.cu | 5 + .../template-instances/mmq-instance-q4_0.cu | 5 + .../template-instances/mmq-instance-q4_1.cu | 5 + .../template-instances/mmq-instance-q4_k.cu | 5 + .../template-instances/mmq-instance-q5_0.cu | 5 + .../template-instances/mmq-instance-q5_1.cu | 5 + .../template-instances/mmq-instance-q5_k.cu | 5 + .../template-instances/mmq-instance-q6_k.cu | 5 + .../template-instances/mmq-instance-q8_0.cu | 5 + llama/ggml-cuda/tsembd.cu | 94 +- llama/ggml-cuda/tsembd.cuh | 10 +- llama/ggml-cuda/upscale.cuh | 10 +- llama/ggml-impl.h | 2 +- llama/ggml-metal-darwin_arm64.m | 2 +- llama/ggml-metal.h | 2 +- llama/ggml-metal.metal | 2 +- llama/ggml-metal.o | Bin 373184 -> 0 bytes llama/ggml-quants.c | 2 +- llama/ggml-quants.h | 268 +- llama/ggml.c | 2 +- llama/ggml.h | 2 +- llama/grammar-parser.cpp | 2 +- llama/grammar-parser.h | 60 +- llama/json-schema-to-grammar.cpp | 2 +- llama/json-schema-to-grammar.h | 2 +- llama/json.hpp | 49532 ++++++++-------- llama/llama.cpp | 2 +- llama/llama.h | 2 +- llama/llava.cpp | 2 +- llama/llava.h | 102 +- llama/log.h | 2 +- llama/sampling.cpp | 2 +- llama/sampling.h | 2 +- llama/stb_image.h | 16794 +++--- llama/sync_llama.sh | 6 +- llama/unicode-data.cpp | 2 +- llama/unicode-data.h | 2 +- llama/unicode.cpp | 2 
+- llama/unicode.h | 2 +- 184 files changed, 36175 insertions(+), 36065 deletions(-) delete mode 100644 llama/ggml-cuda/fattn-vec-f16.cu delete mode 100644 llama/ggml-cuda/fattn-vec-f32.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu create mode 100644 
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu create mode 
100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu create mode 
100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu 
create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu create mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu create mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu delete mode 100644 llama/ggml-metal.o diff --git a/llama/.gitignore b/llama/.gitignore index 9710205b..4204cdea 100644 --- a/llama/.gitignore +++ b/llama/.gitignore @@ -3,3 +3,4 @@ *.lib *.exp *.dll +*.o \ No newline at end of file diff --git a/llama/base64.hpp b/llama/base64.hpp index 563247a6..04df58e8 
100644 --- a/llama/base64.hpp +++ b/llama/base64.hpp @@ -1,392 +1,392 @@ -/* -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. 
- -For more information, please refer to -*/ - -#ifndef PUBLIC_DOMAIN_BASE64_HPP_ -#define PUBLIC_DOMAIN_BASE64_HPP_ - -#include -#include -#include -#include - -class base64_error : public std::runtime_error -{ -public: - using std::runtime_error::runtime_error; -}; - -class base64 -{ -public: - enum class alphabet - { - /** the alphabet is detected automatically */ - auto_, - /** the standard base64 alphabet is used */ - standard, - /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/ - url_filename_safe - }; - - enum class decoding_behavior - { - /** if the input is not padded, the remaining bits are ignored */ - moderate, - /** if a padding character is encounter decoding is finished */ - loose - }; - - /** - Encodes all the elements from `in_begin` to `in_end` to `out`. - - @warning The source and destination cannot overlap. The destination must be able to hold at least - `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator. - - @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than - 8 bits - @tparam Output_iterator the destination; the elements written to it are from the type `char` - @param in_begin the beginning of the source - @param in_end the ending of the source - @param out the destination iterator - @param alphabet which alphabet should be used - @returns the iterator to the next element past the last element copied - @throws see `Input_iterator` and `Output_iterator` - */ - template - static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, - alphabet alphabet = alphabet::standard) - { - constexpr auto pad = '='; - const char* alpha = alphabet == alphabet::url_filename_safe - ? 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" - : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - while (in_begin != in_end) { - std::uint8_t i0 = 0, i1 = 0, i2 = 0; - - // first character - i0 = static_cast(*in_begin); - ++in_begin; - - *out = alpha[i0 >> 2 & 0x3f]; - ++out; - - // part of first character and second - if (in_begin != in_end) { - i1 = static_cast(*in_begin); - ++in_begin; - - *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)]; - ++out; - } else { - *out = alpha[(i0 & 0x3) << 4]; - ++out; - - // last padding - *out = pad; - ++out; - - // last padding - *out = pad; - ++out; - - break; - } - - // part of second character and third - if (in_begin != in_end) { - i2 = static_cast(*in_begin); - ++in_begin; - - *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)]; - ++out; - } else { - *out = alpha[(i1 & 0xf) << 2]; - ++out; - - // last padding - *out = pad; - ++out; - - break; - } - - // rest of third - *out = alpha[i2 & 0x3f]; - ++out; - } - - return out; - } - /** - Encodes a string. - - @param str the string that should be encoded - @param alphabet which alphabet should be used - @returns the encoded base64 string - @throws see base64::encode() - */ - static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard) - { - std::string result; - - result.reserve(required_encode_size(str.length()) + 1); - - encode(str.begin(), str.end(), std::back_inserter(result), alphabet); - - return result; - } - /** - Encodes a char array. 
- - @param buffer the char array - @param size the size of the array - @param alphabet which alphabet should be used - @returns the encoded string - */ - static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard) - { - std::string result; - - result.reserve(required_encode_size(size) + 1); - - encode(buffer, buffer + size, std::back_inserter(result), alphabet); - - return result; - } - /** - Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`, - in other words: inplace decoding is possible. - - @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`, - otherwise the behavior depends on the output iterator. - - @tparam Input_iterator the source; the returned elements are cast to `char` - @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t` - @param in_begin the beginning of the source - @param in_end the ending of the source - @param out the destination iterator - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @returns the iterator to the next element past the last element copied - @throws base64_error depending on the set behavior - @throws see `Input_iterator` and `Output_iterator` - */ - template - static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, - alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - //constexpr auto pad = '='; - std::uint8_t last = 0; - auto bits = 0; - - while (in_begin != in_end) { - auto c = *in_begin; - ++in_begin; - - if (c == '=') { - break; - } - - auto part = _base64_value(alphabet, c); - - // enough bits for one byte - if (bits + 6 >= 8) { - *out = (last << (8 - bits)) | (part >> (bits - 2)); - ++out; - - bits -= 2; - } else { - bits += 6; - } - - last = part; - } - - // 
check padding - if (behavior != decoding_behavior::loose) { - while (in_begin != in_end) { - auto c = *in_begin; - ++in_begin; - - if (c != '=') { - throw base64_error("invalid base64 character."); - } - } - } - - return out; - } - /** - Decodes a string. - - @param str the base64 encoded string - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @returns the decoded string - @throws see base64::decode() - */ - static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - std::string result; - - result.reserve(max_decode_size(str.length())); - - decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior); - - return result; - } - /** - Decodes a string. - - @param buffer the base64 encoded buffer - @param size the size of the buffer - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @returns the decoded string - @throws see base64::decode() - */ - static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - std::string result; - - result.reserve(max_decode_size(size)); - - decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior); - - return result; - } - /** - Decodes a string inplace. - - @param[in,out] str the base64 encoded string - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @throws base64::decode_inplace() - */ - static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin()); - } - /** - Decodes a char array inplace. 
- - @param[in,out] str the string array - @param size the length of the array - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @returns the pointer to the next element past the last element decoded - @throws base64::decode_inplace() - */ - static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - return decode(str, str + size, str, alphabet, behavior); - } - /** - Returns the required decoding size for a given size. The value is calculated with the following formula: - - $$ - \lceil \frac{size}{4} \rceil \cdot 3 - $$ - - @param size the size of the encoded input - @returns the size of the resulting decoded buffer; this the absolute maximum - */ - static std::size_t max_decode_size(std::size_t size) noexcept - { - return (size / 4 + (size % 4 ? 1 : 0)) * 3; - } - /** - Returns the required encoding size for a given size. The value is calculated with the following formula: - - $$ - \lceil \frac{size}{3} \rceil \cdot 4 - $$ - - @param size the size of the decoded input - @returns the size of the resulting encoded buffer - */ - static std::size_t required_encode_size(std::size_t size) noexcept - { - return (size / 3 + (size % 3 ? 
1 : 0)) * 4; - } - -private: - static std::uint8_t _base64_value(alphabet& alphabet, char c) - { - if (c >= 'A' && c <= 'Z') { - return c - 'A'; - } else if (c >= 'a' && c <= 'z') { - return c - 'a' + 26; - } else if (c >= '0' && c <= '9') { - return c - '0' + 52; - } - - // comes down to alphabet - if (alphabet == alphabet::standard) { - if (c == '+') { - return 62; - } else if (c == '/') { - return 63; - } - } else if (alphabet == alphabet::url_filename_safe) { - if (c == '-') { - return 62; - } else if (c == '_') { - return 63; - } - } // auto detect - else { - if (c == '+') { - alphabet = alphabet::standard; - - return 62; - } else if (c == '/') { - alphabet = alphabet::standard; - - return 63; - } else if (c == '-') { - alphabet = alphabet::url_filename_safe; - - return 62; - } else if (c == '_') { - alphabet = alphabet::url_filename_safe; - - return 63; - } - } - - throw base64_error("invalid base64 character."); - } -}; - -#endif // !PUBLIC_DOMAIN_BASE64_HPP_ +/* +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to +*/ + +#ifndef PUBLIC_DOMAIN_BASE64_HPP_ +#define PUBLIC_DOMAIN_BASE64_HPP_ + +#include +#include +#include +#include + +class base64_error : public std::runtime_error +{ +public: + using std::runtime_error::runtime_error; +}; + +class base64 +{ +public: + enum class alphabet + { + /** the alphabet is detected automatically */ + auto_, + /** the standard base64 alphabet is used */ + standard, + /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/ + url_filename_safe + }; + + enum class decoding_behavior + { + /** if the input is not padded, the remaining bits are ignored */ + moderate, + /** if a padding character is encounter decoding is finished */ + loose + }; + + /** + Encodes all the elements from `in_begin` to `in_end` to `out`. + + @warning The source and destination cannot overlap. The destination must be able to hold at least + `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator. 
+ + @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than + 8 bits + @tparam Output_iterator the destination; the elements written to it are from the type `char` + @param in_begin the beginning of the source + @param in_end the ending of the source + @param out the destination iterator + @param alphabet which alphabet should be used + @returns the iterator to the next element past the last element copied + @throws see `Input_iterator` and `Output_iterator` + */ + template + static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, + alphabet alphabet = alphabet::standard) + { + constexpr auto pad = '='; + const char* alpha = alphabet == alphabet::url_filename_safe + ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + while (in_begin != in_end) { + std::uint8_t i0 = 0, i1 = 0, i2 = 0; + + // first character + i0 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[i0 >> 2 & 0x3f]; + ++out; + + // part of first character and second + if (in_begin != in_end) { + i1 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)]; + ++out; + } else { + *out = alpha[(i0 & 0x3) << 4]; + ++out; + + // last padding + *out = pad; + ++out; + + // last padding + *out = pad; + ++out; + + break; + } + + // part of second character and third + if (in_begin != in_end) { + i2 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)]; + ++out; + } else { + *out = alpha[(i1 & 0xf) << 2]; + ++out; + + // last padding + *out = pad; + ++out; + + break; + } + + // rest of third + *out = alpha[i2 & 0x3f]; + ++out; + } + + return out; + } + /** + Encodes a string. 
+ + @param str the string that should be encoded + @param alphabet which alphabet should be used + @returns the encoded base64 string + @throws see base64::encode() + */ + static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard) + { + std::string result; + + result.reserve(required_encode_size(str.length()) + 1); + + encode(str.begin(), str.end(), std::back_inserter(result), alphabet); + + return result; + } + /** + Encodes a char array. + + @param buffer the char array + @param size the size of the array + @param alphabet which alphabet should be used + @returns the encoded string + */ + static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard) + { + std::string result; + + result.reserve(required_encode_size(size) + 1); + + encode(buffer, buffer + size, std::back_inserter(result), alphabet); + + return result; + } + /** + Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`, + in other words: inplace decoding is possible. + + @warning The destination must be able to hold at least `max_decode_size(std::distance(in_begin, in_end))`, + otherwise the behavior depends on the output iterator.
+ + @tparam Input_iterator the source; the returned elements are cast to `char` + @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t` + @param in_begin the beginning of the source + @param in_end the ending of the source + @param out the destination iterator + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the iterator to the next element past the last element copied + @throws base64_error depending on the set behavior + @throws see `Input_iterator` and `Output_iterator` + */ + template<typename Input_iterator, typename Output_iterator> + static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, + alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + //constexpr auto pad = '='; + std::uint8_t last = 0; + auto bits = 0; + + while (in_begin != in_end) { + auto c = *in_begin; + ++in_begin; + + if (c == '=') { + break; + } + + auto part = _base64_value(alphabet, c); + + // enough bits for one byte + if (bits + 6 >= 8) { + *out = (last << (8 - bits)) | (part >> (bits - 2)); + ++out; + + bits -= 2; + } else { + bits += 6; + } + + last = part; + } + + // check padding + if (behavior != decoding_behavior::loose) { + while (in_begin != in_end) { + auto c = *in_begin; + ++in_begin; + + if (c != '=') { + throw base64_error("invalid base64 character."); + } + } + } + + return out; + } + /** + Decodes a string.
+ + @param str the base64 encoded string + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the decoded string + @throws see base64::decode() + */ + static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + std::string result; + + result.reserve(max_decode_size(str.length())); + + decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior); + + return result; + } + /** + Decodes a char buffer. + + @param buffer the base64 encoded buffer + @param size the size of the buffer + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the decoded string + @throws see base64::decode() + */ + static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + std::string result; + + result.reserve(max_decode_size(size)); + + decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior); + + return result; + } + /** + Decodes a string inplace. + + @param[in,out] str the base64 encoded string + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @throws see base64::decode() + */ + static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin()); + } + /** + Decodes a char array inplace.
+ + @param[in,out] str the string array + @param size the length of the array + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the pointer to the next element past the last element decoded + @throws see base64::decode() + */ + static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + return decode(str, str + size, str, alphabet, behavior); + } + /** + Returns the required decoding size for a given size. The value is calculated with the following formula: + + $$ + \lceil \frac{size}{4} \rceil \cdot 3 + $$ + + @param size the size of the encoded input + @returns the size of the resulting decoded buffer; this is the absolute maximum + */ + static std::size_t max_decode_size(std::size_t size) noexcept + { + return (size / 4 + (size % 4 ? 1 : 0)) * 3; + } + /** + Returns the required encoding size for a given size. The value is calculated with the following formula: + + $$ + \lceil \frac{size}{3} \rceil \cdot 4 + $$ + + @param size the size of the decoded input + @returns the size of the resulting encoded buffer + */ + static std::size_t required_encode_size(std::size_t size) noexcept + { + return (size / 3 + (size % 3 ?
1 : 0)) * 4; + } + +private: + static std::uint8_t _base64_value(alphabet& alphabet, char c) + { + if (c >= 'A' && c <= 'Z') { + return c - 'A'; + } else if (c >= 'a' && c <= 'z') { + return c - 'a' + 26; + } else if (c >= '0' && c <= '9') { + return c - '0' + 52; + } + + // comes down to alphabet + if (alphabet == alphabet::standard) { + if (c == '+') { + return 62; + } else if (c == '/') { + return 63; + } + } else if (alphabet == alphabet::url_filename_safe) { + if (c == '-') { + return 62; + } else if (c == '_') { + return 63; + } + } // auto detect + else { + if (c == '+') { + alphabet = alphabet::standard; + + return 62; + } else if (c == '/') { + alphabet = alphabet::standard; + + return 63; + } else if (c == '-') { + alphabet = alphabet::url_filename_safe; + + return 62; + } else if (c == '_') { + alphabet = alphabet::url_filename_safe; + + return 63; + } + } + + throw base64_error("invalid base64 character."); + } +}; + +#endif // !PUBLIC_DOMAIN_BASE64_HPP_ diff --git a/llama/build-info.cpp b/llama/build-info.cpp index 348fda3f..e6eab288 100644 --- a/llama/build-info.cpp +++ b/llama/build-info.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/build_cuda.sh b/llama/build_cuda.sh index c2f0ec03..bd9621bb 100755 --- a/llama/build_cuda.sh +++ b/llama/build_cuda.sh @@ -9,7 +9,7 @@ else fi nvcc \ - -t 12 \ + -t $(nproc) \ --generate-code=arch=compute_50,code=[compute_50,sm_50] \ --generate-code=arch=compute_52,code=[compute_52,sm_52] \ --generate-code=arch=compute_61,code=[compute_61,sm_61] \ @@ -30,9 +30,18 @@ nvcc \ -use_fast_math \ -link \ -shared \ - -fPIC \ -I. 
\ -lcuda -lcublas -lcudart -lcublasLt \ -O3 \ -o $output \ - ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp + ggml-cuda.cu \ + ggml-cuda/*.cu \ + ggml-cuda/template-instances/fattn-wmma*.cu \ + ggml-cuda/template-instances/mmq*.cu \ + ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu \ + ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu \ + ggml-cuda/template-instances/fattn-vec*f16-f16.cu \ + ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp + +# -DGGML_CUDA_USE_GRAPHS=1 +# -DGGML_CUDA_FA_ALL_QUANTS=1 \ No newline at end of file diff --git a/llama/build_hipblas.sh b/llama/build_hipblas.sh index 15ed0cbc..51eb2419 100755 --- a/llama/build_hipblas.sh +++ b/llama/build_hipblas.sh @@ -26,7 +26,7 @@ additional_flags="" if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then output="ggml-hipblas.dll" - additional_flags=" -Xclang --dependent-lib=msvcrt -Wl,/subsystem:console" + additional_flags=" -Xclang --dependent-lib=msvcrt" else output="libggml-hipblas.so" archs+=("${linux_archs[@]}") @@ -36,37 +36,61 @@ for arch in "${archs[@]}"; do additional_flags+=" --offload-arch=$arch" done -hipcc \ - -v \ - -parallel-jobs=12 \ - -O3 \ - -DGGML_USE_CUDA \ - -DGGML_BUILD=1 \ - -DGGML_SHARED=1 \ - -DGGML_CUDA_DMMV_X=32 \ - -DGGML_CUDA_MMV_Y=1 \ - -DGGML_SCHED_MAX_COPIES=4 \ - -DGGML_USE_HIPBLAS \ - -DGGML_USE_LLAMAFILE \ - -DHIP_FAST_MATH \ - -DNDEBUG \ - -DK_QUANTS_PER_ITERATION=2 \ - -D_CRT_SECURE_NO_WARNINGS \ - -DCMAKE_POSITION_INDEPENDENT_CODE=on \ - -D_GNU_SOURCE \ - -Wno-expansion-to-defined \ - -Wno-invalid-noreturn \ - -Wno-ignored-attributes \ - -Wno-pass-failed \ - -Wno-deprecated-declarations \ - -Wno-unused-result \ - -I. 
\ - -lhipblas -lamdhip64 -lrocblas \ - -shared \ - $additional_flags \ - -o $output \ - ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp +# Create an array of all source files, expanding globs +sources=( + $(echo ggml-cuda/template-instances/fattn-wmma*.cu) + $(echo ggml-cuda/template-instances/mmq*.cu) + $(echo ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu) + $(echo ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu) + $(echo ggml-cuda/template-instances/fattn-vec*f16-f16.cu) + ggml-cuda.cu + $(echo ggml-cuda/*.cu) + ggml.c + ggml-backend.c + ggml-alloc.c + ggml-quants.c + sgemm.cpp +) - # -D_DLL \ - # -D_MT \ - # -D_XOPEN_SOURCE=600 \ +# Function to compile a single source file +compile_source() { + src="$1" + hipcc -c -O3 -DGGML_USE_CUDA -DGGML_BUILD=1 -DGGML_SHARED=1 -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 \ + -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_HIPBLAS -DGGML_USE_LLAMAFILE -DHIP_FAST_MATH -DNDEBUG \ + -DK_QUANTS_PER_ITERATION=2 -D_CRT_SECURE_NO_WARNINGS -DCMAKE_POSITION_INDEPENDENT_CODE=on \ + -D_GNU_SOURCE -Wno-expansion-to-defined -Wno-invalid-noreturn -Wno-ignored-attributes -Wno-pass-failed \ + -Wno-deprecated-declarations -Wno-unused-result -I. \ + $additional_flags -o "${src%.cu}.o" "$src" +} + +# Function to handle Ctrl+C +cleanup() { + echo "Terminating all background processes..." + kill 0 +} + +# Set trap to handle SIGINT (Ctrl+C) +trap cleanup SIGINT + +# Limit the number of concurrent jobs +max_jobs=$(nproc) +job_count=0 + +for src in "${sources[@]}"; do + echo "$src" + compile_source "$src" & + job_count=$((job_count + 1)) + if [[ $job_count -ge $max_jobs ]]; then + wait -n + job_count=$((job_count - 1)) + fi +done + +wait + +# Link all object files into a shared library +echo "Linking object files..." 
+hipcc -v -shared -o $output *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o -lhipblas -lamdhip64 -lrocblas + +# Clean up object files after linking +rm -f *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o \ No newline at end of file diff --git a/llama/clip.cpp b/llama/clip.cpp index e6229af0..76fa44f2 100644 --- a/llama/clip.cpp +++ b/llama/clip.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/clip.h b/llama/clip.h index d6c28c5a..b105adf2 100644 --- a/llama/clip.h +++ b/llama/clip.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/common.cpp b/llama/common.cpp index dd8bcee0..2fe2ca98 100644 --- a/llama/common.cpp +++ b/llama/common.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * @@ -226,19 +226,13 @@ void gpt_params_handle_model_default(gpt_params & params) { } params.hf_file = params.model; } else if (params.model.empty()) { - std::string cache_directory = fs_get_cache_directory(); - const bool success = fs_create_directory_with_parents(cache_directory); - if (!success) { - throw std::runtime_error("failed to create cache directory: " + cache_directory); - } - params.model = cache_directory + string_split(params.hf_file, '/').back(); + params.model = fs_get_cache_file(string_split(params.hf_file, '/').back()); } } else if (!params.model_url.empty()) { if (params.model.empty()) { auto f = string_split(params.model_url, '#').front(); f = string_split(f, '?').front(); - f = string_split(f, '/').back(); - params.model = "models/" + f; + params.model = fs_get_cache_file(string_split(f, '/').back()); } } else if (params.model.empty()) { params.model = DEFAULT_MODEL_PATH; @@ -1517,6 +1511,14 
@@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.chat_template = argv[i]; return true; } + if (arg == "--slot-prompt-similarity" || arg == "-sps") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.slot_prompt_similarity = std::stof(argv[i]); + return true; + } if (arg == "-pps") { params.is_pp_shared = true; return true; @@ -1939,6 +1941,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param "set custom jinja chat template (default: template taken from model's metadata)\n" "only commonly used templates are accepted:\n" "https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" }); + options.push_back({ "server", "-sps, --slot-prompt-similarity SIMILARITY", + "how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity }); #ifndef LOG_DISABLE_LOGS options.push_back({ "logging" }); @@ -2295,6 +2299,16 @@ std::string fs_get_cache_directory() { return ensure_trailing_slash(cache_directory); } +std::string fs_get_cache_file(const std::string & filename) { + GGML_ASSERT(filename.find(DIRECTORY_SEPARATOR) == std::string::npos); + std::string cache_directory = fs_get_cache_directory(); + const bool success = fs_create_directory_with_parents(cache_directory); + if (!success) { + throw std::runtime_error("failed to create cache directory: " + cache_directory); + } + return cache_directory + filename; +} + // // Model utils diff --git a/llama/common.h b/llama/common.h index 913d8c2b..e64513ae 100644 --- a/llama/common.h +++ b/llama/common.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * @@ -229,6 +229,8 @@ struct gpt_params { std::string slot_save_path; + float slot_prompt_similarity = 0.5f; + // batched-bench params bool is_pp_shared = 
false; @@ -301,6 +303,7 @@ bool fs_validate_filename(const std::string & filename); bool fs_create_directory_with_parents(const std::string & path); std::string fs_get_cache_directory(); +std::string fs_get_cache_file(const std::string & filename); // // Model utils diff --git a/llama/ggml-alloc.c b/llama/ggml-alloc.c index 8d296910..3a538098 100644 --- a/llama/ggml-alloc.c +++ b/llama/ggml-alloc.c @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-alloc.h b/llama/ggml-alloc.h index b3647884..ecceb044 100644 --- a/llama/ggml-alloc.h +++ b/llama/ggml-alloc.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * @@ -24,79 +24,79 @@ * SOFTWARE. */ -#pragma once - -#include "ggml.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; -typedef struct ggml_backend_buffer * ggml_backend_buffer_t; -typedef struct ggml_backend * ggml_backend_t; - -// Tensor allocator -struct ggml_tallocr { - ggml_backend_buffer_t buffer; - void * base; - size_t alignment; - size_t offset; -}; - -GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer); -GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); - -// Graph allocator -/* - Example usage: - ggml_gallocr_t galloc = ggml_gallocr_new(ggml_bacckend_cpu_buffer_type()); - - // optional: create a worst-case graph and reserve the buffers to avoid reallocations - ggml_gallocr_reserve(galloc, build_graph(max_batch)); - - // allocate the graph - struct ggml_cgraph * graph = build_graph(batch); - ggml_gallocr_alloc_graph(galloc, graph); - - printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0)); - - // evaluate the graph - ggml_backend_graph_compute(backend, 
graph); -*/ - -// special tensor flags for use with the graph allocator: -// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses -// ggml_set_output(): output tensors are never freed and never overwritten - -typedef struct ggml_gallocr * ggml_gallocr_t; - -GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft); -GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs); -GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc); - -// pre-allocate buffers from a measure graph - does not allocate or modify the graph -// call with a worst-case graph to avoid buffer reallocations -// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed -// returns false if the buffer allocation failed -GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); -GGML_API bool ggml_gallocr_reserve_n( - ggml_gallocr_t galloc, - struct ggml_cgraph * graph, - const int * node_buffer_ids, - const int * leaf_buffer_ids); - -// automatic reallocation if the topology changes when using a single buffer -// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) -GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph); - -GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id); - -// Utils -// Create a buffer and allocate all the tensors in a ggml_context -GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); -GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend); - -#ifdef __cplusplus -} -#endif +#pragma once + +#include "ggml.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct 
ggml_backend_buffer_type * ggml_backend_buffer_type_t; +typedef struct ggml_backend_buffer * ggml_backend_buffer_t; +typedef struct ggml_backend * ggml_backend_t; + +// Tensor allocator +struct ggml_tallocr { + ggml_backend_buffer_t buffer; + void * base; + size_t alignment; + size_t offset; +}; + +GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer); +GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); + +// Graph allocator +/* + Example usage: + ggml_gallocr_t galloc = ggml_gallocr_new(ggml_bacckend_cpu_buffer_type()); + + // optional: create a worst-case graph and reserve the buffers to avoid reallocations + ggml_gallocr_reserve(galloc, build_graph(max_batch)); + + // allocate the graph + struct ggml_cgraph * graph = build_graph(batch); + ggml_gallocr_alloc_graph(galloc, graph); + + printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0)); + + // evaluate the graph + ggml_backend_graph_compute(backend, graph); +*/ + +// special tensor flags for use with the graph allocator: +// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses +// ggml_set_output(): output tensors are never freed and never overwritten + +typedef struct ggml_gallocr * ggml_gallocr_t; + +GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft); +GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs); +GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc); + +// pre-allocate buffers from a measure graph - does not allocate or modify the graph +// call with a worst-case graph to avoid buffer reallocations +// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed +// returns false if the buffer allocation failed +GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); +GGML_API bool 
ggml_gallocr_reserve_n( + ggml_gallocr_t galloc, + struct ggml_cgraph * graph, + const int * node_buffer_ids, + const int * leaf_buffer_ids); + +// automatic reallocation if the topology changes when using a single buffer +// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) +GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph); + +GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id); + +// Utils +// Create a buffer and allocate all the tensors in a ggml_context +GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); +GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend); + +#ifdef __cplusplus +} +#endif diff --git a/llama/ggml-backend-impl.h b/llama/ggml-backend-impl.h index 91470140..d30845f9 100644 --- a/llama/ggml-backend-impl.h +++ b/llama/ggml-backend-impl.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * @@ -24,144 +24,144 @@ * SOFTWARE. 
*/ -#pragma once - -// ggml-backend internal header - -#include "ggml-backend.h" - -#ifdef __cplusplus -extern "C" { -#endif - - // - // Backend buffer - // - - // buffer type - typedef void * ggml_backend_buffer_type_context_t; - - struct ggml_backend_buffer_type_i { - const char * (*GGML_CALL get_name) (ggml_backend_buffer_type_t buft); - ggml_backend_buffer_t (*GGML_CALL alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size); - size_t (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment - size_t (*GGML_CALL get_max_size) (ggml_backend_buffer_type_t buft); // allocation max size - size_t (*GGML_CALL get_alloc_size) (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding - bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend - // check if tensor data is in host memory - // should be equivalent to supports_backend(buft, ggml_backend_cpu_init()) - bool (*GGML_CALL is_host) (ggml_backend_buffer_type_t buft); - }; - - struct ggml_backend_buffer_type { - struct ggml_backend_buffer_type_i iface; - ggml_backend_buffer_type_context_t context; - }; - - // buffer - typedef void * ggml_backend_buffer_context_t; - - struct ggml_backend_buffer_i { - const char * (*GGML_CALL get_name) (ggml_backend_buffer_t buffer); - void (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer); - void * (*GGML_CALL get_base) (ggml_backend_buffer_t buffer); - void (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); - void (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); - void (*GGML_CALL get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); - bool (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct 
ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer - void (*GGML_CALL clear) (ggml_backend_buffer_t buffer, uint8_t value); - void (*GGML_CALL reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras - }; - - struct ggml_backend_buffer { - struct ggml_backend_buffer_i iface; - ggml_backend_buffer_type_t buft; - ggml_backend_buffer_context_t context; - size_t size; - enum ggml_backend_buffer_usage usage; - }; - - GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init( - ggml_backend_buffer_type_t buft, - struct ggml_backend_buffer_i iface, - ggml_backend_buffer_context_t context, - size_t size); - - // do not use directly, use ggml_backend_tensor_copy instead - bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst); - - // buffer that contains a collection of buffers - GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers); - GGML_CALL bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer); - GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); - - // - // Backend - // - - typedef void * ggml_backend_context_t; - - struct ggml_backend_i { - const char * (*GGML_CALL get_name)(ggml_backend_t backend); - - void (*GGML_CALL free)(ggml_backend_t backend); - - // buffer allocation - ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend); - - // (optional) asynchronous tensor data access - void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); - void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); - bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t 
backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst); - - // (optional) complete all pending operations - void (*GGML_CALL synchronize)(ggml_backend_t backend); - - // compute graph with a plan (not used currently) - ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph); - void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); - - // compute graph with a plan - enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); - // compute graph without a plan (async) - enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph); - - // check if the backend supports an operation - bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); - - // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer - // these should be expensive operations with large batch sizes that may benefit from running on this backend - // even if the weight has to be copied from the CPU temporarily - bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op); - - // (optional) event synchronization - ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend); - void (*GGML_CALL event_free) (ggml_backend_event_t event); - void (*GGML_CALL event_record) (ggml_backend_event_t event); - void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event); - void (*GGML_CALL event_synchronize) (ggml_backend_event_t event); - }; - - struct ggml_backend { - ggml_guid_t guid; - - struct ggml_backend_i iface; - ggml_backend_context_t context; - }; - - struct ggml_backend_event { - ggml_backend_t backend; - void * context; - }; - - // - // Backend registry - // - - typedef ggml_backend_t (*GGML_CALL ggml_backend_init_fn)(const char * params, void * user_data); - - GGML_CALL void 
ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data); - -#ifdef __cplusplus -} -#endif +#pragma once + +// ggml-backend internal header + +#include "ggml-backend.h" + +#ifdef __cplusplus +extern "C" { +#endif + + // + // Backend buffer + // + + // buffer type + typedef void * ggml_backend_buffer_type_context_t; + + struct ggml_backend_buffer_type_i { + const char * (*GGML_CALL get_name) (ggml_backend_buffer_type_t buft); + ggml_backend_buffer_t (*GGML_CALL alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size); + size_t (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment + size_t (*GGML_CALL get_max_size) (ggml_backend_buffer_type_t buft); // allocation max size + size_t (*GGML_CALL get_alloc_size) (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding + bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend + // check if tensor data is in host memory + // should be equivalent to supports_backend(buft, ggml_backend_cpu_init()) + bool (*GGML_CALL is_host) (ggml_backend_buffer_type_t buft); + }; + + struct ggml_backend_buffer_type { + struct ggml_backend_buffer_type_i iface; + ggml_backend_buffer_type_context_t context; + }; + + // buffer + typedef void * ggml_backend_buffer_context_t; + + struct ggml_backend_buffer_i { + const char * (*GGML_CALL get_name) (ggml_backend_buffer_t buffer); + void (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer); + void * (*GGML_CALL get_base) (ggml_backend_buffer_t buffer); + void (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); + void (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + void (*GGML_CALL get_tensor) 
(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); + bool (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer + void (*GGML_CALL clear) (ggml_backend_buffer_t buffer, uint8_t value); + void (*GGML_CALL reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras + }; + + struct ggml_backend_buffer { + struct ggml_backend_buffer_i iface; + ggml_backend_buffer_type_t buft; + ggml_backend_buffer_context_t context; + size_t size; + enum ggml_backend_buffer_usage usage; + }; + + GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init( + ggml_backend_buffer_type_t buft, + struct ggml_backend_buffer_i iface, + ggml_backend_buffer_context_t context, + size_t size); + + // do not use directly, use ggml_backend_tensor_copy instead + bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst); + + // buffer that contains a collection of buffers + GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers); + GGML_CALL bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer); + GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); + + // + // Backend + // + + typedef void * ggml_backend_context_t; + + struct ggml_backend_i { + const char * (*GGML_CALL get_name)(ggml_backend_t backend); + + void (*GGML_CALL free)(ggml_backend_t backend); + + // buffer allocation + ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend); + + // (optional) asynchronous tensor data access + void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + void (*GGML_CALL 
get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); + bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst); + + // (optional) complete all pending operations + void (*GGML_CALL synchronize)(ggml_backend_t backend); + + // compute graph with a plan (not used currently) + ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph); + void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); + + // compute graph with a plan + enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); + // compute graph without a plan (async) + enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph); + + // check if the backend supports an operation + bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); + + // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer + // these should be expensive operations with large batch sizes that may benefit from running on this backend + // even if the weight has to be copied from the CPU temporarily + bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op); + + // (optional) event synchronization + ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend); + void (*GGML_CALL event_free) (ggml_backend_event_t event); + void (*GGML_CALL event_record) (ggml_backend_event_t event); + void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event); + void (*GGML_CALL event_synchronize) (ggml_backend_event_t event); + }; + + struct ggml_backend { + ggml_guid_t guid; + + struct ggml_backend_i iface; + ggml_backend_context_t context; + }; + + struct ggml_backend_event { + ggml_backend_t 
backend; + void * context; + }; + + // + // Backend registry + // + + typedef ggml_backend_t (*GGML_CALL ggml_backend_init_fn)(const char * params, void * user_data); + + GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data); + +#ifdef __cplusplus +} +#endif diff --git a/llama/ggml-backend.c b/llama/ggml-backend.c index 9225f7ab..0e6aa8a0 100644 --- a/llama/ggml-backend.c +++ b/llama/ggml-backend.c @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-backend.h b/llama/ggml-backend.h index 1041d35e..84b5a5d1 100644 --- a/llama/ggml-backend.h +++ b/llama/ggml-backend.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-common.h b/llama/ggml-common.h index dbbaa5d1..b13765b8 100644 --- a/llama/ggml-common.h +++ b/llama/ggml-common.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-cuda.cu b/llama/ggml-cuda.cu index a7b17d44..4a598874 100644 --- a/llama/ggml-cuda.cu +++ b/llama/ggml-cuda.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * @@ -1377,10 +1377,30 @@ static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) { GGML_UNUSED(main_device); } +static cudaError_t ggml_cuda_Memcpy2DPeerAsync( + void * dst, int dstDevice, size_t dpitch, void * src, int srcDevice, size_t spitch, size_t width, size_t height, cudaStream_t stream) { + +#if !defined(GGML_USE_HIPBLAS) + // cudaMemcpy2DAsync may fail with copies between vmm pools of different devices + 
cudaMemcpy3DPeerParms p = {}; + p.dstDevice = dstDevice; + p.dstPtr = make_cudaPitchedPtr(dst, dpitch, dpitch, height); + p.srcDevice = srcDevice; + p.srcPtr = make_cudaPitchedPtr(src, spitch, spitch, height); + p.extent = make_cudaExtent(width, height, 1); + return cudaMemcpy3DPeerAsync(&p, stream); +#else + // HIP does not support cudaMemcpy3DPeerAsync or vmm pools + GGML_UNUSED(dstDevice); + GGML_UNUSED(srcDevice); + return cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream); +#endif // !defined(GGML_USE_HIPBLAS) +} + static void ggml_cuda_op_mul_mat( ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_cuda_op_mul_mat_t op, - const bool convert_src1_to_q8_1) { + quantize_cuda_t quantize_src1) { const int64_t ne00 = src0->ne[0]; const int64_t ne01 = src0->ne[1]; @@ -1437,7 +1457,9 @@ static void ggml_cuda_op_mul_mat( } struct dev_data { - ggml_cuda_pool_alloc src0_dd_alloc; + int cc; + + ggml_cuda_pool_alloc src0_dd_alloc; ggml_cuda_pool_alloc src1_ddf_alloc; ggml_cuda_pool_alloc src1_ddq_alloc; ggml_cuda_pool_alloc dst_dd_alloc; @@ -1456,6 +1478,8 @@ static void ggml_cuda_op_mul_mat( int used_devices = 0; for (int id = 0; id < ggml_backend_cuda_get_device_count(); ++id) { + dev[id].cc = ggml_cuda_info().devices[id].cc; + // by default, use all rows dev[id].row_low = 0; dev[id].row_high = ne01; @@ -1506,11 +1530,15 @@ static void ggml_cuda_op_mul_mat( dev[id].src1_ddf = dev[id].src1_ddf_alloc.alloc(ctx.pool(id), ggml_nelements(src1)); } - if (convert_src1_to_q8_1) { - dev[id].src1_ddq = dev[id].src1_ddq_alloc.alloc(ctx.pool(id), nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs); + if (quantize_src1) { + size_t src_1_ddq_size = nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs; + if (quantize_src1 == quantize_mmq_q8_1_cuda) { + src_1_ddq_size += get_mmq_x_max_host(dev[id].cc)*sizeof(block_q8_1_mmq); + } + dev[id].src1_ddq = dev[id].src1_ddq_alloc.alloc(ctx.pool(id), 
src_1_ddq_size); if (src1_on_device && src1_is_contiguous) { - quantize_row_q8_1_cuda(dev[id].src1_ddf, dev[id].src1_ddq, ne10, nrows1, src1_padded_col_size, stream); + quantize_src1(dev[id].src1_ddf, dev[id].src1_ddq, ne10, ne11, ne12*ne13, src1_padded_col_size, src0->type, stream); CUDA_CHECK(cudaGetLastError()); } } @@ -1556,7 +1584,12 @@ static void ggml_cuda_op_mul_mat( const int64_t i03 = i0 / ne12; const int64_t i02 = i0 % ne12; - const size_t src1_ddq_i_offset = (i0*ne11 + src1_col_0) * src1_padded_col_size*q8_1_ts/q8_1_bs; + size_t src1_ddq_i_offset = i0*ne11 * src1_padded_col_size*q8_1_ts/q8_1_bs; + if (quantize_src1 == quantize_mmq_q8_1_cuda) { + src1_ddq_i_offset += src1_col_0 * sizeof(block_q8_1_mmq); + } else { + src1_ddq_i_offset += src1_col_0 * src1_padded_col_size*q8_1_ts/q8_1_bs; + } // for split tensors the data begins at i0 == i0_offset_low char * src0_dd_i = dev[id].src0_dd + (i0/i02_divisor) * (ne01*ne00*src0_ts)/src0_bs; @@ -1573,10 +1606,17 @@ static void ggml_cuda_op_mul_mat( // copy src0, src1 to device if necessary if (src1_is_contiguous) { if (id != ctx.device) { - if (convert_src1_to_q8_1) { + if (quantize_src1) { char * src1_ddq_i_source = dev[ctx.device].src1_ddq + src1_ddq_i_offset; - CUDA_CHECK(cudaMemcpyPeerAsync(src1_ddq_i, id, src1_ddq_i_source, ctx.device, - src1_ncols*src1_padded_col_size*q8_1_ts/q8_1_bs, stream)); + if (quantize_src1 == quantize_mmq_q8_1_cuda) { + const size_t pitch = ne11*sizeof(block_q8_1_mmq); + const size_t width = src1_ncols*sizeof(block_q8_1_mmq); + const size_t height = src1_padded_col_size/(4*QK8_1); + CUDA_CHECK(ggml_cuda_Memcpy2DPeerAsync(src1_ddq_i, id, pitch, src1_ddq_i_source, ctx.device, pitch, width, height, stream)); + } else { + CUDA_CHECK(cudaMemcpyPeerAsync( + src1_ddq_i, id, src1_ddq_i_source, ctx.device, src1_ncols*src1_padded_col_size*q8_1_ts/q8_1_bs, stream)); + } } else { float * src1_ddf_i_source = (float *) src1->data; src1_ddf_i_source += (i0*ne11 + src1_col_0) * ne10; @@ -1591,8 
+1631,8 @@ static void ggml_cuda_op_mul_mat( GGML_ASSERT(false); } - if (convert_src1_to_q8_1 && !src1_is_contiguous) { - quantize_row_q8_1_cuda(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream); + if (quantize_src1 && !src1_is_contiguous) { + quantize_src1(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, 1, src1_padded_col_size, src0->type, stream); CUDA_CHECK(cudaGetLastError()); } @@ -1617,22 +1657,8 @@ static void ggml_cuda_op_mul_mat( float * dhf_dst_i = (float *) ((char *) dst_off_device + i02*nb2 + i03*nb3); GGML_ASSERT(dst->nb[1] == ne0*sizeof(float)); dhf_dst_i += src1_col_0*ne0 + dev[id].row_low; -#if !defined(GGML_USE_HIPBLAS) - // cudaMemcpy2DAsync may fail with copies between vmm pools of different devices - cudaMemcpy3DPeerParms p = {}; - p.dstDevice = ctx.device; - p.dstPtr = make_cudaPitchedPtr(dhf_dst_i, ne0*sizeof(float), row_diff, src1_ncols); - p.srcDevice = id; - p.srcPtr = make_cudaPitchedPtr(dst_dd_i, row_diff*sizeof(float), row_diff, src1_ncols); - p.extent = make_cudaExtent(row_diff*sizeof(float), src1_ncols, 1); - CUDA_CHECK(cudaMemcpy3DPeerAsync(&p, stream)); -#else - // HIP does not support cudaMemcpy3DPeerAsync or vmm pools - CUDA_CHECK(cudaMemcpy2DAsync(dhf_dst_i, ne0*sizeof(float), - dst_dd_i, row_diff*sizeof(float), - row_diff*sizeof(float), src1_ncols, - cudaMemcpyDeviceToDevice, stream)); -#endif + CUDA_CHECK(ggml_cuda_Memcpy2DPeerAsync( + dhf_dst_i, ctx.device, ne0*sizeof(float), dst_dd_i, id, row_diff*sizeof(float), row_diff*sizeof(float), src1_ncols, stream)); } else { float * dhf_dst_i = (float *) ((char *) dst_off_device + i02*nb2 + i03*nb3); GGML_ASSERT(dst->nb[1] == ne0*sizeof(float)); @@ -1971,13 +1997,13 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor // KQ + KQV multi-batch ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst); } else if (use_dequantize_mul_mat_vec) { - ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, false); + 
ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, nullptr); } else if (use_mul_mat_vec_q) { - ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true); + ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, quantize_row_q8_1_cuda); } else if (use_mul_mat_q) { - ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_q, true); + ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_q, quantize_mmq_q8_1_cuda); } else { - ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_cublas, false); + ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_cublas, nullptr); } } diff --git a/llama/ggml-cuda.h b/llama/ggml-cuda.h index 821853e8..684e009c 100644 --- a/llama/ggml-cuda.h +++ b/llama/ggml-cuda.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-cuda/acc.cu b/llama/ggml-cuda/acc.cu index 96bfe1c9..8d705ac9 100644 --- a/llama/ggml-cuda/acc.cu +++ b/llama/ggml-cuda/acc.cu @@ -1,47 +1,47 @@ -#include "acc.cuh" - -static __global__ void acc_f32(const float * x, const float * y, float * dst, const int ne, - const int ne10, const int ne11, const int ne12, - const int nb1, const int nb2, int offset) { - const int i = blockDim.x * blockIdx.x + threadIdx.x; - if (i >= ne) { - return; - } - int src1_idx = i - offset; - int oz = src1_idx / nb2; - int oy = (src1_idx - (oz * nb2)) / nb1; - int ox = src1_idx % nb1; - if (src1_idx >= 0 && ox < ne10 && oy < ne11 && oz < ne12) { - dst[i] = x[i] + y[ox + oy * ne10 + oz * ne10 * ne11]; - } else { - dst[i] = x[i]; - } -} - -static void acc_f32_cuda(const float * x, const float * y, float * dst, const int n_elements, - const int ne10, const int ne11, const int ne12, - const int nb1, const int nb2, const int offset, cudaStream_t stream) { - int num_blocks = (n_elements + CUDA_ACC_BLOCK_SIZE - 1) / 
CUDA_ACC_BLOCK_SIZE; - acc_f32<<>>(x, y, dst, n_elements, ne10, ne11, ne12, nb1, nb2, offset); -} - -void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const ggml_tensor * src1 = dst->src[1]; - const float * src0_d = (const float *)src0->data; - const float * src1_d = (const float *)src1->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT(src1->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F32); - GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported - - int nb1 = dst->op_params[0] / 4; // 4 bytes of float32 - int nb2 = dst->op_params[1] / 4; // 4 bytes of float32 - // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused - int offset = dst->op_params[3] / 4; // offset in bytes - - acc_f32_cuda(src0_d, src1_d, dst_d, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, stream); -} +#include "acc.cuh" + +static __global__ void acc_f32(const float * x, const float * y, float * dst, const int ne, + const int ne10, const int ne11, const int ne12, + const int nb1, const int nb2, int offset) { + const int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i >= ne) { + return; + } + int src1_idx = i - offset; + int oz = src1_idx / nb2; + int oy = (src1_idx - (oz * nb2)) / nb1; + int ox = src1_idx % nb1; + if (src1_idx >= 0 && ox < ne10 && oy < ne11 && oz < ne12) { + dst[i] = x[i] + y[ox + oy * ne10 + oz * ne10 * ne11]; + } else { + dst[i] = x[i]; + } +} + +static void acc_f32_cuda(const float * x, const float * y, float * dst, const int n_elements, + const int ne10, const int ne11, const int ne12, + const int nb1, const int nb2, const int offset, cudaStream_t stream) { + int num_blocks = (n_elements + CUDA_ACC_BLOCK_SIZE - 1) / CUDA_ACC_BLOCK_SIZE; + acc_f32<<>>(x, y, dst, n_elements, ne10, ne11, ne12, nb1, nb2, offset); +} + +void 
ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + const float * src0_d = (const float *)src0->data; + const float * src1_d = (const float *)src1->data; + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported + + int nb1 = dst->op_params[0] / 4; // 4 bytes of float32 + int nb2 = dst->op_params[1] / 4; // 4 bytes of float32 + // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused + int offset = dst->op_params[3] / 4; // offset in bytes + + acc_f32_cuda(src0_d, src1_d, dst_d, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, stream); +} diff --git a/llama/ggml-cuda/acc.cuh b/llama/ggml-cuda/acc.cuh index 1168ea1b..a476eb9a 100644 --- a/llama/ggml-cuda/acc.cuh +++ b/llama/ggml-cuda/acc.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_ACC_BLOCK_SIZE 256 - -void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_ACC_BLOCK_SIZE 256 + +void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/arange.cu b/llama/ggml-cuda/arange.cu index b5e495a2..e35045dd 100644 --- a/llama/ggml-cuda/arange.cu +++ b/llama/ggml-cuda/arange.cu @@ -1,34 +1,34 @@ -#include "arange.cuh" - -static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) { - // blockIDx.x: idx of ne0 / BLOCK_SIZE - int nidx = threadIdx.x + blockIdx.x * blockDim.x; - if (nidx >= ne0) { - return; - } - dst[nidx] = start + step * nidx; -} - -static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) { - int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / 
CUDA_ARANGE_BLOCK_SIZE; - arange_f32<<>>(dst, ne0, start, step); -} - -void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(dst->type == GGML_TYPE_F32); - - float start; - float stop; - float step; - memcpy(&start, (float *)dst->op_params + 0, sizeof(float)); - memcpy(&stop, (float *)dst->op_params + 1, sizeof(float)); - memcpy(&step, (float *)dst->op_params + 2, sizeof(float)); - - int64_t steps = (int64_t)ceil((stop - start) / step); - GGML_ASSERT(ggml_nelements(dst) == steps); - - arange_f32_cuda(dst_d, dst->ne[0], start, step, stream); -} +#include "arange.cuh" + +static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) { + // blockIDx.x: idx of ne0 / BLOCK_SIZE + int nidx = threadIdx.x + blockIdx.x * blockDim.x; + if (nidx >= ne0) { + return; + } + dst[nidx] = start + step * nidx; +} + +static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) { + int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE; + arange_f32<<>>(dst, ne0, start, step); +} + +void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(dst->type == GGML_TYPE_F32); + + float start; + float stop; + float step; + memcpy(&start, (float *)dst->op_params + 0, sizeof(float)); + memcpy(&stop, (float *)dst->op_params + 1, sizeof(float)); + memcpy(&step, (float *)dst->op_params + 2, sizeof(float)); + + int64_t steps = (int64_t)ceil((stop - start) / step); + GGML_ASSERT(ggml_nelements(dst) == steps); + + arange_f32_cuda(dst_d, dst->ne[0], start, step, stream); +} diff --git a/llama/ggml-cuda/arange.cuh b/llama/ggml-cuda/arange.cuh index 41e74fdf..13236306 100644 --- a/llama/ggml-cuda/arange.cuh +++ b/llama/ggml-cuda/arange.cuh @@ -1,5 +1,5 @@ -#include 
"common.cuh" - -#define CUDA_ARANGE_BLOCK_SIZE 256 - -void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_ARANGE_BLOCK_SIZE 256 + +void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/argsort.cu b/llama/ggml-cuda/argsort.cu index 16414406..d5b42c66 100644 --- a/llama/ggml-cuda/argsort.cu +++ b/llama/ggml-cuda/argsort.cu @@ -1,103 +1,103 @@ -#include "argsort.cuh" - -template -static inline __device__ void ggml_cuda_swap(T & a, T & b) { - T tmp = a; - a = b; - b = tmp; -} - -template -static __global__ void k_argsort_f32_i32(const float * x, int * dst, const int ncols, int ncols_pad) { - // bitonic sort - int col = threadIdx.x; - int row = blockIdx.y; - - if (col >= ncols_pad) { - return; - } - - const float * x_row = x + row * ncols; - extern __shared__ int dst_row[]; - - // initialize indices - dst_row[col] = col; - - __syncthreads(); - - for (int k = 2; k <= ncols_pad; k *= 2) { - for (int j = k / 2; j > 0; j /= 2) { - int ixj = col ^ j; - if (ixj > col) { - if ((col & k) == 0) { - if (dst_row[col] >= ncols || - (dst_row[ixj] < ncols && (order == GGML_SORT_ORDER_ASC ? - x_row[dst_row[col]] > x_row[dst_row[ixj]] : - x_row[dst_row[col]] < x_row[dst_row[ixj]])) - ) { - ggml_cuda_swap(dst_row[col], dst_row[ixj]); - } - } else { - if (dst_row[ixj] >= ncols || - (dst_row[col] < ncols && (order == GGML_SORT_ORDER_ASC ? 
- x_row[dst_row[col]] < x_row[dst_row[ixj]] : - x_row[dst_row[col]] > x_row[dst_row[ixj]])) - ) { - ggml_cuda_swap(dst_row[col], dst_row[ixj]); - } - } - } - __syncthreads(); - } - } - - // copy the result to dst without the padding - if (col < ncols) { - dst[row * ncols + col] = dst_row[col]; - } -} - -static int next_power_of_2(int x) { - int n = 1; - while (n < x) { - n *= 2; - } - return n; -} - -static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, const int nrows, ggml_sort_order order, cudaStream_t stream) { - // bitonic sort requires ncols to be power of 2 - const int ncols_pad = next_power_of_2(ncols); - - const dim3 block_dims(ncols_pad, 1, 1); - const dim3 block_nums(1, nrows, 1); - const size_t shared_mem = ncols_pad * sizeof(int); - - GGML_ASSERT(shared_mem <= ggml_cuda_info().devices[ggml_cuda_get_device()].smpb); - - if (order == GGML_SORT_ORDER_ASC) { - k_argsort_f32_i32<<>>(x, dst, ncols, ncols_pad); - } else if (order == GGML_SORT_ORDER_DESC) { - k_argsort_f32_i32<<>>(x, dst, ncols, ncols_pad); - } else { - GGML_ASSERT(false); - } -} - -void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const float * src0_d = (const float *)src0->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_I32); - GGML_ASSERT(ggml_is_contiguous(src0)); - - const int64_t ncols = src0->ne[0]; - const int64_t nrows = ggml_nrows(src0); - - enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0]; - - argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream); -} +#include "argsort.cuh" + +template +static inline __device__ void ggml_cuda_swap(T & a, T & b) { + T tmp = a; + a = b; + b = tmp; +} + +template +static __global__ void k_argsort_f32_i32(const float * x, int * dst, const int ncols, int ncols_pad) { + // bitonic sort + int col = 
threadIdx.x; + int row = blockIdx.y; + + if (col >= ncols_pad) { + return; + } + + const float * x_row = x + row * ncols; + extern __shared__ int dst_row[]; + + // initialize indices + dst_row[col] = col; + + __syncthreads(); + + for (int k = 2; k <= ncols_pad; k *= 2) { + for (int j = k / 2; j > 0; j /= 2) { + int ixj = col ^ j; + if (ixj > col) { + if ((col & k) == 0) { + if (dst_row[col] >= ncols || + (dst_row[ixj] < ncols && (order == GGML_SORT_ORDER_ASC ? + x_row[dst_row[col]] > x_row[dst_row[ixj]] : + x_row[dst_row[col]] < x_row[dst_row[ixj]])) + ) { + ggml_cuda_swap(dst_row[col], dst_row[ixj]); + } + } else { + if (dst_row[ixj] >= ncols || + (dst_row[col] < ncols && (order == GGML_SORT_ORDER_ASC ? + x_row[dst_row[col]] < x_row[dst_row[ixj]] : + x_row[dst_row[col]] > x_row[dst_row[ixj]])) + ) { + ggml_cuda_swap(dst_row[col], dst_row[ixj]); + } + } + } + __syncthreads(); + } + } + + // copy the result to dst without the padding + if (col < ncols) { + dst[row * ncols + col] = dst_row[col]; + } +} + +static int next_power_of_2(int x) { + int n = 1; + while (n < x) { + n *= 2; + } + return n; +} + +static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, const int nrows, ggml_sort_order order, cudaStream_t stream) { + // bitonic sort requires ncols to be power of 2 + const int ncols_pad = next_power_of_2(ncols); + + const dim3 block_dims(ncols_pad, 1, 1); + const dim3 block_nums(1, nrows, 1); + const size_t shared_mem = ncols_pad * sizeof(int); + + GGML_ASSERT(shared_mem <= ggml_cuda_info().devices[ggml_cuda_get_device()].smpb); + + if (order == GGML_SORT_ORDER_ASC) { + k_argsort_f32_i32<<>>(x, dst, ncols, ncols_pad); + } else if (order == GGML_SORT_ORDER_DESC) { + k_argsort_f32_i32<<>>(x, dst, ncols, ncols_pad); + } else { + GGML_ASSERT(false); + } +} + +void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const float * src0_d = (const float *)src0->data; + float * dst_d 
= (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_I32); + GGML_ASSERT(ggml_is_contiguous(src0)); + + const int64_t ncols = src0->ne[0]; + const int64_t nrows = ggml_nrows(src0); + + enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0]; + + argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream); +} diff --git a/llama/ggml-cuda/argsort.cuh b/llama/ggml-cuda/argsort.cuh index 68a00154..b455d21d 100644 --- a/llama/ggml-cuda/argsort.cuh +++ b/llama/ggml-cuda/argsort.cuh @@ -1,3 +1,3 @@ -#include "common.cuh" - -void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/binbcast.cu b/llama/ggml-cuda/binbcast.cu index 19b08b74..9ce4a7ca 100644 --- a/llama/ggml-cuda/binbcast.cu +++ b/llama/ggml-cuda/binbcast.cu @@ -1,280 +1,280 @@ -#include "binbcast.cuh" - -static __device__ __forceinline__ float op_repeat(const float a, const float b) { - return b; - GGML_UNUSED(a); -} - -static __device__ __forceinline__ float op_add(const float a, const float b) { - return a + b; -} - -static __device__ __forceinline__ float op_mul(const float a, const float b) { - return a * b; -} - -static __device__ __forceinline__ float op_div(const float a, const float b) { - return a / b; -} - -template -static __global__ void k_bin_bcast(const src0_t * src0, const src1_t * src1, dst_t * dst, - int ne0, int ne1, int ne2, int ne3, - int ne10, int ne11, int ne12, int ne13, - /*int s0, */ int s1, int s2, int s3, - /*int s00,*/ int s01, int s02, int s03, - /*int s10,*/ int s11, int s12, int s13) { - const int i0s = blockDim.x*blockIdx.x + threadIdx.x; - const int i1 = (blockDim.y*blockIdx.y + threadIdx.y); - const int i2 = (blockDim.z*blockIdx.z + threadIdx.z) / ne3; - const int i3 = (blockDim.z*blockIdx.z + threadIdx.z) % 
ne3; - - if (i0s >= ne0 || i1 >= ne1 || i2 >= ne2 || i3 >= ne3) { - return; - } - - const int i11 = i1 % ne11; - const int i12 = i2 % ne12; - const int i13 = i3 % ne13; - - const size_t i_src0 = i3*s03 + i2*s02 + i1*s01; - const size_t i_src1 = i13*s13 + i12*s12 + i11*s11; - const size_t i_dst = i3*s3 + i2*s2 + i1*s1; - - const src0_t * src0_row = src0 + i_src0; - const src1_t * src1_row = src1 + i_src1; - dst_t * dst_row = dst + i_dst; - - for (int i0 = i0s; i0 < ne0; i0 += blockDim.x*gridDim.x) { - const int i10 = i0 % ne10; - dst_row[i0] = (dst_t)bin_op(src0 ? (float)src0_row[i0] : 0.0f, (float)src1_row[i10]); - } -} - -template -static __global__ void k_bin_bcast_unravel(const src0_t * src0, const src1_t * src1, dst_t * dst, - int ne0, int ne1, int ne2, int ne3, - int ne10, int ne11, int ne12, int ne13, - /*int s0, */ int s1, int s2, int s3, - /*int s00,*/ int s01, int s02, int s03, - /*int s10,*/ int s11, int s12, int s13) { - - const int i = blockDim.x*blockIdx.x + threadIdx.x; - - const int i3 = i/(ne2*ne1*ne0); - const int i2 = (i/(ne1*ne0)) % ne2; - const int i1 = (i/ne0) % ne1; - const int i0 = i % ne0; - - if (i0 >= ne0 || i1 >= ne1 || i2 >= ne2 || i3 >= ne3) { - return; - } - - const int i11 = i1 % ne11; - const int i12 = i2 % ne12; - const int i13 = i3 % ne13; - - const size_t i_src0 = i3*s03 + i2*s02 + i1*s01; - const size_t i_src1 = i13*s13 + i12*s12 + i11*s11; - const size_t i_dst = i3*s3 + i2*s2 + i1*s1; - - const src0_t * src0_row = src0 + i_src0; - const src1_t * src1_row = src1 + i_src1; - dst_t * dst_row = dst + i_dst; - - const int i10 = i0 % ne10; - dst_row[i0] = (dst_t)bin_op(src0 ? 
(float)src0_row[i0] : 0.0f, (float)src1_row[i10]); -} - -template -struct bin_bcast_cuda { - template - void operator()(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, - const src0_t * src0_dd, const src1_t * src1_dd, dst_t * dst_dd, - cudaStream_t stream) { - - GGML_TENSOR_BINARY_OP_LOCALS - - int nr0 = ne10/ne0; - int nr1 = ne11/ne1; - int nr2 = ne12/ne2; - int nr3 = ne13/ne3; - - int nr[4] = { nr0, nr1, nr2, nr3 }; - - // collapse dimensions until first broadcast dimension - int64_t cne[] = {ne0, ne1, ne2, ne3}; - int64_t cne0[] = {ne00, ne01, ne02, ne03}; - int64_t cne1[] = {ne10, ne11, ne12, ne13}; - - size_t cnb[] = {nb0, nb1, nb2, nb3}; - size_t cnb0[] = {nb00, nb01, nb02, nb03}; - size_t cnb1[] = {nb10, nb11, nb12, nb13}; - - auto collapse = [](int64_t cne[]) { - cne[0] *= cne[1]; - cne[1] = cne[2]; - cne[2] = cne[3]; - cne[3] = 1; - }; - - auto collapse_nb = [](size_t cnb[], const int64_t cne[]) { - cnb[1] *= cne[1]; - cnb[2] *= cne[2]; - cnb[3] *= cne[3]; - }; - - if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ggml_is_contiguous(dst)) { - for (int i = 0; i < 4; i++) { - if (nr[i] != 1) { - break; - } - if (i > 0) { - collapse_nb(cnb, cne); - collapse_nb(cnb0, cne0); - collapse_nb(cnb1, cne1); - collapse(cne); - collapse(cne0); - collapse(cne1); - } - } - } - - { - int64_t ne0 = cne[0]; - int64_t ne1 = cne[1]; - int64_t ne2 = cne[2]; - int64_t ne3 = cne[3]; - - //int64_t ne00 = cne0[0]; GGML_UNUSED(ne00); - //int64_t ne01 = cne0[1]; GGML_UNUSED(ne01); - //int64_t ne02 = cne0[2]; GGML_UNUSED(ne02); - //int64_t ne03 = cne0[3]; GGML_UNUSED(ne03); - - int64_t ne10 = cne1[0]; - int64_t ne11 = cne1[1]; - int64_t ne12 = cne1[2]; - int64_t ne13 = cne1[3]; - - size_t nb0 = cnb[0]; - size_t nb1 = cnb[1]; - size_t nb2 = cnb[2]; - size_t nb3 = cnb[3]; - - size_t nb00 = cnb0[0]; - size_t nb01 = cnb0[1]; - size_t nb02 = cnb0[2]; - size_t nb03 = cnb0[3]; - - size_t nb10 = cnb1[0]; - size_t nb11 = cnb1[1]; - size_t 
nb12 = cnb1[2]; - size_t nb13 = cnb1[3]; - - size_t s0 = nb0 / sizeof(dst_t); - size_t s1 = nb1 / sizeof(dst_t); - size_t s2 = nb2 / sizeof(dst_t); - size_t s3 = nb3 / sizeof(dst_t); - - size_t s10 = nb10 / sizeof(src1_t); - size_t s11 = nb11 / sizeof(src1_t); - size_t s12 = nb12 / sizeof(src1_t); - size_t s13 = nb13 / sizeof(src1_t); - - size_t s00 = nb00 / sizeof(src0_t); - size_t s01 = nb01 / sizeof(src0_t); - size_t s02 = nb02 / sizeof(src0_t); - size_t s03 = nb03 / sizeof(src0_t); - - GGML_ASSERT(nb0 % sizeof(dst_t) == 0); - GGML_ASSERT(nb1 % sizeof(dst_t) == 0); - GGML_ASSERT(nb2 % sizeof(dst_t) == 0); - GGML_ASSERT(nb3 % sizeof(dst_t) == 0); - - GGML_ASSERT(nb00 % sizeof(src0_t) == 0); - GGML_ASSERT(nb01 % sizeof(src0_t) == 0); - GGML_ASSERT(nb02 % sizeof(src0_t) == 0); - GGML_ASSERT(nb03 % sizeof(src0_t) == 0); - - GGML_ASSERT(nb10 % sizeof(src1_t) == 0); - GGML_ASSERT(nb11 % sizeof(src1_t) == 0); - GGML_ASSERT(nb12 % sizeof(src1_t) == 0); - GGML_ASSERT(nb13 % sizeof(src1_t) == 0); - - GGML_ASSERT(s0 == 1); - GGML_ASSERT(s00 == 1); - GGML_ASSERT(s10 == 1); - - const int block_size = 128; - - int64_t hne0 = std::max(ne0/2LL, 1LL); - - dim3 block_dims; - block_dims.x = std::min(hne0, block_size); - block_dims.y = std::min(ne1, block_size / block_dims.x); - block_dims.z = std::min(std::min(ne2*ne3, block_size / block_dims.x / block_dims.y), 64U); - - dim3 block_nums( - (hne0 + block_dims.x - 1) / block_dims.x, - (ne1 + block_dims.y - 1) / block_dims.y, - (ne2*ne3 + block_dims.z - 1) / block_dims.z - ); - - if (block_nums.z > 65535) { - // this is the maximum number of blocks in z dimension, fallback to 1D grid kernel - int block_num = (ne0*ne1*ne2*ne3 + block_size - 1) / block_size; - k_bin_bcast_unravel<<>>( - src0_dd, src1_dd, dst_dd, - ne0, ne1, ne2, ne3, - ne10, ne11, ne12, ne13, - /* s0, */ s1, s2, s3, - /* s00, */ s01, s02, s03, - /* s10, */ s11, s12, s13); - } else { - k_bin_bcast<<>>( - src0_dd, src1_dd, dst_dd, - ne0, ne1, ne2, ne3, - ne10, ne11, 
ne12, ne13, - /* s0, */ s1, s2, s3, - /* s00, */ s01, s02, s03, - /* s10, */ s11, s12, s13); - } - } - } -}; - -template -static void ggml_cuda_op_bin_bcast( - const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, - const void * src0_dd, const void * src1_dd, void * dst_dd, cudaStream_t stream) { - - GGML_ASSERT(src1->type == GGML_TYPE_F32); - - if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - op()(src0, src1, dst, (const float *)src0_dd, (const float *)src1_dd, (float *)dst_dd, stream); - } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { - op()(src0, src1, dst, (const half *) src0_dd, (const float *)src1_dd, (half *) dst_dd, stream); - } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) { - op()(src0, src1, dst, (const half *) src0_dd, (const float *)src1_dd, (float *)dst_dd, stream); - } else { - fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, - ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type)); - GGML_ASSERT(false); - } -} - -void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - ggml_cuda_op_bin_bcast>(dst, dst->src[0], dst, nullptr, dst->src[0]->data, dst->data, ctx.stream()); -} - -void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - ggml_cuda_op_bin_bcast>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream()); -} - -void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - ggml_cuda_op_bin_bcast>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream()); -} - -void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - ggml_cuda_op_bin_bcast>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream()); -} +#include "binbcast.cuh" + +static __device__ __forceinline__ float op_repeat(const float a, const float b) { + return 
b; + GGML_UNUSED(a); +} + +static __device__ __forceinline__ float op_add(const float a, const float b) { + return a + b; +} + +static __device__ __forceinline__ float op_mul(const float a, const float b) { + return a * b; +} + +static __device__ __forceinline__ float op_div(const float a, const float b) { + return a / b; +} + +template +static __global__ void k_bin_bcast(const src0_t * src0, const src1_t * src1, dst_t * dst, + int ne0, int ne1, int ne2, int ne3, + int ne10, int ne11, int ne12, int ne13, + /*int s0, */ int s1, int s2, int s3, + /*int s00,*/ int s01, int s02, int s03, + /*int s10,*/ int s11, int s12, int s13) { + const int i0s = blockDim.x*blockIdx.x + threadIdx.x; + const int i1 = (blockDim.y*blockIdx.y + threadIdx.y); + const int i2 = (blockDim.z*blockIdx.z + threadIdx.z) / ne3; + const int i3 = (blockDim.z*blockIdx.z + threadIdx.z) % ne3; + + if (i0s >= ne0 || i1 >= ne1 || i2 >= ne2 || i3 >= ne3) { + return; + } + + const int i11 = i1 % ne11; + const int i12 = i2 % ne12; + const int i13 = i3 % ne13; + + const size_t i_src0 = i3*s03 + i2*s02 + i1*s01; + const size_t i_src1 = i13*s13 + i12*s12 + i11*s11; + const size_t i_dst = i3*s3 + i2*s2 + i1*s1; + + const src0_t * src0_row = src0 + i_src0; + const src1_t * src1_row = src1 + i_src1; + dst_t * dst_row = dst + i_dst; + + for (int i0 = i0s; i0 < ne0; i0 += blockDim.x*gridDim.x) { + const int i10 = i0 % ne10; + dst_row[i0] = (dst_t)bin_op(src0 ? 
(float)src0_row[i0] : 0.0f, (float)src1_row[i10]); + } +} + +template +static __global__ void k_bin_bcast_unravel(const src0_t * src0, const src1_t * src1, dst_t * dst, + int ne0, int ne1, int ne2, int ne3, + int ne10, int ne11, int ne12, int ne13, + /*int s0, */ int s1, int s2, int s3, + /*int s00,*/ int s01, int s02, int s03, + /*int s10,*/ int s11, int s12, int s13) { + + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + const int i3 = i/(ne2*ne1*ne0); + const int i2 = (i/(ne1*ne0)) % ne2; + const int i1 = (i/ne0) % ne1; + const int i0 = i % ne0; + + if (i0 >= ne0 || i1 >= ne1 || i2 >= ne2 || i3 >= ne3) { + return; + } + + const int i11 = i1 % ne11; + const int i12 = i2 % ne12; + const int i13 = i3 % ne13; + + const size_t i_src0 = i3*s03 + i2*s02 + i1*s01; + const size_t i_src1 = i13*s13 + i12*s12 + i11*s11; + const size_t i_dst = i3*s3 + i2*s2 + i1*s1; + + const src0_t * src0_row = src0 + i_src0; + const src1_t * src1_row = src1 + i_src1; + dst_t * dst_row = dst + i_dst; + + const int i10 = i0 % ne10; + dst_row[i0] = (dst_t)bin_op(src0 ? 
(float)src0_row[i0] : 0.0f, (float)src1_row[i10]); +} + +template +struct bin_bcast_cuda { + template + void operator()(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, + const src0_t * src0_dd, const src1_t * src1_dd, dst_t * dst_dd, + cudaStream_t stream) { + + GGML_TENSOR_BINARY_OP_LOCALS + + int nr0 = ne10/ne0; + int nr1 = ne11/ne1; + int nr2 = ne12/ne2; + int nr3 = ne13/ne3; + + int nr[4] = { nr0, nr1, nr2, nr3 }; + + // collapse dimensions until first broadcast dimension + int64_t cne[] = {ne0, ne1, ne2, ne3}; + int64_t cne0[] = {ne00, ne01, ne02, ne03}; + int64_t cne1[] = {ne10, ne11, ne12, ne13}; + + size_t cnb[] = {nb0, nb1, nb2, nb3}; + size_t cnb0[] = {nb00, nb01, nb02, nb03}; + size_t cnb1[] = {nb10, nb11, nb12, nb13}; + + auto collapse = [](int64_t cne[]) { + cne[0] *= cne[1]; + cne[1] = cne[2]; + cne[2] = cne[3]; + cne[3] = 1; + }; + + auto collapse_nb = [](size_t cnb[], const int64_t cne[]) { + cnb[1] *= cne[1]; + cnb[2] *= cne[2]; + cnb[3] *= cne[3]; + }; + + if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ggml_is_contiguous(dst)) { + for (int i = 0; i < 4; i++) { + if (nr[i] != 1) { + break; + } + if (i > 0) { + collapse_nb(cnb, cne); + collapse_nb(cnb0, cne0); + collapse_nb(cnb1, cne1); + collapse(cne); + collapse(cne0); + collapse(cne1); + } + } + } + + { + int64_t ne0 = cne[0]; + int64_t ne1 = cne[1]; + int64_t ne2 = cne[2]; + int64_t ne3 = cne[3]; + + //int64_t ne00 = cne0[0]; GGML_UNUSED(ne00); + //int64_t ne01 = cne0[1]; GGML_UNUSED(ne01); + //int64_t ne02 = cne0[2]; GGML_UNUSED(ne02); + //int64_t ne03 = cne0[3]; GGML_UNUSED(ne03); + + int64_t ne10 = cne1[0]; + int64_t ne11 = cne1[1]; + int64_t ne12 = cne1[2]; + int64_t ne13 = cne1[3]; + + size_t nb0 = cnb[0]; + size_t nb1 = cnb[1]; + size_t nb2 = cnb[2]; + size_t nb3 = cnb[3]; + + size_t nb00 = cnb0[0]; + size_t nb01 = cnb0[1]; + size_t nb02 = cnb0[2]; + size_t nb03 = cnb0[3]; + + size_t nb10 = cnb1[0]; + size_t nb11 = cnb1[1]; + size_t 
nb12 = cnb1[2]; + size_t nb13 = cnb1[3]; + + size_t s0 = nb0 / sizeof(dst_t); + size_t s1 = nb1 / sizeof(dst_t); + size_t s2 = nb2 / sizeof(dst_t); + size_t s3 = nb3 / sizeof(dst_t); + + size_t s10 = nb10 / sizeof(src1_t); + size_t s11 = nb11 / sizeof(src1_t); + size_t s12 = nb12 / sizeof(src1_t); + size_t s13 = nb13 / sizeof(src1_t); + + size_t s00 = nb00 / sizeof(src0_t); + size_t s01 = nb01 / sizeof(src0_t); + size_t s02 = nb02 / sizeof(src0_t); + size_t s03 = nb03 / sizeof(src0_t); + + GGML_ASSERT(nb0 % sizeof(dst_t) == 0); + GGML_ASSERT(nb1 % sizeof(dst_t) == 0); + GGML_ASSERT(nb2 % sizeof(dst_t) == 0); + GGML_ASSERT(nb3 % sizeof(dst_t) == 0); + + GGML_ASSERT(nb00 % sizeof(src0_t) == 0); + GGML_ASSERT(nb01 % sizeof(src0_t) == 0); + GGML_ASSERT(nb02 % sizeof(src0_t) == 0); + GGML_ASSERT(nb03 % sizeof(src0_t) == 0); + + GGML_ASSERT(nb10 % sizeof(src1_t) == 0); + GGML_ASSERT(nb11 % sizeof(src1_t) == 0); + GGML_ASSERT(nb12 % sizeof(src1_t) == 0); + GGML_ASSERT(nb13 % sizeof(src1_t) == 0); + + GGML_ASSERT(s0 == 1); + GGML_ASSERT(s00 == 1); + GGML_ASSERT(s10 == 1); + + const int block_size = 128; + + int64_t hne0 = std::max(ne0/2LL, 1LL); + + dim3 block_dims; + block_dims.x = std::min(hne0, block_size); + block_dims.y = std::min(ne1, block_size / block_dims.x); + block_dims.z = std::min(std::min(ne2*ne3, block_size / block_dims.x / block_dims.y), 64U); + + dim3 block_nums( + (hne0 + block_dims.x - 1) / block_dims.x, + (ne1 + block_dims.y - 1) / block_dims.y, + (ne2*ne3 + block_dims.z - 1) / block_dims.z + ); + + if (block_nums.z > 65535) { + // this is the maximum number of blocks in z dimension, fallback to 1D grid kernel + int block_num = (ne0*ne1*ne2*ne3 + block_size - 1) / block_size; + k_bin_bcast_unravel<<>>( + src0_dd, src1_dd, dst_dd, + ne0, ne1, ne2, ne3, + ne10, ne11, ne12, ne13, + /* s0, */ s1, s2, s3, + /* s00, */ s01, s02, s03, + /* s10, */ s11, s12, s13); + } else { + k_bin_bcast<<>>( + src0_dd, src1_dd, dst_dd, + ne0, ne1, ne2, ne3, + ne10, ne11, 
ne12, ne13, + /* s0, */ s1, s2, s3, + /* s00, */ s01, s02, s03, + /* s10, */ s11, s12, s13); + } + } + } +}; + +template +static void ggml_cuda_op_bin_bcast( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, + const void * src0_dd, const void * src1_dd, void * dst_dd, cudaStream_t stream) { + + GGML_ASSERT(src1->type == GGML_TYPE_F32); + + if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { + op()(src0, src1, dst, (const float *)src0_dd, (const float *)src1_dd, (float *)dst_dd, stream); + } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { + op()(src0, src1, dst, (const half *) src0_dd, (const float *)src1_dd, (half *) dst_dd, stream); + } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) { + op()(src0, src1, dst, (const half *) src0_dd, (const float *)src1_dd, (float *)dst_dd, stream); + } else { + fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, + ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type)); + GGML_ASSERT(false); + } +} + +void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_bin_bcast>(dst, dst->src[0], dst, nullptr, dst->src[0]->data, dst->data, ctx.stream()); +} + +void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_bin_bcast>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream()); +} + +void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_bin_bcast>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream()); +} + +void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_bin_bcast>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream()); +} diff --git a/llama/ggml-cuda/binbcast.cuh b/llama/ggml-cuda/binbcast.cuh index 4f63d637..0e1d3c7c 100644 --- 
a/llama/ggml-cuda/binbcast.cuh +++ b/llama/ggml-cuda/binbcast.cuh @@ -1,6 +1,6 @@ -#include "common.cuh" - -void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); -void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst); -void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst); -void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/clamp.cuh b/llama/ggml-cuda/clamp.cuh index 7f9559dd..8cbba12c 100644 --- a/llama/ggml-cuda/clamp.cuh +++ b/llama/ggml-cuda/clamp.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_CLAMP_BLOCK_SIZE 256 - -void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_CLAMP_BLOCK_SIZE 256 + +void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/concat.cuh b/llama/ggml-cuda/concat.cuh index aa506a05..b88a8d07 100644 --- a/llama/ggml-cuda/concat.cuh +++ b/llama/ggml-cuda/concat.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_CONCAT_BLOCK_SIZE 256 - -void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_CONCAT_BLOCK_SIZE 256 + +void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/convert.cuh b/llama/ggml-cuda/convert.cuh index 5394be9f..f34d783f 100644 --- a/llama/ggml-cuda/convert.cuh +++ b/llama/ggml-cuda/convert.cuh @@ -1,13 +1,13 @@ -#include "common.cuh" - -#define CUDA_DEQUANTIZE_BLOCK_SIZE 256 - -template -using to_t_cuda_t = void (*)(const void * __restrict__ x, T 
* __restrict__ y, int64_t k, cudaStream_t stream); - -typedef to_t_cuda_t to_fp32_cuda_t; -typedef to_t_cuda_t to_fp16_cuda_t; - -to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type); - -to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type); +#include "common.cuh" + +#define CUDA_DEQUANTIZE_BLOCK_SIZE 256 + +template +using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream); + +typedef to_t_cuda_t to_fp32_cuda_t; +typedef to_t_cuda_t to_fp16_cuda_t; + +to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type); + +to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type); diff --git a/llama/ggml-cuda/dequantize.cuh b/llama/ggml-cuda/dequantize.cuh index bd3c2d9d..44eb23f8 100644 --- a/llama/ggml-cuda/dequantize.cuh +++ b/llama/ggml-cuda/dequantize.cuh @@ -1,103 +1,103 @@ -#include "common.cuh" - -static __device__ __forceinline__ void dequantize_q4_0(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ - const block_q4_0 * x = (const block_q4_0 *) vx; - - const dfloat d = x[ib].d; - - const int vui = x[ib].qs[iqs]; - - v.x = vui & 0xF; - v.y = vui >> 4; - -#ifdef GGML_CUDA_F16 - v = __hsub2(v, {8.0f, 8.0f}); - v = __hmul2(v, {d, d}); -#else - v.x = (v.x - 8.0f) * d; - v.y = (v.y - 8.0f) * d; -#endif // GGML_CUDA_F16 -} - -static __device__ __forceinline__ void dequantize_q4_1(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ - const block_q4_1 * x = (const block_q4_1 *) vx; - - const dfloat d = __low2half(x[ib].dm); - const dfloat m = __high2half(x[ib].dm); - - const int vui = x[ib].qs[iqs]; - - v.x = vui & 0xF; - v.y = vui >> 4; - -#ifdef GGML_CUDA_F16 - v = __hmul2(v, {d, d}); - v = __hadd2(v, {m, m}); -#else - v.x = (v.x * d) + m; - v.y = (v.y * d) + m; -#endif // GGML_CUDA_F16 -} - -static __device__ __forceinline__ void dequantize_q5_0(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ - const block_q5_0 * x = (const block_q5_0 *) vx; - - const dfloat d = x[ib].d; - - uint32_t qh; - 
memcpy(&qh, x[ib].qh, sizeof(qh)); - - const int xh_0 = ((qh >> (iqs + 0)) << 4) & 0x10; - const int xh_1 = ((qh >> (iqs + 12)) ) & 0x10; - - v.x = ((x[ib].qs[iqs] & 0xf) | xh_0); - v.y = ((x[ib].qs[iqs] >> 4) | xh_1); - -#ifdef GGML_CUDA_F16 - v = __hsub2(v, {16.0f, 16.0f}); - v = __hmul2(v, {d, d}); -#else - v.x = (v.x - 16.0f) * d; - v.y = (v.y - 16.0f) * d; -#endif // GGML_CUDA_F16 -} - -static __device__ __forceinline__ void dequantize_q5_1(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ - const block_q5_1 * x = (const block_q5_1 *) vx; - - const dfloat d = __low2half(x[ib].dm); - const dfloat m = __high2half(x[ib].dm); - - uint32_t qh; - memcpy(&qh, x[ib].qh, sizeof(qh)); - - const int xh_0 = ((qh >> (iqs + 0)) << 4) & 0x10; - const int xh_1 = ((qh >> (iqs + 12)) ) & 0x10; - - v.x = ((x[ib].qs[iqs] & 0xf) | xh_0); - v.y = ((x[ib].qs[iqs] >> 4) | xh_1); - -#ifdef GGML_CUDA_F16 - v = __hmul2(v, {d, d}); - v = __hadd2(v, {m, m}); -#else - v.x = (v.x * d) + m; - v.y = (v.y * d) + m; -#endif // GGML_CUDA_F16 -} - -static __device__ __forceinline__ void dequantize_q8_0(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ - const block_q8_0 * x = (const block_q8_0 *) vx; - - const dfloat d = x[ib].d; - - v.x = x[ib].qs[iqs + 0]; - v.y = x[ib].qs[iqs + 1]; - -#ifdef GGML_CUDA_F16 - v = __hmul2(v, {d, d}); -#else - v.x *= d; - v.y *= d; -#endif // GGML_CUDA_F16 -} +#include "common.cuh" + +static __device__ __forceinline__ void dequantize_q4_0(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ + const block_q4_0 * x = (const block_q4_0 *) vx; + + const dfloat d = x[ib].d; + + const int vui = x[ib].qs[iqs]; + + v.x = vui & 0xF; + v.y = vui >> 4; + +#ifdef GGML_CUDA_F16 + v = __hsub2(v, {8.0f, 8.0f}); + v = __hmul2(v, {d, d}); +#else + v.x = (v.x - 8.0f) * d; + v.y = (v.y - 8.0f) * d; +#endif // GGML_CUDA_F16 +} + +static __device__ __forceinline__ void dequantize_q4_1(const void * vx, const int64_t ib, const int iqs, dfloat2 & 
v){ + const block_q4_1 * x = (const block_q4_1 *) vx; + + const dfloat d = __low2half(x[ib].dm); + const dfloat m = __high2half(x[ib].dm); + + const int vui = x[ib].qs[iqs]; + + v.x = vui & 0xF; + v.y = vui >> 4; + +#ifdef GGML_CUDA_F16 + v = __hmul2(v, {d, d}); + v = __hadd2(v, {m, m}); +#else + v.x = (v.x * d) + m; + v.y = (v.y * d) + m; +#endif // GGML_CUDA_F16 +} + +static __device__ __forceinline__ void dequantize_q5_0(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ + const block_q5_0 * x = (const block_q5_0 *) vx; + + const dfloat d = x[ib].d; + + uint32_t qh; + memcpy(&qh, x[ib].qh, sizeof(qh)); + + const int xh_0 = ((qh >> (iqs + 0)) << 4) & 0x10; + const int xh_1 = ((qh >> (iqs + 12)) ) & 0x10; + + v.x = ((x[ib].qs[iqs] & 0xf) | xh_0); + v.y = ((x[ib].qs[iqs] >> 4) | xh_1); + +#ifdef GGML_CUDA_F16 + v = __hsub2(v, {16.0f, 16.0f}); + v = __hmul2(v, {d, d}); +#else + v.x = (v.x - 16.0f) * d; + v.y = (v.y - 16.0f) * d; +#endif // GGML_CUDA_F16 +} + +static __device__ __forceinline__ void dequantize_q5_1(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ + const block_q5_1 * x = (const block_q5_1 *) vx; + + const dfloat d = __low2half(x[ib].dm); + const dfloat m = __high2half(x[ib].dm); + + uint32_t qh; + memcpy(&qh, x[ib].qh, sizeof(qh)); + + const int xh_0 = ((qh >> (iqs + 0)) << 4) & 0x10; + const int xh_1 = ((qh >> (iqs + 12)) ) & 0x10; + + v.x = ((x[ib].qs[iqs] & 0xf) | xh_0); + v.y = ((x[ib].qs[iqs] >> 4) | xh_1); + +#ifdef GGML_CUDA_F16 + v = __hmul2(v, {d, d}); + v = __hadd2(v, {m, m}); +#else + v.x = (v.x * d) + m; + v.y = (v.y * d) + m; +#endif // GGML_CUDA_F16 +} + +static __device__ __forceinline__ void dequantize_q8_0(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ + const block_q8_0 * x = (const block_q8_0 *) vx; + + const dfloat d = x[ib].d; + + v.x = x[ib].qs[iqs + 0]; + v.y = x[ib].qs[iqs + 1]; + +#ifdef GGML_CUDA_F16 + v = __hmul2(v, {d, d}); +#else + v.x *= d; + v.y *= d; +#endif // GGML_CUDA_F16 
+} diff --git a/llama/ggml-cuda/diagmask.cu b/llama/ggml-cuda/diagmask.cu index 4b713ba2..a2ac4a85 100644 --- a/llama/ggml-cuda/diagmask.cu +++ b/llama/ggml-cuda/diagmask.cu @@ -1,40 +1,40 @@ -#include "diagmask.cuh" - -static __global__ void diag_mask_inf_f32(const float * x, float * dst, const int ncols, const int rows_per_channel, const int n_past) { - const int col = blockDim.y*blockIdx.y + threadIdx.y; - const int row = blockDim.x*blockIdx.x + threadIdx.x; - - if (col >= ncols) { - return; - } - - const int i = row*ncols + col; - //dst[i] = col > (n_past + row % rows_per_channel) ? -INFINITY : x[i]; - //dst[i] = x[i] - (col > n_past + row % rows_per_channel) * INT_MAX; // equivalent within rounding error but slightly faster on GPU - dst[i] = x[i] - (col > n_past + row % rows_per_channel) * FLT_MAX; -} - -static void diag_mask_inf_f32_cuda(const float * x, float * dst, const int ncols_x, const int nrows_x, const int rows_per_channel, const int n_past, cudaStream_t stream) { - const dim3 block_dims(1, CUDA_DIAG_MASK_INF_BLOCK_SIZE, 1); - const int block_num_x = (ncols_x + CUDA_DIAG_MASK_INF_BLOCK_SIZE - 1) / CUDA_DIAG_MASK_INF_BLOCK_SIZE; - const dim3 block_nums(nrows_x, block_num_x, 1); - diag_mask_inf_f32<<>>(x, dst, ncols_x, rows_per_channel, n_past); -} - -void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const float * src0_d = (const float *)src0->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F32); - - const int64_t ne00 = src0->ne[0]; - const int64_t ne01 = src0->ne[1]; - const int nrows0 = ggml_nrows(src0); - - const int n_past = ((int32_t *) dst->op_params)[0]; - - diag_mask_inf_f32_cuda(src0_d, dst_d, ne00, nrows0, ne01, n_past, stream); -} +#include "diagmask.cuh" + +static __global__ void diag_mask_inf_f32(const float * x, float * dst, const int ncols, 
const int rows_per_channel, const int n_past) { + const int col = blockDim.y*blockIdx.y + threadIdx.y; + const int row = blockDim.x*blockIdx.x + threadIdx.x; + + if (col >= ncols) { + return; + } + + const int i = row*ncols + col; + //dst[i] = col > (n_past + row % rows_per_channel) ? -INFINITY : x[i]; + //dst[i] = x[i] - (col > n_past + row % rows_per_channel) * INT_MAX; // equivalent within rounding error but slightly faster on GPU + dst[i] = x[i] - (col > n_past + row % rows_per_channel) * FLT_MAX; +} + +static void diag_mask_inf_f32_cuda(const float * x, float * dst, const int ncols_x, const int nrows_x, const int rows_per_channel, const int n_past, cudaStream_t stream) { + const dim3 block_dims(1, CUDA_DIAG_MASK_INF_BLOCK_SIZE, 1); + const int block_num_x = (ncols_x + CUDA_DIAG_MASK_INF_BLOCK_SIZE - 1) / CUDA_DIAG_MASK_INF_BLOCK_SIZE; + const dim3 block_nums(nrows_x, block_num_x, 1); + diag_mask_inf_f32<<>>(x, dst, ncols_x, rows_per_channel, n_past); +} + +void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const float * src0_d = (const float *)src0->data; + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int nrows0 = ggml_nrows(src0); + + const int n_past = ((int32_t *) dst->op_params)[0]; + + diag_mask_inf_f32_cuda(src0_d, dst_d, ne00, nrows0, ne01, n_past, stream); +} diff --git a/llama/ggml-cuda/diagmask.cuh b/llama/ggml-cuda/diagmask.cuh index 6cdbef17..b7da8f89 100644 --- a/llama/ggml-cuda/diagmask.cuh +++ b/llama/ggml-cuda/diagmask.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32 - -void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32 + +void 
ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/dmmv.cuh b/llama/ggml-cuda/dmmv.cuh index 4c5ebd47..85329e7c 100644 --- a/llama/ggml-cuda/dmmv.cuh +++ b/llama/ggml-cuda/dmmv.cuh @@ -1,18 +1,18 @@ -#include "common.cuh" - -// dmmv = dequantize_mul_mat_vec - -// TODO: remove this? -#ifndef GGML_CUDA_DMMV_X -#define GGML_CUDA_DMMV_X 32 -#endif - -#ifndef GGML_CUDA_MMV_Y -#define GGML_CUDA_MMV_Y 1 -#endif - -void ggml_cuda_op_dequantize_mul_mat_vec( - ggml_backend_cuda_context & ctx, - const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, - const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, - const int64_t src1_padded_row_size, cudaStream_t stream); +#include "common.cuh" + +// dmmv = dequantize_mul_mat_vec + +// TODO: remove this? +#ifndef GGML_CUDA_DMMV_X +#define GGML_CUDA_DMMV_X 32 +#endif + +#ifndef GGML_CUDA_MMV_Y +#define GGML_CUDA_MMV_Y 1 +#endif + +void ggml_cuda_op_dequantize_mul_mat_vec( + ggml_backend_cuda_context & ctx, + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, + const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, + const int64_t src1_padded_row_size, cudaStream_t stream); diff --git a/llama/ggml-cuda/fattn-vec-f16.cu b/llama/ggml-cuda/fattn-vec-f16.cu deleted file mode 100644 index 54e1ac5d..00000000 --- a/llama/ggml-cuda/fattn-vec-f16.cu +++ /dev/null @@ -1,326 +0,0 @@ -#include "common.cuh" -#include "fattn-common.cuh" -#include "fattn-vec-f16.cuh" - -template // D == head size -#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) -__launch_bounds__(D, 1) -#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) -static __global__ void flash_attn_vec_ext_f16( - const char * __restrict__ Q, - 
const char * __restrict__ K, - const char * __restrict__ V, - const char * __restrict__ mask, - float * __restrict__ dst, - float2 * __restrict__ dst_meta, - const float scale, - const float max_bias, - const float m0, - const float m1, - const uint32_t n_head_log2, - const int ne00, - const int ne01, - const int ne02, - const int ne03, - const int ne10, - const int ne11, - const int ne12, - const int ne13, - const int ne31, - const int nb31, - const int nb01, - const int nb02, - const int nb03, - const int nb11, - const int nb12, - const int nb13, - const int ne0, - const int ne1, - const int ne2, - const int ne3) { -#if FP16_AVAILABLE - //In this kernel Q, K, V are matrices while i, j, k are matrix indices. - - const int ic0 = (blockIdx.x / parallel_blocks) * ncols; // Index of the Q/QKV column to work on. - const int ip = blockIdx.x % parallel_blocks; // Index in group of blocks running for the same column in parallel. - - const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. 
- const float2 * Q_f2 = (const float2 *) (Q + nb02* blockIdx.y + nb01*ic0); - const half2 * K_h2 = (const half2 *) (K + nb12*(blockIdx.y / gqa_ratio)); - const half * V_h = (const half *) (V + nb12*(blockIdx.y / gqa_ratio)); // K and V have same shape - const half * maskh = (const half *) mask + ne11*ic0; - - const int stride_KV = nb11 / sizeof(half); - const int stride_KV2 = nb11 / sizeof(half2); - - const float slopef = get_alibi_slope(max_bias, blockIdx.y, n_head_log2, m0, m1); - const half slopeh = __float2half(slopef); - - static_assert(D % (2*WARP_SIZE) == 0, "D not divisible by 2*WARP_SIZE == 64."); - constexpr int nwarps = D / WARP_SIZE; - const int tid = WARP_SIZE*threadIdx.y + threadIdx.x; - __builtin_assume(tid < D); - - __shared__ half KQ[ncols*D]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - KQ[j*D + tid] = -HALF_MAX_HALF; - } - half2 * KQ2 = (half2 *) KQ; - - half kqmax[ncols]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - kqmax[j] = -HALF_MAX_HALF; - } - half kqsum[ncols] = {0.0f}; - - __shared__ half kqmax_shared[ncols][WARP_SIZE]; - __shared__ half kqsum_shared[ncols][WARP_SIZE]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - if (threadIdx.y == 0) { - kqmax_shared[j][threadIdx.x] = -HALF_MAX_HALF; - kqsum_shared[j][threadIdx.x] = 0.0f; - } - } - __syncthreads(); - - // Convert Q to half2 and store in registers: - half2 Q_h2[ncols][D/(2*WARP_SIZE)]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { -#pragma unroll - for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - - const float2 tmp = Q_f2[j*(nb01/sizeof(float2)) + i]; - Q_h2[j][i0/WARP_SIZE] = make_half2(scale, scale) * make_half2(tmp.x, tmp.y); - } - } - - half2 VKQ[ncols] = {{0.0f, 0.0f}}; - - const int k_start = parallel_blocks == 1 ? 
0 : ip*D; - for (int k_VKQ_0 = k_start; k_VKQ_0 < ne11; k_VKQ_0 += parallel_blocks*D) { - // Calculate KQ tile and keep track of new maximum KQ values: - - // For unknown reasons using a half array of size 1 for kqmax_new causes a performance regression, - // see https://github.com/ggerganov/llama.cpp/pull/7061 . - // Therefore this variable is defined twice but only used once (so that the compiler can optimize out the unused variable). - half kqmax_new = kqmax[0]; - half kqmax_new_arr[ncols]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - kqmax_new_arr[j] = kqmax[j]; - } - -#pragma unroll - for (int i_KQ_0 = 0; i_KQ_0 < D; i_KQ_0 += nwarps) { - const int i_KQ = i_KQ_0 + threadIdx.y; - - if ((i_KQ_0 + nwarps > D && i_KQ >= D) || (FATTN_KQ_STRIDE % D != 0 && k_VKQ_0 + i_KQ >= ne11)) { - break; - } - - half2 sum2[ncols] = {{0.0f, 0.0f}}; -#pragma unroll - for (int k_KQ_0 = 0; k_KQ_0 < D/2; k_KQ_0 += WARP_SIZE) { - const int k_KQ = k_KQ_0 + threadIdx.x; - - const half2 K_ik = K_h2[(k_VKQ_0 + i_KQ)*stride_KV2 + k_KQ]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - sum2[j] += K_ik * Q_h2[j][k_KQ_0/WARP_SIZE]; - } - } - -#pragma unroll - for (int j = 0; j < ncols; ++j) { - sum2[j] = warp_reduce_sum(sum2[j]); - half sum = __low2half(sum2[j]) + __high2half(sum2[j]); - sum += mask ? slopeh*maskh[j*ne11 + k_VKQ_0 + i_KQ] : __float2half(0.0f); - - if (ncols == 1) { - kqmax_new = ggml_cuda_hmax(kqmax_new, sum); - } else { - kqmax_new_arr[j] = ggml_cuda_hmax(kqmax_new_arr[j], sum); - } - - if (threadIdx.x == 0) { - KQ[j*D + i_KQ] = sum; - } - } - } - -#pragma unroll - for (int j = 0; j < ncols; ++j) { - half kqmax_new_j = ncols == 1 ? 
kqmax_new : kqmax_new_arr[j]; - - kqmax_new_j = warp_reduce_max(kqmax_new_j); - if (threadIdx.x == 0) { - kqmax_shared[j][threadIdx.y] = kqmax_new_j; - } - } - - __syncthreads(); - -#pragma unroll - for (int j = 0; j < ncols; ++j) { - half kqmax_new_j = kqmax_shared[j][threadIdx.x]; - kqmax_new_j = warp_reduce_max(kqmax_new_j); - - const half KQ_max_scale = hexp(kqmax[j] - kqmax_new_j); - kqmax[j] = kqmax_new_j; - - const half val = hexp(KQ[j*D + tid] - kqmax[j]); - kqsum[j] = kqsum[j]*KQ_max_scale + val; - KQ[j*D + tid] = val; - - VKQ[j] *= __half2half2(KQ_max_scale); - } - - __syncthreads(); - -#pragma unroll - for (int k0 = 0; k0 < D; k0 += 2) { - if (FATTN_KQ_STRIDE % D != 0 && k_VKQ_0 + k0 >= ne11) { - break; - } - - half2 V_k; - reinterpret_cast(V_k.x) = V_h[(k_VKQ_0 + k0 + 0)*stride_KV + tid]; - reinterpret_cast(V_k.y) = V_h[(k_VKQ_0 + k0 + 1)*stride_KV + tid]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - VKQ[j] += V_k*KQ2[j*(D/2) + k0/2]; - } - } - - __syncthreads(); - } - -#pragma unroll - for (int j = 0; j < ncols; ++j) { - kqsum[j] = warp_reduce_sum(kqsum[j]); - if (threadIdx.x == 0) { - kqsum_shared[j][threadIdx.y] = kqsum[j]; - } - } - - __syncthreads(); - -#pragma unroll - for (int j_VKQ = 0; j_VKQ < ncols; ++j_VKQ) { - kqsum[j_VKQ] = kqsum_shared[j_VKQ][threadIdx.x]; - kqsum[j_VKQ] = warp_reduce_sum(kqsum[j_VKQ]); - - half dst_val = (__low2half(VKQ[j_VKQ]) + __high2half(VKQ[j_VKQ])); - if (parallel_blocks == 1) { - dst_val /= kqsum[j_VKQ]; - } - const int j_dst = (ic0 + j_VKQ)*parallel_blocks + ip; - dst[j_dst*D*gridDim.y + D*blockIdx.y + tid] = dst_val; - } - - if (parallel_blocks != 1 && tid < ncols) { - dst_meta[(ic0 + tid)*gridDim.y*parallel_blocks + blockIdx.y*parallel_blocks + ip] = make_float2(kqmax[tid], kqsum[tid]); - } -#else - NO_DEVICE_CODE; -#endif // FP16_AVAILABLE -} - -void ggml_cuda_flash_attn_ext_vec_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - ggml_tensor * KQV = dst; - ggml_tensor * Q = dst->src[0]; - - 
const int32_t precision = KQV->op_params[2]; - GGML_ASSERT(precision == GGML_PREC_DEFAULT); - - constexpr int cols_per_block = 1; - constexpr int parallel_blocks = 4; - switch (Q->ne[0]) { - case 64: { - constexpr int D = 64; - constexpr int nwarps = D/WARP_SIZE; - fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block); - } break; - case 128: { - constexpr int D = 128; - constexpr int nwarps = D/WARP_SIZE; - fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block); - } break; - case 256: { - constexpr int D = 256; - constexpr int nwarps = D/WARP_SIZE; - fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block); - } break; - default: - GGML_ASSERT(false); - break; - } -} - -template -void launch_fattn_vec_f16_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * Q = dst->src[0]; - switch (Q->ne[0]) { - case 64: { - constexpr int D = 64; - constexpr int nwarps = D/WARP_SIZE; - fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block); - } break; - case 128: { - constexpr int D = 128; - constexpr int nwarps = D/WARP_SIZE; - fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block); - } break; - default: { - GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128."); - } break; - } -} - -void ggml_cuda_flash_attn_ext_vec_f16_no_mma(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * KQV = dst; - const ggml_tensor * Q = dst->src[0]; - - const int32_t precision = KQV->op_params[2]; - GGML_ASSERT(precision == GGML_PREC_DEFAULT); - - if (Q->ne[1] == 1) { - ggml_cuda_flash_attn_ext_vec_f16(ctx, dst); - return; - } - - if (Q->ne[1] == 2) { - constexpr int cols_per_block = 2; - constexpr int 
parallel_blocks = 4; - launch_fattn_vec_f16_64_128(ctx, dst); - return; - } - - if (Q->ne[1] <= 4) { - constexpr int cols_per_block = 4; - constexpr int parallel_blocks = 4; - launch_fattn_vec_f16_64_128(ctx, dst); - return; - } - - if (Q->ne[1] <= 8) { - constexpr int cols_per_block = 8; - constexpr int parallel_blocks = 4; - launch_fattn_vec_f16_64_128(ctx, dst); - return; - } - - constexpr int cols_per_block = 8; - constexpr int parallel_blocks = 1; - launch_fattn_vec_f16_64_128(ctx, dst); -} diff --git a/llama/ggml-cuda/fattn-vec-f32.cu b/llama/ggml-cuda/fattn-vec-f32.cu deleted file mode 100644 index 5bcabd09..00000000 --- a/llama/ggml-cuda/fattn-vec-f32.cu +++ /dev/null @@ -1,275 +0,0 @@ -#include "common.cuh" -#include "fattn-common.cuh" -#include "fattn-vec-f32.cuh" - -template // D == head size -#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) -__launch_bounds__(D, 1) -#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) -static __global__ void flash_attn_vec_ext_f32( - const char * __restrict__ Q, - const char * __restrict__ K, - const char * __restrict__ V, - const char * __restrict__ mask, - float * __restrict__ dst, - float2 * __restrict__ dst_meta, - const float scale, - const float max_bias, - const float m0, - const float m1, - const uint32_t n_head_log2, - const int ne00, - const int ne01, - const int ne02, - const int ne03, - const int ne10, - const int ne11, - const int ne12, - const int ne13, - const int ne31, - const int nb31, - const int nb01, - const int nb02, - const int nb03, - const int nb11, - const int nb12, - const int nb13, - const int ne0, - const int ne1, - const int ne2, - const int ne3) { - //In this kernel Q, K, V are matrices while i, j, k are matrix indices. - - const int ic0 = (blockIdx.x / parallel_blocks) * ncols; // Index of the Q/QKV column to work on. - const int ip = blockIdx.x % parallel_blocks; // Index in group of blocks running for the same column in parallel. 
- - const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. - const float2 * Q_f2 = (const float2 *) (Q + nb02* blockIdx.y + nb01*ic0); - const half2 * K_h2 = (const half2 *) (K + nb12*(blockIdx.y / gqa_ratio)); - const half * V_h = (const half *) (V + nb12*(blockIdx.y / gqa_ratio)); // K and V have same shape - const half * maskh = (const half *) mask + ne11*ic0; - - const int stride_KV = nb11 / sizeof(half); - const int stride_KV2 = nb11 / sizeof(half2); - - const float slope = get_alibi_slope(max_bias, blockIdx.y, n_head_log2, m0, m1); - - static_assert(D % (2*WARP_SIZE) == 0, "D not divisible by 2*WARP_SIZE == 64."); - constexpr int nwarps = D / WARP_SIZE; - const int tid = WARP_SIZE*threadIdx.y + threadIdx.x; - __builtin_assume(tid < D); - - __shared__ float KQ[ncols*D]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - KQ[j*D + tid] = -FLT_MAX/2.0f; - } - - float kqmax[ncols]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - kqmax[j] = -FLT_MAX/2.0f; - } - float kqsum[ncols] = {0.0f}; - - __shared__ float kqmax_shared[ncols][WARP_SIZE]; - __shared__ float kqsum_shared[ncols][WARP_SIZE]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - if (threadIdx.y == 0) { - kqmax_shared[j][threadIdx.x] = -FLT_MAX/2.0f; - kqsum_shared[j][threadIdx.x] = 0.0f; - } - } - __syncthreads(); - - // Convert Q to half2 and store in registers: - float2 Q_h2[ncols][D/(2*WARP_SIZE)]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { -#pragma unroll - for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - - Q_h2[j][i0/WARP_SIZE] = Q_f2[j*(nb01/sizeof(float2)) + i]; - Q_h2[j][i0/WARP_SIZE].x *= scale; - Q_h2[j][i0/WARP_SIZE].y *= scale; - } - } - - float VKQ[ncols] = {0.0f}; - - const int k_start = parallel_blocks == 1 ? 
0 : ip*D; - for (int k_VKQ_0 = k_start; k_VKQ_0 < ne11; k_VKQ_0 += parallel_blocks*D) { - // Calculate KQ tile and keep track of new maximum KQ values: - - float kqmax_new_arr[ncols]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - kqmax_new_arr[j] = kqmax[j]; - } - -#pragma unroll - for (int i_KQ_0 = 0; i_KQ_0 < D; i_KQ_0 += nwarps) { - const int i_KQ = i_KQ_0 + threadIdx.y; - - if ((i_KQ_0 + nwarps > D && i_KQ >= D) || (FATTN_KQ_STRIDE % D != 0 && k_VKQ_0 + i_KQ >= ne11)) { - break; - } - - float sum[ncols] = {0.0f}; -#pragma unroll - for (int k_KQ_0 = 0; k_KQ_0 < D/2; k_KQ_0 += WARP_SIZE) { - const int k_KQ = k_KQ_0 + threadIdx.x; - - const half2 K_ik = K_h2[(k_VKQ_0 + i_KQ)*stride_KV2 + k_KQ]; -#pragma unroll - for (int j = 0; j < ncols; ++j) { - sum[j] += __low2float(K_ik) * Q_h2[j][k_KQ_0/WARP_SIZE].x; - sum[j] += __high2float(K_ik) * Q_h2[j][k_KQ_0/WARP_SIZE].y; - } - } - -#pragma unroll - for (int j = 0; j < ncols; ++j) { - sum[j] = warp_reduce_sum(sum[j]); - sum[j] += mask ? 
slope*__half2float(maskh[j*ne11 + k_VKQ_0 + i_KQ]) : 0.0f; - - kqmax_new_arr[j] = fmaxf(kqmax_new_arr[j], sum[j]); - - if (threadIdx.x == 0) { - KQ[j*D + i_KQ] = sum[j]; - } - } - } - -#pragma unroll - for (int j = 0; j < ncols; ++j) { - float kqmax_new_j = kqmax_new_arr[j]; - - kqmax_new_j = warp_reduce_max(kqmax_new_j); - if (threadIdx.x == 0) { - kqmax_shared[j][threadIdx.y] = kqmax_new_j; - } - } - - __syncthreads(); - -#pragma unroll - for (int j = 0; j < ncols; ++j) { - float kqmax_new_j = kqmax_shared[j][threadIdx.x]; - kqmax_new_j = warp_reduce_max(kqmax_new_j); - - const float KQ_max_scale = expf(kqmax[j] - kqmax_new_j); - kqmax[j] = kqmax_new_j; - - const float val = expf(KQ[j*D + tid] - kqmax[j]); - kqsum[j] = kqsum[j]*KQ_max_scale + val; - KQ[j*D + tid] = val; - - VKQ[j] *= KQ_max_scale; - } - - __syncthreads(); - -#pragma unroll - for (int k = 0; k < D; ++k) { - if (FATTN_KQ_STRIDE % D != 0 && k_VKQ_0 + k >= ne11) { - break; - } - - const float V_ki = __half2float(V_h[(k_VKQ_0 + k)*stride_KV + tid]); -#pragma unroll - for (int j = 0; j < ncols; ++j) { - VKQ[j] += V_ki*KQ[j*D + k]; - } - } - - __syncthreads(); - } - -#pragma unroll - for (int j = 0; j < ncols; ++j) { - kqsum[j] = warp_reduce_sum(kqsum[j]); - if (threadIdx.x == 0) { - kqsum_shared[j][threadIdx.y] = kqsum[j]; - } - } - - __syncthreads(); - -#pragma unroll - for (int j_VKQ = 0; j_VKQ < ncols; ++j_VKQ) { - kqsum[j_VKQ] = kqsum_shared[j_VKQ][threadIdx.x]; - kqsum[j_VKQ] = warp_reduce_sum(kqsum[j_VKQ]); - - float dst_val = VKQ[j_VKQ]; - if (parallel_blocks == 1) { - dst_val /= kqsum[j_VKQ]; - } - const int j_dst = (ic0 + j_VKQ)*parallel_blocks + ip; - dst[j_dst*D*gridDim.y + D*blockIdx.y + tid] = dst_val; - } - - if (parallel_blocks != 1 && tid < ncols) { - dst_meta[(ic0 + tid)*gridDim.y*parallel_blocks + blockIdx.y*parallel_blocks + ip] = make_float2(kqmax[tid], kqsum[tid]); - } -} - -template -void launch_fattn_vec_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const 
ggml_tensor * Q = dst->src[0]; - switch (Q->ne[0]) { - case 64: { - constexpr int D = 64; - constexpr int nwarps = D/WARP_SIZE; - fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f32; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block); - } break; - case 128: { - constexpr int D = 128; - constexpr int nwarps = D/WARP_SIZE; - fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f32; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block); - } break; - default: { - GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128."); - } break; - } -} - -void ggml_cuda_flash_attn_ext_vec_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * Q = dst->src[0]; - - if (Q->ne[1] == 1) { - constexpr int cols_per_block = 1; - constexpr int parallel_blocks = 4; - launch_fattn_vec_f32_64_128(ctx, dst); - return; - } - - if (Q->ne[1] == 2) { - constexpr int cols_per_block = 2; - constexpr int parallel_blocks = 4; - launch_fattn_vec_f32_64_128(ctx, dst); - return; - } - - if (Q->ne[1] <= 4) { - constexpr int cols_per_block = 4; - constexpr int parallel_blocks = 4; - launch_fattn_vec_f32_64_128(ctx, dst); - return; - } - - if (Q->ne[1] <= 8) { - constexpr int cols_per_block = 8; - constexpr int parallel_blocks = 4; - launch_fattn_vec_f32_64_128(ctx, dst); - return; - } - - constexpr int cols_per_block = 8; - constexpr int parallel_blocks = 1; - launch_fattn_vec_f32_64_128(ctx, dst); -} diff --git a/llama/ggml-cuda/getrows.cu b/llama/ggml-cuda/getrows.cu index 55af195f..7f8091bc 100644 --- a/llama/ggml-cuda/getrows.cu +++ b/llama/ggml-cuda/getrows.cu @@ -1,178 +1,178 @@ -#include "getrows.cuh" -#include "dequantize.cuh" - -template -static __global__ void k_get_rows( - const void * src0, const int32_t * src1, dst_t * dst, - int64_t ne00, /*int64_t ne01, int64_t ne02, int64_t ne03,*/ - /*int64_t ne10, int64_t ne11,*/ int64_t ne12, /*int64_t ne13,*/ - /*size_t s0,*/ size_t s1, size_t s2, size_t s3, - /*size_t 
nb00,*/ size_t nb01, size_t nb02, size_t nb03, - size_t s10, size_t s11, size_t s12/*, size_t s13*/) { - - const int i00 = (blockIdx.x*blockDim.x + threadIdx.x)*2; - const int i10 = blockDim.y*blockIdx.y + threadIdx.y; - const int i11 = (blockIdx.z*blockDim.z + threadIdx.z)/ne12; - const int i12 = (blockIdx.z*blockDim.z + threadIdx.z)%ne12; - - if (i00 >= ne00) { - return; - } - - const int i01 = src1[i10*s10 + i11*s11 + i12*s12]; - - dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3; - const void * src0_row = (const char *)src0 + i01*nb01 + i11*nb02 + i12*nb03; - - const int ib = i00/qk; // block index - const int iqs = (i00%qk)/qr; // quant index - const int iybs = i00 - i00%qk; // dst block start index - const int y_offset = qr == 1 ? 1 : qk/2; - - // dequantize - dfloat2 v; - dequantize_kernel(src0_row, ib, iqs, v); - - dst_row[iybs + iqs + 0] = v.x; - dst_row[iybs + iqs + y_offset] = v.y; -} - -template -static __global__ void k_get_rows_float( - const src0_t * src0, const int32_t * src1, dst_t * dst, - int64_t ne00, /*int64_t ne01, int64_t ne02, int64_t ne03,*/ - /*int64_t ne10, int64_t ne11,*/ int64_t ne12, /*int64_t ne13,*/ - /*size_t s0,*/ size_t s1, size_t s2, size_t s3, - /*size_t nb00,*/ size_t nb01, size_t nb02, size_t nb03, - size_t s10, size_t s11, size_t s12/*, size_t s13*/) { - - const int i00 = blockIdx.x*blockDim.x + threadIdx.x; - const int i10 = blockDim.y*blockIdx.y + threadIdx.y; - const int i11 = (blockIdx.z*blockDim.z + threadIdx.z)/ne12; - const int i12 = (blockIdx.z*blockDim.z + threadIdx.z)%ne12; - - if (i00 >= ne00) { - return; - } - - const int i01 = src1[i10*s10 + i11*s11 + i12*s12]; - - dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3; - const src0_t * src0_row = (const src0_t *)((const char *)src0 + i01*nb01 + i11*nb02 + i12*nb03); - - dst_row[i00] = src0_row[i00]; -} - -template -static void get_rows_cuda(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, - const void * src0_dd, const int32_t * src1_dd, float * 
dst_dd, cudaStream_t stream) { - - GGML_TENSOR_BINARY_OP_LOCALS - - const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1); - const int block_num_x = (ne00 + 2*CUDA_GET_ROWS_BLOCK_SIZE - 1) / (2*CUDA_GET_ROWS_BLOCK_SIZE); - const dim3 block_nums(block_num_x, ne10, ne11*ne12); - - // strides in elements - //const size_t s0 = nb0 / ggml_element_size(dst); - const size_t s1 = nb1 / ggml_element_size(dst); - const size_t s2 = nb2 / ggml_element_size(dst); - const size_t s3 = nb3 / ggml_element_size(dst); - - const size_t s10 = nb10 / ggml_element_size(src1); - const size_t s11 = nb11 / ggml_element_size(src1); - const size_t s12 = nb12 / ggml_element_size(src1); - //const size_t s13 = nb13 / ggml_element_size(src1); - - GGML_ASSERT(ne00 % 2 == 0); - - k_get_rows<<>>( - src0_dd, src1_dd, dst_dd, - ne00, /*ne01, ne02, ne03,*/ - /*ne10, ne11,*/ ne12, /*ne13,*/ - /* s0,*/ s1, s2, s3, - /* nb00,*/ nb01, nb02, nb03, - s10, s11, s12/*, s13*/); - - GGML_UNUSED(dst); -} - -template -static void get_rows_cuda_float(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, - const src0_t * src0_dd, const int32_t * src1_dd, float * dst_dd, cudaStream_t stream) { - - GGML_TENSOR_BINARY_OP_LOCALS - - const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1); - const int block_num_x = (ne00 + CUDA_GET_ROWS_BLOCK_SIZE - 1) / CUDA_GET_ROWS_BLOCK_SIZE; - const dim3 block_nums(block_num_x, ne10, ne11*ne12); - - // strides in elements - //const size_t s0 = nb0 / ggml_element_size(dst); - const size_t s1 = nb1 / ggml_element_size(dst); - const size_t s2 = nb2 / ggml_element_size(dst); - const size_t s3 = nb3 / ggml_element_size(dst); - - const size_t s10 = nb10 / ggml_element_size(src1); - const size_t s11 = nb11 / ggml_element_size(src1); - const size_t s12 = nb12 / ggml_element_size(src1); - //const size_t s13 = nb13 / ggml_element_size(src1); - - k_get_rows_float<<>>( - src0_dd, src1_dd, dst_dd, - ne00, /*ne01, ne02, ne03,*/ - /*ne10, ne11,*/ ne12, /*ne13,*/ - /* s0,*/ s1, s2, 
s3, - /* nb00,*/ nb01, nb02, nb03, - s10, s11, s12/*, s13*/); - - GGML_UNUSED(dst); -} - -void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const ggml_tensor * src1 = dst->src[1]; - const float * src0_d = (const float *)src0->data; - const float * src1_d = (const float *)src1->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - - GGML_ASSERT(src1->type == GGML_TYPE_I32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); - - GGML_ASSERT(src0->nb[0] == ggml_type_size(src0->type)); - GGML_ASSERT(src1->nb[0] == ggml_type_size(src1->type)); - GGML_ASSERT(dst->nb[0] == ggml_type_size(dst->type)); - - const int32_t * src1_i32 = (const int32_t *) src1_d; - - switch (src0->type) { - case GGML_TYPE_F16: - get_rows_cuda_float(src0, src1, dst, (const half *)src0_d, src1_i32, dst_d, stream); - break; - case GGML_TYPE_F32: - get_rows_cuda_float(src0, src1, dst, src0_d, src1_i32, dst_d, stream); - break; - case GGML_TYPE_Q4_0: - get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); - break; - case GGML_TYPE_Q4_1: - get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); - break; - case GGML_TYPE_Q5_0: - get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); - break; - case GGML_TYPE_Q5_1: - get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); - break; - case GGML_TYPE_Q8_0: - get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); - break; - default: - // TODO: k-quants - fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type)); - GGML_ASSERT(false); - break; - } -} +#include "getrows.cuh" +#include "dequantize.cuh" + +template +static __global__ void k_get_rows( + const void * src0, const int32_t * src1, dst_t * dst, + int64_t ne00, /*int64_t ne01, int64_t ne02, int64_t ne03,*/ + /*int64_t ne10, int64_t ne11,*/ int64_t ne12, /*int64_t ne13,*/ + /*size_t s0,*/ size_t s1, size_t s2, size_t s3, + /*size_t 
nb00,*/ size_t nb01, size_t nb02, size_t nb03, + size_t s10, size_t s11, size_t s12/*, size_t s13*/) { + + const int i00 = (blockIdx.x*blockDim.x + threadIdx.x)*2; + const int i10 = blockDim.y*blockIdx.y + threadIdx.y; + const int i11 = (blockIdx.z*blockDim.z + threadIdx.z)/ne12; + const int i12 = (blockIdx.z*blockDim.z + threadIdx.z)%ne12; + + if (i00 >= ne00) { + return; + } + + const int i01 = src1[i10*s10 + i11*s11 + i12*s12]; + + dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3; + const void * src0_row = (const char *)src0 + i01*nb01 + i11*nb02 + i12*nb03; + + const int ib = i00/qk; // block index + const int iqs = (i00%qk)/qr; // quant index + const int iybs = i00 - i00%qk; // dst block start index + const int y_offset = qr == 1 ? 1 : qk/2; + + // dequantize + dfloat2 v; + dequantize_kernel(src0_row, ib, iqs, v); + + dst_row[iybs + iqs + 0] = v.x; + dst_row[iybs + iqs + y_offset] = v.y; +} + +template +static __global__ void k_get_rows_float( + const src0_t * src0, const int32_t * src1, dst_t * dst, + int64_t ne00, /*int64_t ne01, int64_t ne02, int64_t ne03,*/ + /*int64_t ne10, int64_t ne11,*/ int64_t ne12, /*int64_t ne13,*/ + /*size_t s0,*/ size_t s1, size_t s2, size_t s3, + /*size_t nb00,*/ size_t nb01, size_t nb02, size_t nb03, + size_t s10, size_t s11, size_t s12/*, size_t s13*/) { + + const int i00 = blockIdx.x*blockDim.x + threadIdx.x; + const int i10 = blockDim.y*blockIdx.y + threadIdx.y; + const int i11 = (blockIdx.z*blockDim.z + threadIdx.z)/ne12; + const int i12 = (blockIdx.z*blockDim.z + threadIdx.z)%ne12; + + if (i00 >= ne00) { + return; + } + + const int i01 = src1[i10*s10 + i11*s11 + i12*s12]; + + dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3; + const src0_t * src0_row = (const src0_t *)((const char *)src0 + i01*nb01 + i11*nb02 + i12*nb03); + + dst_row[i00] = src0_row[i00]; +} + +template +static void get_rows_cuda(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, + const void * src0_dd, const int32_t * src1_dd, float * 
dst_dd, cudaStream_t stream) { + + GGML_TENSOR_BINARY_OP_LOCALS + + const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1); + const int block_num_x = (ne00 + 2*CUDA_GET_ROWS_BLOCK_SIZE - 1) / (2*CUDA_GET_ROWS_BLOCK_SIZE); + const dim3 block_nums(block_num_x, ne10, ne11*ne12); + + // strides in elements + //const size_t s0 = nb0 / ggml_element_size(dst); + const size_t s1 = nb1 / ggml_element_size(dst); + const size_t s2 = nb2 / ggml_element_size(dst); + const size_t s3 = nb3 / ggml_element_size(dst); + + const size_t s10 = nb10 / ggml_element_size(src1); + const size_t s11 = nb11 / ggml_element_size(src1); + const size_t s12 = nb12 / ggml_element_size(src1); + //const size_t s13 = nb13 / ggml_element_size(src1); + + GGML_ASSERT(ne00 % 2 == 0); + + k_get_rows<<>>( + src0_dd, src1_dd, dst_dd, + ne00, /*ne01, ne02, ne03,*/ + /*ne10, ne11,*/ ne12, /*ne13,*/ + /* s0,*/ s1, s2, s3, + /* nb00,*/ nb01, nb02, nb03, + s10, s11, s12/*, s13*/); + + GGML_UNUSED(dst); +} + +template +static void get_rows_cuda_float(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, + const src0_t * src0_dd, const int32_t * src1_dd, float * dst_dd, cudaStream_t stream) { + + GGML_TENSOR_BINARY_OP_LOCALS + + const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1); + const int block_num_x = (ne00 + CUDA_GET_ROWS_BLOCK_SIZE - 1) / CUDA_GET_ROWS_BLOCK_SIZE; + const dim3 block_nums(block_num_x, ne10, ne11*ne12); + + // strides in elements + //const size_t s0 = nb0 / ggml_element_size(dst); + const size_t s1 = nb1 / ggml_element_size(dst); + const size_t s2 = nb2 / ggml_element_size(dst); + const size_t s3 = nb3 / ggml_element_size(dst); + + const size_t s10 = nb10 / ggml_element_size(src1); + const size_t s11 = nb11 / ggml_element_size(src1); + const size_t s12 = nb12 / ggml_element_size(src1); + //const size_t s13 = nb13 / ggml_element_size(src1); + + k_get_rows_float<<>>( + src0_dd, src1_dd, dst_dd, + ne00, /*ne01, ne02, ne03,*/ + /*ne10, ne11,*/ ne12, /*ne13,*/ + /* s0,*/ s1, s2, 
s3, + /* nb00,*/ nb01, nb02, nb03, + s10, s11, s12/*, s13*/); + + GGML_UNUSED(dst); +} + +void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + const float * src0_d = (const float *)src0->data; + const float * src1_d = (const float *)src1->data; + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + + GGML_ASSERT(src1->type == GGML_TYPE_I32); + GGML_ASSERT(dst->type == GGML_TYPE_F32); + + GGML_ASSERT(src0->nb[0] == ggml_type_size(src0->type)); + GGML_ASSERT(src1->nb[0] == ggml_type_size(src1->type)); + GGML_ASSERT(dst->nb[0] == ggml_type_size(dst->type)); + + const int32_t * src1_i32 = (const int32_t *) src1_d; + + switch (src0->type) { + case GGML_TYPE_F16: + get_rows_cuda_float(src0, src1, dst, (const half *)src0_d, src1_i32, dst_d, stream); + break; + case GGML_TYPE_F32: + get_rows_cuda_float(src0, src1, dst, src0_d, src1_i32, dst_d, stream); + break; + case GGML_TYPE_Q4_0: + get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); + break; + case GGML_TYPE_Q4_1: + get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); + break; + case GGML_TYPE_Q5_0: + get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); + break; + case GGML_TYPE_Q5_1: + get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); + break; + case GGML_TYPE_Q8_0: + get_rows_cuda(src0, src1, dst, src0_d, src1_i32, dst_d, stream); + break; + default: + // TODO: k-quants + fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type)); + GGML_ASSERT(false); + break; + } +} diff --git a/llama/ggml-cuda/getrows.cuh b/llama/ggml-cuda/getrows.cuh index bbf13023..04e1c187 100644 --- a/llama/ggml-cuda/getrows.cuh +++ b/llama/ggml-cuda/getrows.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_GET_ROWS_BLOCK_SIZE 256 - -void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include 
"common.cuh" + +#define CUDA_GET_ROWS_BLOCK_SIZE 256 + +void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/im2col.cu b/llama/ggml-cuda/im2col.cu index 3d0d8d4e..5510b43c 100644 --- a/llama/ggml-cuda/im2col.cu +++ b/llama/ggml-cuda/im2col.cu @@ -1,104 +1,104 @@ -#include "im2col.cuh" - -template -static __global__ void im2col_kernel( - const float * x, T * dst, int64_t batch_offset, - int64_t offset_delta, int64_t IC, int64_t IW, int64_t IH, int64_t OH, int64_t OW, int64_t KW, int64_t KH, int64_t pelements, int64_t CHW, - int s0, int s1, int p0, int p1, int d0, int d1) { - const int64_t i = threadIdx.x + blockIdx.x * blockDim.x; - if (i >= pelements) { - return; - } - - const int64_t ksize = OW * (KH > 1 ? KW : 1); - const int64_t kx = i / ksize; - const int64_t kd = kx * ksize; - const int64_t ky = (i - kd) / OW; - const int64_t ix = i % OW; - - const int64_t oh = blockIdx.y; - const int64_t batch = blockIdx.z / IC; - const int64_t ic = blockIdx.z % IC; - - const int64_t iiw = ix * s0 + kx * d0 - p0; - const int64_t iih = oh * s1 + ky * d1 - p1; - - const int64_t offset_dst = - ((batch * OH + oh) * OW + ix) * CHW + - (ic * (KW * KH) + ky * KW + kx); - - if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) { - dst[offset_dst] = 0.0f; - } else { - const int64_t offset_src = ic * offset_delta + batch * batch_offset; - dst[offset_dst] = x[offset_src + iih * IW + iiw]; - } -} - -template -static void im2col_cuda(const float * x, T* dst, - int64_t IW, int64_t IH, int64_t OW, int64_t OH, int64_t KW, int64_t KH, int64_t IC, - int64_t batch, int64_t batch_offset, int64_t offset_delta, - int s0,int s1,int p0,int p1,int d0,int d1, cudaStream_t stream) { - const int parallel_elements = OW * KW * KH; - const int num_blocks = (parallel_elements + CUDA_IM2COL_BLOCK_SIZE - 1) / CUDA_IM2COL_BLOCK_SIZE; - dim3 block_nums(num_blocks, OH, batch * IC); - im2col_kernel<<>>(x, dst, batch_offset, offset_delta, IC, IW, IH, OH, OW, KW, 
KH, parallel_elements, (IC * KH * KW), s0, s1, p0, p1, d0, d1); -} - -static void im2col_cuda_f16(const float * x, half * dst, - int64_t IW, int64_t IH, int64_t OW, int64_t OH, int64_t KW, int64_t KH, int64_t IC, - int64_t batch, int64_t batch_offset, int64_t offset_delta, - int s0,int s1,int p0,int p1,int d0,int d1, cudaStream_t stream) { - - im2col_cuda(x, dst, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, offset_delta, s0, s1, p0, p1, d0, d1, stream); -} - -static void im2col_cuda_f32(const float * x, float * dst, - int64_t IW, int64_t IH, int64_t OW, int64_t OH, int64_t KW, int64_t KH, int64_t IC, - int64_t batch, int64_t batch_offset, int64_t offset_delta, - int s0,int s1,int p0,int p1,int d0,int d1, cudaStream_t stream) { - - im2col_cuda(x, dst, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, offset_delta, s0, s1, p0, p1, d0, d1, stream); -} - -void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const ggml_tensor * src1 = dst->src[1]; - const float * src1_d = (const float *)src1->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F16); - GGML_ASSERT(src1->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32); - - const int32_t s0 = ((const int32_t*)(dst->op_params))[0]; - const int32_t s1 = ((const int32_t*)(dst->op_params))[1]; - const int32_t p0 = ((const int32_t*)(dst->op_params))[2]; - const int32_t p1 = ((const int32_t*)(dst->op_params))[3]; - const int32_t d0 = ((const int32_t*)(dst->op_params))[4]; - const int32_t d1 = ((const int32_t*)(dst->op_params))[5]; - - const bool is_2D = ((const int32_t*)(dst->op_params))[6] == 1; - - const int64_t IC = src1->ne[is_2D ? 2 : 1]; - const int64_t IH = is_2D ? src1->ne[1] : 1; - const int64_t IW = src1->ne[0]; - - const int64_t KH = is_2D ? src0->ne[1] : 1; - const int64_t KW = src0->ne[0]; - - const int64_t OH = is_2D ? 
dst->ne[2] : 1; - const int64_t OW = dst->ne[1]; - - const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32 - const int64_t batch = src1->ne[3]; - const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32 - - if(dst->type == GGML_TYPE_F16) { - im2col_cuda_f16(src1_d, (half *) dst_d, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, stream); - } else { - im2col_cuda_f32(src1_d, (float *) dst_d, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, stream); - } -} +#include "im2col.cuh" + +template +static __global__ void im2col_kernel( + const float * x, T * dst, int64_t batch_offset, + int64_t offset_delta, int64_t IC, int64_t IW, int64_t IH, int64_t OH, int64_t OW, int64_t KW, int64_t KH, int64_t pelements, int64_t CHW, + int s0, int s1, int p0, int p1, int d0, int d1) { + const int64_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i >= pelements) { + return; + } + + const int64_t ksize = OW * (KH > 1 ? 
KW : 1); + const int64_t kx = i / ksize; + const int64_t kd = kx * ksize; + const int64_t ky = (i - kd) / OW; + const int64_t ix = i % OW; + + const int64_t oh = blockIdx.y; + const int64_t batch = blockIdx.z / IC; + const int64_t ic = blockIdx.z % IC; + + const int64_t iiw = ix * s0 + kx * d0 - p0; + const int64_t iih = oh * s1 + ky * d1 - p1; + + const int64_t offset_dst = + ((batch * OH + oh) * OW + ix) * CHW + + (ic * (KW * KH) + ky * KW + kx); + + if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) { + dst[offset_dst] = 0.0f; + } else { + const int64_t offset_src = ic * offset_delta + batch * batch_offset; + dst[offset_dst] = x[offset_src + iih * IW + iiw]; + } +} + +template +static void im2col_cuda(const float * x, T* dst, + int64_t IW, int64_t IH, int64_t OW, int64_t OH, int64_t KW, int64_t KH, int64_t IC, + int64_t batch, int64_t batch_offset, int64_t offset_delta, + int s0,int s1,int p0,int p1,int d0,int d1, cudaStream_t stream) { + const int parallel_elements = OW * KW * KH; + const int num_blocks = (parallel_elements + CUDA_IM2COL_BLOCK_SIZE - 1) / CUDA_IM2COL_BLOCK_SIZE; + dim3 block_nums(num_blocks, OH, batch * IC); + im2col_kernel<<>>(x, dst, batch_offset, offset_delta, IC, IW, IH, OH, OW, KW, KH, parallel_elements, (IC * KH * KW), s0, s1, p0, p1, d0, d1); +} + +static void im2col_cuda_f16(const float * x, half * dst, + int64_t IW, int64_t IH, int64_t OW, int64_t OH, int64_t KW, int64_t KH, int64_t IC, + int64_t batch, int64_t batch_offset, int64_t offset_delta, + int s0,int s1,int p0,int p1,int d0,int d1, cudaStream_t stream) { + + im2col_cuda(x, dst, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, offset_delta, s0, s1, p0, p1, d0, d1, stream); +} + +static void im2col_cuda_f32(const float * x, float * dst, + int64_t IW, int64_t IH, int64_t OW, int64_t OH, int64_t KW, int64_t KH, int64_t IC, + int64_t batch, int64_t batch_offset, int64_t offset_delta, + int s0,int s1,int p0,int p1,int d0,int d1, cudaStream_t stream) { + + im2col_cuda(x, dst, IW, IH, 
OW, OH, KW, KH, IC, batch, batch_offset, offset_delta, s0, s1, p0, p1, d0, d1, stream); +} + +void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + const float * src1_d = (const float *)src1->data; + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32); + + const int32_t s0 = ((const int32_t*)(dst->op_params))[0]; + const int32_t s1 = ((const int32_t*)(dst->op_params))[1]; + const int32_t p0 = ((const int32_t*)(dst->op_params))[2]; + const int32_t p1 = ((const int32_t*)(dst->op_params))[3]; + const int32_t d0 = ((const int32_t*)(dst->op_params))[4]; + const int32_t d1 = ((const int32_t*)(dst->op_params))[5]; + + const bool is_2D = ((const int32_t*)(dst->op_params))[6] == 1; + + const int64_t IC = src1->ne[is_2D ? 2 : 1]; + const int64_t IH = is_2D ? src1->ne[1] : 1; + const int64_t IW = src1->ne[0]; + + const int64_t KH = is_2D ? src0->ne[1] : 1; + const int64_t KW = src0->ne[0]; + + const int64_t OH = is_2D ? dst->ne[2] : 1; + const int64_t OW = dst->ne[1]; + + const size_t delta_offset = src1->nb[is_2D ? 
2 : 1] / 4; // nb is byte offset, src is type float32 + const int64_t batch = src1->ne[3]; + const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32 + + if(dst->type == GGML_TYPE_F16) { + im2col_cuda_f16(src1_d, (half *) dst_d, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, stream); + } else { + im2col_cuda_f32(src1_d, (float *) dst_d, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, stream); + } +} diff --git a/llama/ggml-cuda/im2col.cuh b/llama/ggml-cuda/im2col.cuh index 1ce8fae4..55c898bf 100644 --- a/llama/ggml-cuda/im2col.cuh +++ b/llama/ggml-cuda/im2col.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_IM2COL_BLOCK_SIZE 256 - -void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_IM2COL_BLOCK_SIZE 256 + +void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/mmq.cu b/llama/ggml-cuda/mmq.cu index 58799e4c..1d6b9e69 100644 --- a/llama/ggml-cuda/mmq.cu +++ b/llama/ggml-cuda/mmq.cu @@ -11,6 +11,7 @@ void ggml_cuda_op_mul_mat_q( const int64_t nb01 = src0->nb[1]; const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; GGML_ASSERT(ne10 % QK8_1 == 0); const int64_t ne0 = dst->ne[0]; @@ -25,7 +26,7 @@ void ggml_cuda_op_mul_mat_q( // nrows_dst == nrows of the matrix that the kernel writes into const int64_t nrows_dst = id == ctx.device ? 
ne0 : row_diff; - const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, nrows_dst}; + const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst}; switch (src0->type) { case GGML_TYPE_Q4_0: diff --git a/llama/ggml-cuda/mmq.cuh b/llama/ggml-cuda/mmq.cuh index 6744cce6..3ccae8a0 100644 --- a/llama/ggml-cuda/mmq.cuh +++ b/llama/ggml-cuda/mmq.cuh @@ -1,15 +1,26 @@ +#pragma once + #include "common.cuh" #include "vecdotq.cuh" #include #include +#define MMQ_TILE_Y_K (WARP_SIZE + WARP_SIZE/QI8_1) + typedef void (*load_tiles_mmq_t)( const char * __restrict__ x, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh, int * __restrict__ x_sc, const int & kbx0, const int & i_max, const int & stride); typedef void (*vec_dot_mmq_t)( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ms, float * __restrict__ sum, const int & k0); + const int * __restrict__ y, float * __restrict__ sum, const int & k0); + +struct block_q8_1_mmq { + half2 ds[4]; + int8_t qs[4*QK8_1]; +}; +static_assert(sizeof(block_q8_1_mmq) == 4*QK8_1 + 4*sizeof(half2), "Unexpected block_q8_1_mmq size"); +static_assert(sizeof(block_q8_1_mmq) == 4*sizeof(block_q8_1), "Unexpected block_q8_1_mmq size"); struct tile_x_sizes { int ql; @@ -132,10 +143,14 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q4_0_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); GGML_UNUSED(x_sc); + const float * x_dmf 
= (const float *) x_dm; + const int * y_qs = (const int *) y + 4; + const half2 * y_ds = (const half2 *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -145,19 +160,18 @@ static __device__ __forceinline__ void vec_dot_q4_0_q8_1_mul_mat( const int i = i0 + threadIdx.x; const int kyqs = k0 % (QI8_1/2) + QI8_1 * (k0 / (QI8_1/2)); - const float * x_dmf = (const float *) x_dm; int u[2*VDR_Q4_0_Q8_1_MMQ]; #pragma unroll for (int l = 0; l < VDR_Q4_0_Q8_1_MMQ; ++l) { - u[2*l+0] = y_qs[j * WARP_SIZE + (kyqs + l) % WARP_SIZE]; - u[2*l+1] = y_qs[j * WARP_SIZE + (kyqs + l + QI4_0) % WARP_SIZE]; + u[2*l+0] = y_qs[j*MMQ_TILE_Y_K + (kyqs + l) % WARP_SIZE]; + u[2*l+1] = y_qs[j*MMQ_TILE_Y_K + (kyqs + l + QI4_0) % WARP_SIZE]; } sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q4_0_q8_1_impl - (&x_ql[i * (WARP_SIZE + 1) + k0], u, x_dmf[i * (WARP_SIZE/QI4_0) + i/QI4_0 + k0/QI4_0], - y_ds[j * (WARP_SIZE/QI8_1) + (2*k0/QI8_1) % (WARP_SIZE/QI8_1)]); + (&x_ql[i*(WARP_SIZE + 1) + k0], u, x_dmf[i*(WARP_SIZE/QI4_0) + i/QI4_0 + k0/QI4_0], + y_ds[j*MMQ_TILE_Y_K + (2*k0/QI8_1) % (WARP_SIZE/QI8_1)]); } } } @@ -203,10 +217,13 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q4_1_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); GGML_UNUSED(x_sc); + const int * y_qs = (const int *) y + 4; + const half2 * y_ds = (const half2 *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -221,13 +238,13 @@ static __device__ __forceinline__ void vec_dot_q4_1_q8_1_mul_mat( #pragma unroll for (int l = 0; l < VDR_Q4_1_Q8_1_MMQ; ++l) { - u[2*l+0] = y_qs[j * WARP_SIZE + 
(kyqs + l) % WARP_SIZE]; - u[2*l+1] = y_qs[j * WARP_SIZE + (kyqs + l + QI4_1) % WARP_SIZE]; + u[2*l+0] = y_qs[j*MMQ_TILE_Y_K + (kyqs + l) % WARP_SIZE]; + u[2*l+1] = y_qs[j*MMQ_TILE_Y_K + (kyqs + l + QI4_1) % WARP_SIZE]; } sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q4_1_q8_1_impl - (&x_ql[i * (WARP_SIZE + 1) + k0], u, x_dm[i * (WARP_SIZE/QI4_1) + i/QI4_1 + k0/QI4_1], - y_ds[j * (WARP_SIZE/QI8_1) + (2*k0/QI8_1) % (WARP_SIZE/QI8_1)]); + (&x_ql[i*(WARP_SIZE + 1) + k0], u, x_dm[i*(WARP_SIZE/QI4_1) + i/QI4_1 + k0/QI4_1], + y_ds[j*MMQ_TILE_Y_K + (2*k0/QI8_1) % (WARP_SIZE/QI8_1)]); } } } @@ -293,10 +310,14 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q5_0_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); GGML_UNUSED(x_sc); + const float * x_dmf = (const float *) x_dm; + const int * y_qs = (const int *) y + 4; + const float * y_df = (const float *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -306,20 +327,18 @@ static __device__ __forceinline__ void vec_dot_q5_0_q8_1_mul_mat( const int i = i0 + threadIdx.x; const int kyqs = k0 % (QI8_1/2) + QI8_1 * (k0 / (QI8_1/2)); - const int index_bx = i * (WARP_SIZE/QI5_0) + i/QI5_0 + k0/QI5_0; - const float * x_dmf = (const float *) x_dm; - const float * y_df = (const float *) y_ds; + const int index_bx = i*(WARP_SIZE/QI5_0) + i/QI5_0 + k0/QI5_0; int u[2*VDR_Q5_0_Q8_1_MMQ]; #pragma unroll for (int l = 0; l < VDR_Q5_0_Q8_1_MMQ; ++l) { - u[2*l+0] = y_qs[j * WARP_SIZE + (kyqs + l) % WARP_SIZE]; - u[2*l+1] = y_qs[j * WARP_SIZE + (kyqs + l + QI5_0) % WARP_SIZE]; + u[2*l+0] = y_qs[j*MMQ_TILE_Y_K + (kyqs + l) % WARP_SIZE]; + u[2*l+1] 
= y_qs[j*MMQ_TILE_Y_K + (kyqs + l + QI5_0) % WARP_SIZE]; } sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q8_0_q8_1_impl - (&x_ql[i * (2*WARP_SIZE + 1) + 2 * k0], u, x_dmf[index_bx], y_df[j * (WARP_SIZE/QI8_1) + (2*k0/QI8_1) % (WARP_SIZE/QI8_1)]); + (&x_ql[i*(2*WARP_SIZE + 1) + 2*k0], u, x_dmf[index_bx], y_df[j*MMQ_TILE_Y_K + (2*k0/QI8_1) % (WARP_SIZE/QI8_1)]); } } } @@ -383,10 +402,13 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q5_1_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); GGML_UNUSED(x_sc); + const int * y_qs = (const int *) y + 4; + const half2 * y_ds = (const half2 *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -396,18 +418,18 @@ static __device__ __forceinline__ void vec_dot_q5_1_q8_1_mul_mat( const int i = i0 + threadIdx.x; const int kyqs = k0 % (QI8_1/2) + QI8_1 * (k0 / (QI8_1/2)); - const int index_bx = i * (WARP_SIZE/QI5_1) + + i/QI5_1 + k0/QI5_1; + const int index_bx = i*(WARP_SIZE/QI5_1) + i/QI5_1 + k0/QI5_1; int u[2*VDR_Q5_1_Q8_1_MMQ]; #pragma unroll for (int l = 0; l < VDR_Q5_1_Q8_1_MMQ; ++l) { - u[2*l+0] = y_qs[j * WARP_SIZE + (kyqs + l) % WARP_SIZE]; - u[2*l+1] = y_qs[j * WARP_SIZE + (kyqs + l + QI5_1) % WARP_SIZE]; + u[2*l+0] = y_qs[j*MMQ_TILE_Y_K + (kyqs + l) % WARP_SIZE]; + u[2*l+1] = y_qs[j*MMQ_TILE_Y_K + (kyqs + l + QI5_1) % WARP_SIZE]; } sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q8_1_q8_1_impl - (&x_ql[i * (2*WARP_SIZE + 1) + 2 * k0], u, x_dm[index_bx], y_ds[j * (WARP_SIZE/QI8_1) + (2*k0/QI8_1) % (WARP_SIZE/QI8_1)]); + (&x_ql[i*(2*WARP_SIZE + 1) + 2*k0], u, x_dm[index_bx], y_ds[j*MMQ_TILE_Y_K + (2*k0/QI8_1) % 
(WARP_SIZE/QI8_1)]); } } } @@ -455,10 +477,14 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); GGML_UNUSED(x_sc); + const float * x_dmf = (const float *) x_dm; + const int * y_qs = (const int *) y + 4; + const float * y_df = (const float *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -467,12 +493,9 @@ static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mul_mat( for (int i0 = 0; i0 < mmq_y; i0 += WARP_SIZE) { const int i = i0 + threadIdx.x; - const float * x_dmf = (const float *) x_dm; - const float * y_df = (const float *) y_ds; - sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q8_0_q8_1_impl - (&x_ql[i * (WARP_SIZE + 1) + k0], &y_qs[j * WARP_SIZE + k0], x_dmf[i * (WARP_SIZE/QI8_0) + i/QI8_0 + k0/QI8_0], - y_df[j * (WARP_SIZE/QI8_1) + k0/QI8_1]); + (&x_ql[i*(WARP_SIZE + 1) + k0], &y_qs[j*MMQ_TILE_Y_K + k0], x_dmf[i*(WARP_SIZE/QI8_0) + i/QI8_0 + k0/QI8_0], + y_df[j*MMQ_TILE_Y_K + k0/QI8_1]); } } } @@ -531,10 +554,13 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); + const int * y_qs = (const int *) y + 4; + const float * y_df = (const float *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int 
j = j0 + threadIdx.y; @@ -545,11 +571,10 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mul_mat( const int kbx = k0 / QI2_K; const int ky = (k0 % QI2_K) * QR2_K; - const float * y_df = (const float *) y_ds; int v[QR2_K*VDR_Q2_K_Q8_1_MMQ]; - const int kqsx = i * (WARP_SIZE + 1) + kbx*QI2_K + (QI2_K/2) * (ky/(2*QI2_K)) + ky % (QI2_K/2); + const int kqsx = i*(WARP_SIZE + 1) + kbx*QI2_K + (QI2_K/2) * (ky/(2*QI2_K)) + ky % (QI2_K/2); const int shift = 2 * ((ky % (2*QI2_K)) / (QI2_K/2)); #pragma unroll @@ -557,11 +582,11 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mul_mat( v[l] = (x_ql[kqsx + l] >> shift) & 0x03030303; } - const uint8_t * scales = ((const uint8_t *) &x_sc[i * (WARP_SIZE/4) + i/4 + kbx*4]) + ky/4; + const uint8_t * scales = ((const uint8_t *) &x_sc[i*(WARP_SIZE/4) + i/4 + kbx*4]) + ky/4; - const int index_y = j * WARP_SIZE + (QR2_K*k0) % WARP_SIZE; sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q2_K_q8_1_impl_mmq( - v, &y_qs[index_y], scales, x_dm[i * (WARP_SIZE/QI2_K) + i/QI2_K + kbx], y_df[index_y/QI8_1]); + v, &y_qs[j*MMQ_TILE_Y_K + (QR2_K*k0) % WARP_SIZE], scales, + x_dm[i*(WARP_SIZE/QI2_K) + i/QI2_K + kbx], y_df[j*MMQ_TILE_Y_K + ((QR2_K*k0) % WARP_SIZE)/QI8_1]); } } } @@ -646,7 +671,11 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q3_K_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { + + const float * x_dmf = (const float *) x_dm; + const int * y_qs = (const int *) y + 4; + const float * y_df = (const float *) y; #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { @@ -658,8 +687,6 @@ static __device__ __forceinline__ void vec_dot_q3_K_q8_1_mul_mat( const int kbx = k0 / QI3_K; const 
int ky = (k0 % QI3_K) * QR3_K; - const float * x_dmf = (const float *) x_dm; - const float * y_df = (const float *) y_ds; const int8_t * scales = ((const int8_t *) (x_sc + i * (WARP_SIZE/4) + i/4 + kbx*4)) + ky/4; @@ -667,19 +694,19 @@ static __device__ __forceinline__ void vec_dot_q3_K_q8_1_mul_mat( #pragma unroll for (int l = 0; l < QR3_K*VDR_Q3_K_Q8_1_MMQ; ++l) { - const int kqsx = i * (WARP_SIZE + 1) + kbx*QI3_K + (QI3_K/2) * (ky/(2*QI3_K)) + ky % (QI3_K/2); + const int kqsx = i*(WARP_SIZE + 1) + kbx*QI3_K + (QI3_K/2) * (ky/(2*QI3_K)) + ky % (QI3_K/2); const int shift = 2 * ((ky % 32) / 8); const int vll = (x_ql[kqsx + l] >> shift) & 0x03030303; - const int vh = x_qh[i * (WARP_SIZE/2) + i/2 + kbx * (QI3_K/2) + (ky+l)%8] >> ((ky+l) / 8); + const int vh = x_qh[i*(WARP_SIZE/2) + i/2 + kbx * (QI3_K/2) + (ky+l)%8] >> ((ky+l) / 8); const int vlh = (vh << 2) & 0x04040404; v[l] = __vsubss4(vll, vlh); } - const int index_y = j * WARP_SIZE + (k0*QR3_K) % WARP_SIZE; sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q3_K_q8_1_impl_mmq( - v, &y_qs[index_y], scales, x_dmf[i * (WARP_SIZE/QI3_K) + i/QI3_K + kbx], y_df[index_y/QI8_1]); + v, &y_qs[j*MMQ_TILE_Y_K + (k0*QR3_K) % WARP_SIZE], scales, + x_dmf[i*(WARP_SIZE/QI3_K) + i/QI3_K + kbx], y_df[j*MMQ_TILE_Y_K + ((k0*QR3_K) % WARP_SIZE)/QI8_1]); } } } @@ -746,10 +773,13 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q4_K_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); + const int * y_qs = (const int *) y + 4; + const half2 * y_ds = (const half2 *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -760,9 +790,9 @@ static __device__ 
__forceinline__ void vec_dot_q4_K_q8_1_mul_mat( const uint8_t * sc = ((const uint8_t *) &x_sc[i * (WARP_SIZE/8) + i/8 + k0/16]) + 2*((k0 % 16) / 8); - const int index_y = j * WARP_SIZE + (QR4_K*k0) % WARP_SIZE; sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q4_K_q8_1_impl_mmq( - &x_ql[i * (WARP_SIZE + 1) + k0], &y_qs[index_y], sc, sc+8, x_dm[i * (WARP_SIZE/QI4_K) + i/QI4_K], &y_ds[index_y/QI8_1]); + &x_ql[i*(WARP_SIZE + 1) + k0], &y_qs[j*MMQ_TILE_Y_K + (QR4_K*k0) % WARP_SIZE], sc, sc+8, + x_dm[i*(WARP_SIZE/QI4_K) + i/QI4_K], &y_ds[j*MMQ_TILE_Y_K + ((QR4_K*k0) % WARP_SIZE)/QI8_1]); } } } @@ -842,10 +872,13 @@ template static __device__ __forceinlin template static __device__ __forceinline__ void vec_dot_q5_K_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); + const int * y_qs = (const int *) y + 4; + const half2 * y_ds = (const half2 *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -856,10 +889,9 @@ static __device__ __forceinline__ void vec_dot_q5_K_q8_1_mul_mat( const uint8_t * sc = ((const uint8_t *) &x_sc[i * (WARP_SIZE/8) + i/8 + k0/16]) + 2 * ((k0 % 16) / 8); - const int index_x = i * (QR5_K*WARP_SIZE + 1) + QR5_K*k0; - const int index_y = j * WARP_SIZE + (QR5_K*k0) % WARP_SIZE; sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q5_K_q8_1_impl_mmq( - &x_ql[index_x], &y_qs[index_y], sc, sc+8, x_dm[i * (WARP_SIZE/QI5_K) + i/QI5_K], &y_ds[index_y/QI8_1]); + &x_ql[i*(QR5_K*WARP_SIZE + 1) + QR5_K*k0], &y_qs[j*MMQ_TILE_Y_K + (QR5_K*k0) % WARP_SIZE], sc, sc+8, + x_dm[i*(WARP_SIZE/QI5_K) + i/QI5_K], &y_ds[j*MMQ_TILE_Y_K + ((QR5_K*k0) % WARP_SIZE)/QI8_1]); } } } @@ -932,10 +964,14 @@ template static __device__ 
__forceinlin template static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mul_mat( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, - const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, float * __restrict__ sum, const int & k0) { + const int * __restrict__ y, float * __restrict__ sum, const int & k0) { GGML_UNUSED(x_qh); + const float * x_dmf = (const float *) x_dm; + const int * y_qs = (const int *) y + 4; + const float * y_df = (const float *) y; + #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -944,15 +980,11 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mul_mat( for (int i0 = 0; i0 < mmq_y; i0 += WARP_SIZE) { const int i = i0 + threadIdx.x; - const float * x_dmf = (const float *) x_dm; - const float * y_df = (const float *) y_ds; - const int8_t * sc = ((const int8_t *) &x_sc[i * (WARP_SIZE/8) + i/8 + k0/8]); - const int index_x = i * (QR6_K*WARP_SIZE + 1) + QR6_K*k0; - const int index_y = j * WARP_SIZE + (QR6_K*k0) % WARP_SIZE; sum[j0/nwarps*mmq_y/WARP_SIZE + i0/WARP_SIZE] += vec_dot_q6_K_q8_1_impl_mmq( - &x_ql[index_x], &y_qs[index_y], sc, x_dmf[i * (WARP_SIZE/QI6_K) + i/QI6_K], &y_df[index_y/QI8_1]); + &x_ql[i*(QR6_K*WARP_SIZE + 1) + QR6_K*k0], &y_qs[j*MMQ_TILE_Y_K + (QR6_K*k0) % WARP_SIZE], sc, + x_dmf[i*(WARP_SIZE/QI6_K) + i/QI6_K], &y_df[j*MMQ_TILE_Y_K + ((QR6_K*k0) % WARP_SIZE)/QI8_1]); } } } @@ -964,7 +996,6 @@ struct mmq_type_traits; template struct mmq_type_traits { - static constexpr bool need_sum = true; static constexpr int vdr = VDR_Q4_0_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q4_0; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q4_0_q8_1_mul_mat; @@ -972,7 +1003,6 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum = true; static constexpr int vdr = VDR_Q4_1_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = 
load_tiles_q4_1; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q4_1_q8_1_mul_mat; @@ -980,7 +1010,6 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum = false; static constexpr int vdr = VDR_Q5_0_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q5_0; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q5_0_q8_1_mul_mat; @@ -988,7 +1017,6 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum = true; static constexpr int vdr = VDR_Q5_1_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q5_1; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q5_1_q8_1_mul_mat; @@ -996,7 +1024,6 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum = false; static constexpr int vdr = VDR_Q8_0_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q8_0; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q8_0_q8_1_mul_mat; @@ -1004,7 +1031,6 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum = false; static constexpr int vdr = VDR_Q2_K_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q2_K; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q2_K_q8_1_mul_mat; @@ -1012,7 +1038,6 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum = false; static constexpr int vdr = VDR_Q3_K_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q3_K; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q3_K_q8_1_mul_mat; @@ -1020,7 +1045,6 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum = true; static constexpr int vdr = VDR_Q4_K_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q4_K; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q4_K_q8_1_mul_mat; @@ -1028,7 +1052,6 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum 
= true; static constexpr int vdr = VDR_Q5_K_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q5_K; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q5_K_q8_1_mul_mat; @@ -1036,12 +1059,36 @@ struct mmq_type_traits { template struct mmq_type_traits { - static constexpr bool need_sum = false; static constexpr int vdr = VDR_Q6_K_Q8_1_MMQ; static constexpr load_tiles_mmq_t load_tiles = load_tiles_q6_K; static constexpr vec_dot_mmq_t vec_dot = vec_dot_q6_K_q8_1_mul_mat; }; +static int mmq_need_sum(const ggml_type type_x) { + switch (type_x) { + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + return true; + case GGML_TYPE_Q5_0: + return false; + case GGML_TYPE_Q5_1: + return true; + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + return false; + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + return true; + case GGML_TYPE_Q6_K: + return false; + default: + GGML_ASSERT(false); + break; + } + return false; +} + template #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) #if defined(RDNA3) || defined(RDNA2) @@ -1056,7 +1103,7 @@ template #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) static __global__ void mul_mat_q( const char * __restrict__ x, const char * __restrict__ yc, float * __restrict__ dst, - const int ne00, const int ne01, const int stride00, const int ne10, const int ne11, const int ne0) { + const int ne00, const int ne01, const int stride01, const int ne10, const int ne11, const int stride11, const int ne0) { // Skip unused template specializations for faster compilation: if (mmq_x > get_mmq_x_max_device()) { @@ -1068,7 +1115,6 @@ static __global__ void mul_mat_q( constexpr int qr = ggml_cuda_type_traits::qr; constexpr int qi = ggml_cuda_type_traits::qi; constexpr int mmq_y = get_mmq_y_device(mmq_x); - constexpr bool need_sum = mmq_type_traits::need_sum; constexpr int vdr = mmq_type_traits::vdr; constexpr load_tiles_mmq_t load_tiles = mmq_type_traits::load_tiles; constexpr vec_dot_mmq_t 
vec_dot = mmq_type_traits::vec_dot; @@ -1080,62 +1126,38 @@ static __global__ void mul_mat_q( half2 * tile_x_dm = (half2 *) (tile_x_ql + txs.ql); int * tile_x_qh = (int *) (tile_x_dm + txs.dm); int * tile_x_sc = (int *) (tile_x_qh + txs.qh); - int * tile_y_qs = (int *) (tile_x_sc + txs.sc); // [mmq_x * WARP_SIZE] - half2 * tile_y_ds = (half2 *) (tile_y_qs + mmq_x*WARP_SIZE); // [mmq_x * WARP_SIZE/QI8_1]; - - const block_q8_1 * y = (const block_q8_1 *) yc; + int * tile_y = (int *) (tile_x_sc + txs.sc); // [mmq_x * (WARP_SIZE + WARP_SIZE/QI8_1)] const int blocks_per_row_x = ne00 / qk; - const int blocks_per_col_y = ne10 / QK8_1; const int blocks_per_warp = WARP_SIZE / qi; const int & ne1 = ne11; const int tile_x_max_i = ne01 - blockIdx.x*mmq_y - 1; + const int * y = (const int *) yc + blockIdx.y*(mmq_x*sizeof(block_q8_1_mmq)/sizeof(int)); + float sum[(mmq_x/nwarps) * (mmq_y/WARP_SIZE)] = {0.0f}; for (int kb0 = 0; kb0 < blocks_per_row_x; kb0 += blocks_per_warp) { - load_tiles(x, tile_x_ql, tile_x_dm, tile_x_qh, tile_x_sc, stride00*blockIdx.x*mmq_y + kb0, tile_x_max_i, stride00); + load_tiles(x, tile_x_ql, tile_x_dm, tile_x_qh, tile_x_sc, stride01*blockIdx.x*mmq_y + kb0, tile_x_max_i, stride01); #pragma unroll for (int kr = 0; kr < qr; ++kr) { - const int kqs = kr*WARP_SIZE + threadIdx.x; - const int kbxd = kqs / QI8_1; - + const int * by0 = y + stride11*(kb0*(qk*sizeof(block_q8_1_mmq) / (4*QK8_1*sizeof(int))) + kr*sizeof(block_q8_1_mmq)/sizeof(int)); #pragma unroll - for (int i0 = 0; i0 < mmq_x; i0 += nwarps) { - const int i = min(blockIdx.y*mmq_x + threadIdx.y + i0, ne11-1); // to prevent out-of-bounds memory accesses + for (int l0 = 0; l0 < mmq_x*MMQ_TILE_Y_K; l0 += nwarps*WARP_SIZE) { + int l = l0 + threadIdx.y*WARP_SIZE + threadIdx.x; - const block_q8_1 * by0 = &y[i*blocks_per_col_y + kb0 * (qk/QK8_1) + kbxd]; - - const int index_y = (i0 + threadIdx.y) * WARP_SIZE + kqs % WARP_SIZE; - tile_y_qs[index_y] = get_int_from_int8_aligned(by0->qs, threadIdx.x % QI8_1); - 
} - -#pragma unroll - for (int ids0 = 0; ids0 < mmq_x; ids0 += nwarps * QI8_1) { - const int ids = (ids0 + threadIdx.y * QI8_1 + threadIdx.x / (WARP_SIZE/QI8_1)) % mmq_x; - const int kby = threadIdx.x % (WARP_SIZE/QI8_1); - const int i_y_eff = min(blockIdx.y*mmq_x + ids, ne11-1); - - // if the sum is not needed it's faster to transform the scale to f32 ahead of time - const half2 * dsi_src = &y[i_y_eff*blocks_per_col_y + kb0 * (qk/QK8_1) + kr*(WARP_SIZE/QI8_1) + kby].ds; - half2 * dsi_dst = &tile_y_ds[ids * (WARP_SIZE/QI8_1) + kby]; - if (need_sum) { - *dsi_dst = *dsi_src; - } else { - float * dfi_dst = (float *) dsi_dst; - *dfi_dst = __low2float(*dsi_src); - } + tile_y[l] = by0[l]; } __syncthreads(); // #pragma unroll // unrolling this loop causes too much register pressure for (int k0 = kr*WARP_SIZE/qr; k0 < (kr+1)*WARP_SIZE/qr; k0 += vdr) { - vec_dot(tile_x_ql, tile_x_dm, tile_x_qh, tile_x_sc, tile_y_qs, tile_y_ds, sum, k0); + vec_dot(tile_x_ql, tile_x_dm, tile_x_qh, tile_x_sc, tile_y, sum, k0); } __syncthreads(); @@ -1165,8 +1187,8 @@ static __global__ void mul_mat_q( struct mmq_args { const char * x; const char * y; float * dst; - int64_t ne00; int64_t ne01; int64_t stride00; - int64_t ne10; int64_t ne11; + int64_t ne00; int64_t ne01; int64_t stride01; + int64_t ne10; int64_t ne11; int64_t stride11; int64_t ne0; }; @@ -1184,7 +1206,7 @@ static void launch_mul_mat_q(const mmq_args & args, cudaStream_t stream) { const tile_x_sizes txs = get_tile_x_sizes_host(type, mmq_y); const int shmem_x = txs.ql*sizeof(int) + txs.dm*sizeof(half2) + txs.qh*sizeof(int) + txs.sc*sizeof(int); const int shmem_y = mmq_x*WARP_SIZE*sizeof(int) + mmq_x*(WARP_SIZE/QI8_1)*sizeof(half2); - const int shmem = shmem_x + shmem_y; + const int shmem = shmem_x + GGML_PAD(shmem_y, nwarps*WARP_SIZE*sizeof(int)); #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) static bool shmem_limit_raised[GGML_CUDA_MAX_DEVICES] = {false}; @@ -1198,11 +1220,11 @@ static void 
launch_mul_mat_q(const mmq_args & args, cudaStream_t stream) { if (args.ne01 % mmq_y == 0) { const bool need_check = false; mul_mat_q<<>> - (args.x, args.y, args.dst, args.ne00, args.ne01, args.stride00, args.ne10, args.ne11, args.ne0); + (args.x, args.y, args.dst, args.ne00, args.ne01, args.stride01, args.ne10, args.ne11, args.stride11, args.ne0); } else { const bool need_check = true; mul_mat_q<<>> - (args.x, args.y, args.dst, args.ne00, args.ne01, args.stride00, args.ne10, args.ne11, args.ne0); + (args.x, args.y, args.dst, args.ne00, args.ne01, args.stride01, args.ne10, args.ne11, args.stride11, args.ne0); } } diff --git a/llama/ggml-cuda/mmvq.cuh b/llama/ggml-cuda/mmvq.cuh index 88c42c4b..46785d5a 100644 --- a/llama/ggml-cuda/mmvq.cuh +++ b/llama/ggml-cuda/mmvq.cuh @@ -1,7 +1,7 @@ -#include "common.cuh" - -void ggml_cuda_op_mul_mat_vec_q( - ggml_backend_cuda_context & ctx, - const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, - const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, - const int64_t src1_padded_row_size, cudaStream_t stream); +#include "common.cuh" + +void ggml_cuda_op_mul_mat_vec_q( + ggml_backend_cuda_context & ctx, + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, + const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, + const int64_t src1_padded_row_size, cudaStream_t stream); diff --git a/llama/ggml-cuda/norm.cuh b/llama/ggml-cuda/norm.cuh index 431a8f74..61f880b0 100644 --- a/llama/ggml-cuda/norm.cuh +++ b/llama/ggml-cuda/norm.cuh @@ -1,7 +1,7 @@ -#include "common.cuh" - -void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); - -void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); - -void 
ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/pad.cu b/llama/ggml-cuda/pad.cu index aba539e8..a033a8d0 100644 --- a/llama/ggml-cuda/pad.cu +++ b/llama/ggml-cuda/pad.cu @@ -1,49 +1,49 @@ -#include "pad.cuh" - -static __global__ void pad_f32(const float * x, float * dst, const int ne0, const int ne00, const int ne01, const int ne02, const int ne03) { - // blockIdx.z: idx of ne2*ne3, aka ne02*ne03 - // blockIdx.y: idx of ne1 - // blockIDx.x: idx of ne0 / BLOCK_SIZE - int nidx = threadIdx.x + blockIdx.x * blockDim.x; - if (nidx >= ne0) { - return; - } - - // operation - int offset_dst = - nidx + - blockIdx.y * ne0 + - blockIdx.z * ne0 * gridDim.y; - if (nidx < ne00 && blockIdx.y < ne01 && blockIdx.z < ne02*ne03) { - int offset_src = - nidx + - blockIdx.y * ne00 + - blockIdx.z * ne00 * ne01; - dst[offset_dst] = x[offset_src]; - } else { - dst[offset_dst] = 0.0f; - } -} - -static void pad_f32_cuda(const float * x, float * dst, - const int ne00, const int ne01, const int ne02, const int ne03, - const int ne0, const int ne1, const int ne2, const int ne3, cudaStream_t stream) { - int num_blocks = (ne0 + CUDA_PAD_BLOCK_SIZE - 1) / CUDA_PAD_BLOCK_SIZE; - dim3 gridDim(num_blocks, ne1, ne2*ne3); - pad_f32<<>>(x, dst, ne0, ne00, ne01, ne02, ne03); -} - -void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const float * src0_d = (const float *)src0->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors - - 
pad_f32_cuda(src0_d, dst_d, - src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], - dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], stream); -} +#include "pad.cuh" + +static __global__ void pad_f32(const float * x, float * dst, const int ne0, const int ne00, const int ne01, const int ne02, const int ne03) { + // blockIdx.z: idx of ne2*ne3, aka ne02*ne03 + // blockIdx.y: idx of ne1 + // blockIDx.x: idx of ne0 / BLOCK_SIZE + int nidx = threadIdx.x + blockIdx.x * blockDim.x; + if (nidx >= ne0) { + return; + } + + // operation + int offset_dst = + nidx + + blockIdx.y * ne0 + + blockIdx.z * ne0 * gridDim.y; + if (nidx < ne00 && blockIdx.y < ne01 && blockIdx.z < ne02*ne03) { + int offset_src = + nidx + + blockIdx.y * ne00 + + blockIdx.z * ne00 * ne01; + dst[offset_dst] = x[offset_src]; + } else { + dst[offset_dst] = 0.0f; + } +} + +static void pad_f32_cuda(const float * x, float * dst, + const int ne00, const int ne01, const int ne02, const int ne03, + const int ne0, const int ne1, const int ne2, const int ne3, cudaStream_t stream) { + int num_blocks = (ne0 + CUDA_PAD_BLOCK_SIZE - 1) / CUDA_PAD_BLOCK_SIZE; + dim3 gridDim(num_blocks, ne1, ne2*ne3); + pad_f32<<>>(x, dst, ne0, ne00, ne01, ne02, ne03); +} + +void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const float * src0_d = (const float *)src0->data; + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->type == GGML_TYPE_F32); + GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors + + pad_f32_cuda(src0_d, dst_d, + src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], + dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], stream); +} diff --git a/llama/ggml-cuda/pad.cuh b/llama/ggml-cuda/pad.cuh index 8fd386b0..b8ebc809 100644 --- a/llama/ggml-cuda/pad.cuh +++ b/llama/ggml-cuda/pad.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_PAD_BLOCK_SIZE 
256 - -void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_PAD_BLOCK_SIZE 256 + +void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/pool2d.cu b/llama/ggml-cuda/pool2d.cu index c6d51e4d..2b098207 100644 --- a/llama/ggml-cuda/pool2d.cu +++ b/llama/ggml-cuda/pool2d.cu @@ -1,94 +1,94 @@ -#include "pool2d.cuh" - -template -static __global__ void pool2d_nchw_kernel( - const int ih, const int iw, const int oh, const int ow, - const int kh, const int kw, const int sh, const int sw, - const int ph, const int pw, const int parallel_elements, - const Ti* src, To* dst, const enum ggml_op_pool op) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx >= parallel_elements) { - return; - } - - const int I_HW = ih * iw; - const int O_HW = oh * ow; - const int nc = idx / O_HW; - const int cur_oh = idx % O_HW / ow; - const int cur_ow = idx % O_HW % ow; - const Ti* i_ptr = src + nc * I_HW; - To* o_ptr = dst + nc * O_HW; - const int start_h = cur_oh * sh - ph; - const int bh = max(0, start_h); - const int eh = min(ih, start_h + kh); - const int start_w = cur_ow * sw - pw; - const int bw = max(0, start_w); - const int ew = min(iw, start_w + kw); - const To scale = 1. 
/ (kh * kw); - To res = 0; - - switch (op) { - case GGML_OP_POOL_AVG: res = 0; break; - case GGML_OP_POOL_MAX: res = -FLT_MAX; break; - default: assert(false); - } - - for (int i = bh; i < eh; i += 1) { - for (int j = bw; j < ew; j += 1) { -#if __CUDA_ARCH__ >= 350 - Ti cur = __ldg(i_ptr + i * iw + j); -#else - Ti cur = i_ptr[i * iw + j]; -#endif - switch (op) { - case GGML_OP_POOL_AVG: res += cur * scale; break; - case GGML_OP_POOL_MAX: res = max(res, (To)cur); break; - default: assert(false); - } - } - } - o_ptr[cur_oh * ow + cur_ow] = res; -} - -static void pool2d_nchw_kernel_f32_f32_cuda( - const int ih, const int iw, const int oh, const int ow, - const int kh, const int kw, const int sh, const int sw, - const int ph, const int pw, const int parallel_elements, - const float * src, float * dst, const enum ggml_op_pool op, - cudaStream_t stream) { - - const int num_blocks = (parallel_elements + CUDA_POOL2D_BLOCK_SIZE - 1) / CUDA_POOL2D_BLOCK_SIZE; - dim3 block_nums(num_blocks); - pool2d_nchw_kernel<<>>(ih, iw, oh, ow, kh, kw, sh, sw, ph, pw, parallel_elements, src, dst, op); -} - -void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const float * src0_d = (const float *)src0->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F32); - - const int32_t * opts = (const int32_t *)dst->op_params; - enum ggml_op_pool op = static_cast(opts[0]); - const int k0 = opts[1]; - const int k1 = opts[2]; - const int s0 = opts[3]; - const int s1 = opts[4]; - const int p0 = opts[5]; - const int p1 = opts[6]; - - const int64_t IH = src0->ne[1]; - const int64_t IW = src0->ne[0]; - - const int64_t N = dst->ne[3]; - const int64_t OC = dst->ne[2]; - const int64_t OH = dst->ne[1]; - const int64_t OW = dst->ne[0]; - - const int parallel_elements = N * OC * OH * OW; - - pool2d_nchw_kernel_f32_f32_cuda(IH, 
IW, OH, OW, k1, k0, s1, s0, p1, p0, parallel_elements, src0_d, dst_d, op, stream); -} +#include "pool2d.cuh" + +template +static __global__ void pool2d_nchw_kernel( + const int ih, const int iw, const int oh, const int ow, + const int kh, const int kw, const int sh, const int sw, + const int ph, const int pw, const int parallel_elements, + const Ti* src, To* dst, const enum ggml_op_pool op) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx >= parallel_elements) { + return; + } + + const int I_HW = ih * iw; + const int O_HW = oh * ow; + const int nc = idx / O_HW; + const int cur_oh = idx % O_HW / ow; + const int cur_ow = idx % O_HW % ow; + const Ti* i_ptr = src + nc * I_HW; + To* o_ptr = dst + nc * O_HW; + const int start_h = cur_oh * sh - ph; + const int bh = max(0, start_h); + const int eh = min(ih, start_h + kh); + const int start_w = cur_ow * sw - pw; + const int bw = max(0, start_w); + const int ew = min(iw, start_w + kw); + const To scale = 1. / (kh * kw); + To res = 0; + + switch (op) { + case GGML_OP_POOL_AVG: res = 0; break; + case GGML_OP_POOL_MAX: res = -FLT_MAX; break; + default: assert(false); + } + + for (int i = bh; i < eh; i += 1) { + for (int j = bw; j < ew; j += 1) { +#if __CUDA_ARCH__ >= 350 + Ti cur = __ldg(i_ptr + i * iw + j); +#else + Ti cur = i_ptr[i * iw + j]; +#endif + switch (op) { + case GGML_OP_POOL_AVG: res += cur * scale; break; + case GGML_OP_POOL_MAX: res = max(res, (To)cur); break; + default: assert(false); + } + } + } + o_ptr[cur_oh * ow + cur_ow] = res; +} + +static void pool2d_nchw_kernel_f32_f32_cuda( + const int ih, const int iw, const int oh, const int ow, + const int kh, const int kw, const int sh, const int sw, + const int ph, const int pw, const int parallel_elements, + const float * src, float * dst, const enum ggml_op_pool op, + cudaStream_t stream) { + + const int num_blocks = (parallel_elements + CUDA_POOL2D_BLOCK_SIZE - 1) / CUDA_POOL2D_BLOCK_SIZE; + dim3 block_nums(num_blocks); + pool2d_nchw_kernel<<>>(ih, 
iw, oh, ow, kh, kw, sh, sw, ph, pw, parallel_elements, src, dst, op); +} + +void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const float * src0_d = (const float *)src0->data; + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + const int32_t * opts = (const int32_t *)dst->op_params; + enum ggml_op_pool op = static_cast(opts[0]); + const int k0 = opts[1]; + const int k1 = opts[2]; + const int s0 = opts[3]; + const int s1 = opts[4]; + const int p0 = opts[5]; + const int p1 = opts[6]; + + const int64_t IH = src0->ne[1]; + const int64_t IW = src0->ne[0]; + + const int64_t N = dst->ne[3]; + const int64_t OC = dst->ne[2]; + const int64_t OH = dst->ne[1]; + const int64_t OW = dst->ne[0]; + + const int parallel_elements = N * OC * OH * OW; + + pool2d_nchw_kernel_f32_f32_cuda(IH, IW, OH, OW, k1, k0, s1, s0, p1, p0, parallel_elements, src0_d, dst_d, op, stream); +} diff --git a/llama/ggml-cuda/pool2d.cuh b/llama/ggml-cuda/pool2d.cuh index 7841292b..de65151b 100644 --- a/llama/ggml-cuda/pool2d.cuh +++ b/llama/ggml-cuda/pool2d.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_POOL2D_BLOCK_SIZE 256 - -void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_POOL2D_BLOCK_SIZE 256 + +void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/quantize.cu b/llama/ggml-cuda/quantize.cu index 7578c4b6..b4678682 100644 --- a/llama/ggml-cuda/quantize.cu +++ b/llama/ggml-cuda/quantize.cu @@ -1,22 +1,23 @@ #include "quantize.cuh" +#include -static __global__ void quantize_q8_1(const float * __restrict__ x, void * __restrict__ vy, const int64_t kx, const int64_t kx_padded) { - const int64_t ix = (int64_t)blockDim.x*blockIdx.x + threadIdx.x; +static __global__ void quantize_q8_1(const 
float * __restrict__ x, void * __restrict__ vy, const int64_t kx, const int64_t kx0_padded) { + const int64_t ix0 = (int64_t)blockDim.x*blockIdx.x + threadIdx.x; - if (ix >= kx_padded) { + if (ix0 >= kx0_padded) { return; } - const int64_t iy = (int64_t)blockDim.y*blockIdx.y + threadIdx.y; + const int64_t ix1 = blockIdx.y; - const int64_t i_padded = (int64_t)iy*kx_padded + ix; + const int64_t i_padded = ix1*kx0_padded + ix0; block_q8_1 * y = (block_q8_1 *) vy; const int64_t ib = i_padded / QK8_1; // block index const int64_t iqs = i_padded % QK8_1; // quant index - const float xi = ix < kx ? x[iy*kx + ix] : 0.0f; + const float xi = ix0 < kx ? x[ix1*kx + ix0] : 0.0f; float amax = fabsf(xi); float sum = xi; @@ -36,10 +37,76 @@ static __global__ void quantize_q8_1(const float * __restrict__ x, void * __rest reinterpret_cast(y[ib].ds.y) = sum; } -void quantize_row_q8_1_cuda(const float * x, void * vy, const int64_t kx, const int64_t ky, const int64_t kx_padded, cudaStream_t stream) { - const int64_t block_num_x = (kx_padded + CUDA_QUANTIZE_BLOCK_SIZE - 1) / CUDA_QUANTIZE_BLOCK_SIZE; - const dim3 num_blocks(block_num_x, ky, 1); - const dim3 block_size(CUDA_QUANTIZE_BLOCK_SIZE, 1, 1); - quantize_q8_1<<>>(x, vy, kx, kx_padded); +template +static __global__ void quantize_mmq_q8_1( + const float * __restrict__ x, void * __restrict__ vy, const int64_t kx0, const int64_t kx1, const int64_t kx0_padded) { + + const int64_t ix0 = (int64_t)blockDim.x*blockIdx.x + threadIdx.x; + + if (ix0 >= kx0_padded) { + return; + } + + const int64_t ix1 = kx1*blockIdx.z + blockIdx.y; + + block_q8_1_mmq * y = (block_q8_1_mmq *) vy; + + const int64_t ib0 = blockIdx.z*(gridDim.y*gridDim.x*blockDim.x/(4*QK8_1)); // first block of channel + const int64_t ib = ib0 + (ix0 / (4*QK8_1))*kx1 + blockIdx.y; // block index in channel + const int64_t iqs = ix0 % (4*QK8_1); // quant index in block + + const float xi = ix0 < kx0 ? 
x[ix1*kx0 + ix0] : 0.0f; + float amax = fabsf(xi); + + amax = warp_reduce_max(amax); + + float sum; + if (need_sum) { + sum = warp_reduce_sum(xi); + } + + const float d = amax / 127; + const int8_t q = amax == 0.0f ? 0 : roundf(xi / d); + + y[ib].qs[iqs] = q; + + if (iqs % QK8_1 != 0) { + return; + } + + if (need_sum) { + y[ib].ds[iqs/QK8_1] = make_half2(d, sum); + } else { + ((float *) y[ib].ds)[iqs/QK8_1] = d; + } } +void quantize_row_q8_1_cuda( + const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, + const int64_t kx0_padded, const ggml_type type_x, cudaStream_t stream) { + + GGML_ASSERT(kx0_padded % QK8_1 == 0); + + const int64_t block_num_x = (kx0_padded + CUDA_QUANTIZE_BLOCK_SIZE - 1) / CUDA_QUANTIZE_BLOCK_SIZE; + const dim3 num_blocks(block_num_x, kx1*channels, 1); + const dim3 block_size(CUDA_QUANTIZE_BLOCK_SIZE, 1, 1); + quantize_q8_1<<>>(x, vy, kx0, kx0_padded); + + GGML_UNUSED(type_x); +} + +void quantize_mmq_q8_1_cuda( + const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, + const int64_t kx0_padded, const ggml_type type_x, cudaStream_t stream) { + + GGML_ASSERT(kx0_padded % (4*QK8_1) == 0); + + const int64_t block_num_x = (kx0_padded + CUDA_QUANTIZE_BLOCK_SIZE - 1) / CUDA_QUANTIZE_BLOCK_SIZE; + const dim3 num_blocks(block_num_x, kx1, channels); + const dim3 block_size(CUDA_QUANTIZE_BLOCK_SIZE, 1, 1); + if (mmq_need_sum(type_x)) { + quantize_mmq_q8_1<<>>(x, vy, kx0, kx1, kx0_padded); + } else { + quantize_mmq_q8_1<<>>(x, vy, kx0, kx1, kx0_padded); + } +} diff --git a/llama/ggml-cuda/quantize.cuh b/llama/ggml-cuda/quantize.cuh index b37a4752..486c9360 100644 --- a/llama/ggml-cuda/quantize.cuh +++ b/llama/ggml-cuda/quantize.cuh @@ -1,5 +1,20 @@ +#pragma once + #include "common.cuh" +#include "mmq.cuh" + +#include #define CUDA_QUANTIZE_BLOCK_SIZE 256 -void quantize_row_q8_1_cuda(const float * x, void * vy, const int64_t kx, const int64_t ky, const int64_t kx_padded, cudaStream_t 
stream); +typedef void (*quantize_cuda_t)( + const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded, + const ggml_type type_x, cudaStream_t stream); + +void quantize_row_q8_1_cuda( + const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded, + const ggml_type type_x, cudaStream_t stream); + +void quantize_mmq_q8_1_cuda( + const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded, + const ggml_type type_x, cudaStream_t stream); diff --git a/llama/ggml-cuda/rope.cuh b/llama/ggml-cuda/rope.cuh index 0f787a0b..c2b29a7d 100644 --- a/llama/ggml-cuda/rope.cuh +++ b/llama/ggml-cuda/rope.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_ROPE_BLOCK_SIZE 256 - -void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_ROPE_BLOCK_SIZE 256 + +void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/scale.cuh b/llama/ggml-cuda/scale.cuh index 8ff75c82..f35503f7 100644 --- a/llama/ggml-cuda/scale.cuh +++ b/llama/ggml-cuda/scale.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_SCALE_BLOCK_SIZE 256 - -void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_SCALE_BLOCK_SIZE 256 + +void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/softmax.cuh b/llama/ggml-cuda/softmax.cuh index 4ef4ff86..5f6564af 100644 --- a/llama/ggml-cuda/softmax.cuh +++ b/llama/ggml-cuda/softmax.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_SOFT_MAX_BLOCK_SIZE 1024 - -void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_SOFT_MAX_BLOCK_SIZE 1024 + +void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 
diff --git a/llama/ggml-cuda/sumrows.cu b/llama/ggml-cuda/sumrows.cu index 82e8e875..0422821d 100644 --- a/llama/ggml-cuda/sumrows.cu +++ b/llama/ggml-cuda/sumrows.cu @@ -1,40 +1,40 @@ -#include "sumrows.cuh" - -static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) { - const int row = blockIdx.x; - const int col = threadIdx.x; - - float sum = 0.0f; - for (int i = col; i < ncols; i += blockDim.x) { - sum += x[row * ncols + i]; - } - - sum = warp_reduce_sum(sum); - - if (col == 0) { - dst[row] = sum; - } -} - -static void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - const dim3 block_dims(WARP_SIZE, 1, 1); - const dim3 block_nums(nrows, 1, 1); - k_sum_rows_f32<<>>(x, dst, ncols); -} - -void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const float * src0_d = (const float *)src0->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F32); - GGML_ASSERT(ggml_is_contiguous(src0)); - - - const int64_t ncols = src0->ne[0]; - const int64_t nrows = ggml_nrows(src0); - - sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream); -} +#include "sumrows.cuh" + +static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) { + const int row = blockIdx.x; + const int col = threadIdx.x; + + float sum = 0.0f; + for (int i = col; i < ncols; i += blockDim.x) { + sum += x[row * ncols + i]; + } + + sum = warp_reduce_sum(sum); + + if (col == 0) { + dst[row] = sum; + } +} + +static void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + const dim3 block_dims(WARP_SIZE, 1, 1); + const dim3 block_nums(nrows, 1, 1); + k_sum_rows_f32<<>>(x, dst, ncols); +} + +void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + 
const ggml_tensor * src0 = dst->src[0]; + const float * src0_d = (const float *)src0->data; + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + GGML_ASSERT(ggml_is_contiguous(src0)); + + + const int64_t ncols = src0->ne[0]; + const int64_t nrows = ggml_nrows(src0); + + sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream); +} diff --git a/llama/ggml-cuda/sumrows.cuh b/llama/ggml-cuda/sumrows.cuh index e7545f83..7558b493 100644 --- a/llama/ggml-cuda/sumrows.cuh +++ b/llama/ggml-cuda/sumrows.cuh @@ -1,3 +1,3 @@ -#include "common.cuh" - -void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu new file mode 100644 index 00000000..6696a238 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu new file mode 100644 index 00000000..dd070db2 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu new file mode 100644 index 00000000..54dcde6f --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu new file mode 100644 index 00000000..4ec22f79 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu new file mode 100644 index 00000000..3c15bf7f --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu new file mode 100644 index 00000000..7e61b5fd --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu new file mode 100644 index 00000000..fdb15b58 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu new file mode 100644 index 00000000..0f7c417d --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu new file mode 100644 index 00000000..851f33c4 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu new file mode 100644 index 00000000..763809cb --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu new file mode 100644 index 00000000..f2a276e5 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu new file mode 100644 index 00000000..cb227f6f --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu new file mode 100644 index 00000000..97ac0520 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu new file mode 100644 index 00000000..c772b426 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu new file mode 100644 index 00000000..5cb74308 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu new file mode 100644 index 00000000..98a709d1 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu new file mode 100644 index 00000000..4f2f947a --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu new file mode 100644 index 00000000..11f96b6f --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu new file mode 100644 index 00000000..b39bdc06 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu new file mode 100644 index 00000000..bbd6a2c7 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu new file mode 100644 index 00000000..9d84ff2b --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu new file mode 100644 index 00000000..bc8a5bff --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu new file mode 100644 index 00000000..a679100c --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu new file mode 100644 index 00000000..8f21bccf --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu new file mode 100644 index 00000000..858b00fd --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu new file mode 100644 index 00000000..0fc8011f --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu new file mode 100644 index 00000000..261fdf62 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu new file mode 100644 index 00000000..0fb82473 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu new file mode 100644 index 00000000..a9d9d089 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu new file mode 100644 index 00000000..7d7b2792 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu new file mode 100644 index 00000000..a092ee2d --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu new file mode 100644 index 00000000..db55927a --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu new file mode 100644 index 00000000..c3c21cef --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu new file mode 100644 index 00000000..35dd9f52 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu new file mode 100644 index 00000000..050c22ac --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu new file mode 100644 index 00000000..de4866c5 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu new file mode 100644 index 00000000..57a10bc4 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu new file mode 100644 index 00000000..e0f08b46 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu new file mode 100644 index 00000000..1c8e8a46 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu new file mode 100644 index 00000000..cefed83f --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu new file mode 100644 index 00000000..aede6e35 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu new file mode 100644 index 00000000..1a1a92c7 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu new file mode 100644 index 00000000..ad667473 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu new file mode 100644 index 00000000..c499f455 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu new file mode 100644 index 00000000..8286ebf3 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu new file mode 100644 index 00000000..45878688 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu new file mode 100644 index 00000000..d89103ce --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu new file mode 100644 index 00000000..bb75fd42 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu new file mode 100644 index 00000000..b1629817 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu new file mode 100644 index 00000000..d8657604 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu new file mode 100644 index 00000000..2e5bd2f1 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu new file mode 100644 index 00000000..be5f302d --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu new file mode 100644 index 00000000..8dd91cd7 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu new file mode 100644 index 00000000..4cb79150 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu new file mode 100644 index 00000000..09dea426 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu new file mode 100644 index 00000000..0fbb6076 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu new file mode 100644 index 00000000..2aeab83b --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu new file mode 100644 index 00000000..599415b4 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu new file mode 100644 index 00000000..e4f8e308 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu new file mode 100644 index 00000000..34d16652 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu new file mode 100644 index 00000000..4bebef45 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu new file mode 100644 index 00000000..326468da --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu new file mode 100644 index 00000000..511b58f4 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu new file mode 100644 index 00000000..d9906d14 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu new file mode 100644 index 00000000..f61c183a --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu new file mode 100644 index 00000000..c10450fd --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu new file mode 100644 index 00000000..2d5cb195 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu new file mode 100644 index 00000000..b384f34d --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu new file mode 100644 index 00000000..446e293b --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu new file mode 100644 index 00000000..6f430298 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu new file mode 100644 index 00000000..1cd8ba88 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu new file mode 100644 index 00000000..1ee2eab6 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu new file mode 100644 index 00000000..2bc77816 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu new file mode 100644 index 00000000..d55ced08 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu new file mode 100644 index 00000000..8361e99c --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu new file mode 100644 index 00000000..7507a67c --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu new file mode 100644 index 00000000..61f050b2 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu new file mode 100644 index 00000000..d4a49d9c --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu new file mode 100644 index 00000000..d1462789 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu new file mode 100644 index 00000000..e73f917a --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu new file mode 100644 index 00000000..d40825df --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu new file mode 100644 index 00000000..b5c6869f --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu new file mode 100644 index 00000000..4e21b0cc --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu new file mode 100644 index 00000000..2eac321b --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu new file mode 100644 index 00000000..f7d2c3b4 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu new file mode 100644 index 00000000..a013f400 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu new file mode 100644 index 00000000..2d94e65c --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 16, float); +DECL_FATTN_WMMA_F16_CASE(80, 16, float); +DECL_FATTN_WMMA_F16_CASE(96, 16, float); +DECL_FATTN_WMMA_F16_CASE(112, 16, float); +DECL_FATTN_WMMA_F16_CASE(128, 16, float); +DECL_FATTN_WMMA_F16_CASE(256, 16, float); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu new file mode 100644 index 00000000..c3d9df3c --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu @@ -0,0 +1,9 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 32, float); +DECL_FATTN_WMMA_F16_CASE(80, 32, float); +DECL_FATTN_WMMA_F16_CASE(96, 32, float); +DECL_FATTN_WMMA_F16_CASE(112, 32, float); +DECL_FATTN_WMMA_F16_CASE(128, 32, float); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu new file mode 100644 index 00000000..bb680e40 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 16, half); +DECL_FATTN_WMMA_F16_CASE(80, 16, half); +DECL_FATTN_WMMA_F16_CASE(96, 16, half); +DECL_FATTN_WMMA_F16_CASE(112, 16, half); +DECL_FATTN_WMMA_F16_CASE(128, 16, half); +DECL_FATTN_WMMA_F16_CASE(256, 16, half); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu new file mode 100644 index 00000000..073f71b1 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 32, half); +DECL_FATTN_WMMA_F16_CASE(80, 32, half); +DECL_FATTN_WMMA_F16_CASE(96, 32, half); +DECL_FATTN_WMMA_F16_CASE(112, 32, half); +DECL_FATTN_WMMA_F16_CASE(128, 32, half); +DECL_FATTN_WMMA_F16_CASE(256, 32, half); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu new file mode 100644 index 00000000..d30710c5 --- /dev/null +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu @@ -0,0 +1,8 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 8, half); +DECL_FATTN_WMMA_F16_CASE(96, 8, half); +DECL_FATTN_WMMA_F16_CASE(128, 8, half); +DECL_FATTN_WMMA_F16_CASE(256, 8, half); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu new file mode 100644 index 00000000..6415369d --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q2_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu new file mode 100644 index 00000000..ffb6213a --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q3_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu new file mode 100644 index 00000000..0c0b0c8a --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu new file mode 100644 index 00000000..ee67f694 --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu new file mode 100644 index 00000000..9eeb3cd7 --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu new file mode 100644 index 00000000..cc57fb97 --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu new file mode 100644 index 00000000..721ac790 --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu new file mode 100644 index 00000000..a2e90ffd --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu new file mode 100644 index 00000000..470938fe --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q6_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu new file mode 100644 index 00000000..974477bb --- /dev/null +++ b/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/tsembd.cu b/llama/ggml-cuda/tsembd.cu index 153ddbcd..3fca3327 100644 --- a/llama/ggml-cuda/tsembd.cu +++ b/llama/ggml-cuda/tsembd.cu @@ -1,47 +1,47 @@ -#include "tsembd.cuh" - -static __global__ void timestep_embedding_f32(const float * timesteps, float * dst, const int nb1, const int dim, const int max_period) { - // blockIDx.y: idx of timesteps->ne[0] - // blockIDx.x: idx of ((dim + 1) / 2) / BLOCK_SIZE - int i = blockIdx.y; - int j = threadIdx.x + blockIdx.x * blockDim.x; - float * embed_data = (float *)((char *)dst + i*nb1); - - if (dim % 2 != 0 && j == ((dim + 1) / 2)) { - embed_data[dim] = 0.f; - } - - int half = dim / 2; - if (j >= half) { - return; - } - - float timestep = timesteps[i]; - float freq = (float)expf(-logf(max_period) * j / half); - float arg = timestep * freq; - embed_data[j] = cosf(arg); - embed_data[j + half] = sinf(arg); -} - -static void timestep_embedding_f32_cuda(const float * x, float * dst, const int ne00, const int nb1, - const int dim, const int max_period, cudaStream_t stream) { - int half_ceil = (dim + 1) / 2; - int num_blocks = (half_ceil + CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE - 1) / CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE; - dim3 gridDim(num_blocks, ne00, 1); - timestep_embedding_f32<<>>(x, dst, nb1, dim, max_period); -} - -void ggml_cuda_op_timestep_embedding(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const float * src0_d = (const float *)src0->data; - float * dst_d = (float *)dst->data; - 
cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); - - const int dim = dst->op_params[0]; - const int max_period = dst->op_params[1]; - - timestep_embedding_f32_cuda(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream); -} +#include "tsembd.cuh" + +static __global__ void timestep_embedding_f32(const float * timesteps, float * dst, const int nb1, const int dim, const int max_period) { + // blockIDx.y: idx of timesteps->ne[0] + // blockIDx.x: idx of ((dim + 1) / 2) / BLOCK_SIZE + int i = blockIdx.y; + int j = threadIdx.x + blockIdx.x * blockDim.x; + float * embed_data = (float *)((char *)dst + i*nb1); + + if (dim % 2 != 0 && j == ((dim + 1) / 2)) { + embed_data[dim] = 0.f; + } + + int half = dim / 2; + if (j >= half) { + return; + } + + float timestep = timesteps[i]; + float freq = (float)expf(-logf(max_period) * j / half); + float arg = timestep * freq; + embed_data[j] = cosf(arg); + embed_data[j + half] = sinf(arg); +} + +static void timestep_embedding_f32_cuda(const float * x, float * dst, const int ne00, const int nb1, + const int dim, const int max_period, cudaStream_t stream) { + int half_ceil = (dim + 1) / 2; + int num_blocks = (half_ceil + CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE - 1) / CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE; + dim3 gridDim(num_blocks, ne00, 1); + timestep_embedding_f32<<>>(x, dst, nb1, dim, max_period); +} + +void ggml_cuda_op_timestep_embedding(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const float * src0_d = (const float *)src0->data; + float * dst_d = (float *)dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->type == GGML_TYPE_F32); + + const int dim = dst->op_params[0]; + const int max_period = dst->op_params[1]; + + timestep_embedding_f32_cuda(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream); +} diff --git 
a/llama/ggml-cuda/tsembd.cuh b/llama/ggml-cuda/tsembd.cuh index 84340e3d..2313b21b 100644 --- a/llama/ggml-cuda/tsembd.cuh +++ b/llama/ggml-cuda/tsembd.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE 256 - -void ggml_cuda_op_timestep_embedding(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE 256 + +void ggml_cuda_op_timestep_embedding(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/upscale.cuh b/llama/ggml-cuda/upscale.cuh index d4d76523..651031dc 100644 --- a/llama/ggml-cuda/upscale.cuh +++ b/llama/ggml-cuda/upscale.cuh @@ -1,5 +1,5 @@ -#include "common.cuh" - -#define CUDA_UPSCALE_BLOCK_SIZE 256 - -void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +#include "common.cuh" + +#define CUDA_UPSCALE_BLOCK_SIZE 256 + +void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-impl.h b/llama/ggml-impl.h index 557c828d..94ac3821 100644 --- a/llama/ggml-impl.h +++ b/llama/ggml-impl.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-metal-darwin_arm64.m b/llama/ggml-metal-darwin_arm64.m index cd151a17..5285bff3 100644 --- a/llama/ggml-metal-darwin_arm64.m +++ b/llama/ggml-metal-darwin_arm64.m @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-metal.h b/llama/ggml-metal.h index c117f051..51545e42 100644 --- a/llama/ggml-metal.h +++ b/llama/ggml-metal.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-metal.metal b/llama/ggml-metal.metal index 88b8aece..f7e24876 
100644 --- a/llama/ggml-metal.metal +++ b/llama/ggml-metal.metal @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-metal.o b/llama/ggml-metal.o deleted file mode 100644 index 70b219df3bc9b71dc86c53cfb6fe65c7ac7dec95..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 373184 zcmeFZ+m2n=l_luD6dF+V2Xwz3IDvxXqD97uAVsD-3!p_>CQMQiNoS@jkqGb-Ax0%d z(u`nQt||llSbx*#Kj=T`-{_bAfqrQ89CNI(=4EY?l8}|CYWUFS#M)!e%a~)%wa$r) z{D1yG|M&m53%~K7hL5iQ2Sf4v zZ-?9Q@x{q^7vsXd93MZr{kML|-PoKViz5Ds@o3o4Ev#U>U9A1C=?6c1fu06T&c>nn6$FMdwv;B4Rh5Nlbk9K#@o}RosKN-m9uU`N0&Dl54FLob4{baYlvOoN}y&UZx zJwMq&!QJDx7tde4IUnX?EL)f)yv)4`R@72o0BJh++R#jD-pmw(*7K6!I)4zHeEJU)AQ_VSzEJUe;( z0s+rnTELT??(Y6@X7t})?B1N5U%WYciZGYV=;@2M&%i{tUz~k=Hj=>I6+z7?;qAGV z0>YPe-@bZwc8aejUf=7tPhOmzKfknlc82Ir-d>pHIY5{A5~{iU>do%_Rmxoxh$+TFYT z;PYQw@SDH9bN9}p-(5niU)_0hACZ6c<%8YL-B&jsJi7DwZ|~lGu>0z_55D^H;cX*v z%Oc;ubN^QlEYamt)Dj!EyZzU8v3vOIn|JRb8^YcEt(EZrM0cNm`PJ_p-1*|y zk9NQQ^6stM2LAH4@w@rUySJIms{8!z%{%ul?QY$?ck>Hye6ah{0(p_JB+cF5{Q5Rf zWW8zs`~1@5KOcMlbsc+Wr!Vc~TaGuOf&nOq4KL0l#eRT21*Cz<~)_{X+#~1$D?AYFr zPk&MF9q#kJ{V%r9-w|01GcLoP{`H-&GWF+YuP@v4zZUin zfA{&_O#l3kPhVU{&yO+1pFV&6`uz=n{a^m&JEtdZf6p2Ea5L=l@#*_F<|lsJcl7b` z)7LNFp5uwV?3jJxwNKB-_KnNQn~So;kB=Wdy7l?-U)u*g`*?PI{K@WL{j1%_9o_w# zzq0SW4i2tdfpX&$QwjPpKE>NN9_QbDvO9j5)1^Dux%Qpi2LsqQD{t*Pg>PQGdh+ecIGe2erC7R3cAUv`%-?{eiI+4a}I+5Oe-<=bzcoV>v&{C|A>;;nsm z^Z4|_uAiO!{w=->`1k&u8?BFFfBx3Kv-tUw7q6cF!|wd--=D-T`T93zu)lh3^8d~8 z!_RNty?y-f&cD6AJJ`R!L&v`1cmMG}zWZ>+wtBAtCl{4HFs4@C<+Z#gC#SFM1Ib2x zcZwQ^``r`!X3oy*lW*+%U;Dmk%Wu~D^_`>RE4z=s{tdod-hF&z-!**wz%D+Cf$-}c zbQlu?rvo}2WT%7XbWokHG84YcOmeSqO{LJIP{hqb{eK}@N?&IKR-;4w}}7n(as+Jqf~!Ge)bDokB6mu zcJjhL=-?WkOt0IA&)=W_oer`6-~R%XUY)0RSG8#m>|y8t{I$_|`4HJ$vaI;r^dWC-6!97xUhXyx%?h zmhhkWrKyXP@q4Iy@F8^%T;09B*St$%&y*ED_^`rw?|7HOvqXp&k0RqN_W9o(*$C-j 
zHdlXs@!a25?c>H6CfuD4m_5F&eNY`Ud%rqn@XpyAY9TMa-{U+?-`wQyf5schTl+1PnSBJ){m=33K{f*VhZFx@ zoc%`Z&B<%CwqJeO?^zz7@4mBdE*_hFzqfb3C$K!Y#uodplkDQ;#R+~r>M4w=pg!<$6frbgc9 z@k3EiF1_7@58Oo^$8lZL?Wzf)DUfU`&nA z^fI=#lWb>rwlpO1!DD;k;$y6Rqxk;z;ycTCe~h&rUbd}+Pqe5#Bw-%|8piEBS)Wp-zNY6d0S(c+KlC&RDHjrGR>S~`FBpXzsI~HjXiA8 zkA5CKF5f+UVC?YYiFcnWnX`_TG z@Ok)y-xRNXdi4i?==%K&He<><-{vmiUCutbZGFTV9b7+RUD~~Ujn8}O%~9Wsy!+$b z^7d%E`U7u|A9{ndpIN@Y@%SO<2i_pGQ8y`LA=I*tbRf!}i;6@mm!d<4>)=dGqS+>+x|vK73sNkhW%! zwRQ6RfrUxO4;}UaUmrXNA^W2`{ZP&P-=pxyH(z}F>fC+;@>Bbr(#cOZzODHbJLlHh z6a8V@%M)vZ{gIlrgK_OwM*Ow}AHM7-isxtF;BWLUcJ@QfbDLnZw4dT3%Kp9LEB91w@ROG_cDLro1ZtmC3-uyPp4Duv)}KJzx&R< z4}H%MEr(6kd?u0Zm#<52y!j(t2tNGZqxt^R!vlOfXEKaY(w_&EpI6STHmL6|4X3?6 zf$Z|BaA=26E&O}bef~qs)&w5LM-LakeqlC$MOc1v=3~H<^~cmde?-miNvAH3kAu`J z&$_%>h z^WP6%zMi+5yS!)A^quYNKg?gc-ue3Q;qiU`YTCbLyL|2F2b?WGZUF5_9B&VhWn(ro ztFmtok3Te^X7NtZ4y4-Q9%on1l4jK+yi3FHe=?G8$M{Lu#UEa!&kNW2v(_2EpJ(_clPAbH zbp7Wf_Iq^ytZaV^=0BORpHE#o`Xs$2930!vy59RvXcjJh7g~~+Q{Rl=Bl&Of>COIQ zgF9d2SGMhaFaM$>-u6xpuEk#QDcZdS^rxK-&bRh=FZOf8;|u%cpp}3B)ytp1u)kA& zJbsF3f7_!!vasJ6+HZQl&rko$#`NaH72nV&)sBu|zW9J_FnQ0=GH4oB*uP7<%?S;S z`bPtf#_{tI&iCbj$G0cf>|2de>)f}!)DM1EIy(OD-TO2B0>?iiX>6{|x6J&h0ioWd z!n=P#{+{2RV=(?{ANC9PjlT=U-@E&r%|Bi8zlML;--T*O{kQw}UohBTOa1cp7kBP| zjJ99ey?p#F{)-g*?fED6D+v6J8vbjLbNi1(?6%+iJKX$x`{lxam;;|;-M)Y8<4<;f zO20qPe@5WE-~Gp;@n`(`uL0X1i~iUGybSvTvwyxy-uw4uUTgca&G^&OA6w+{@w1bE zwErsS`1qf%O#Thr=3mJE*fRbphwc4;s);t(pK7KF=5848e3y3rfZsYC+o$XA?6+6r zZP9+gXUB;BOu>GJgpc5OQ@UoC2lk)MrAWRP-Wwa=xSlJxdFwZv_M6#*{SA9LwAF5R zh?V8o?XKd*G9Db->fq2;yPetX4&ZUbzDWptMJ&H-N9Al0iPZC99+FIVqSySA-oRZb+ru0c2`lwRd`&- z3VaT(BhL+Z7$4St1G&Iy2Tlh!daW?Zoq2~uRXkO zAo?CW!RY{;_J{DoEF4@hyMrt6@L9M5c8A7ue+Z9*1IFCD_6Jbxz$)3h_TaRK_IuYJ zoDMJxd(6Tfvv7b}*k45+@HqgV{Z-?+j`!uu`0o>r{jX_{Z`ywtXJ?Q7aq_)8_m4mS z{P@;aM>p|d5kHHxcaf*BzI|=~&ExLr3L2R zJM!Am@r7+k-!@~zam!r!?n--H+FkkX^fZH~ryaw6VAaa(ydBw}k*X~)#ti>cdoU(F za0l2LQ}=UBVl^f(JTrG<5mWh*2kUA}>y9aA_FZ{0o?_BgSmsGgG2@EWOV9A9MR3A3 
zpB)1VEc<4j!Wl8wE;Y!L_lfnGxXv-dmaCj!5jv)r*|*2!Ju;SmV4~m1cu+KKg=L<^@EUo9 zb)LX^ui6st7|@#AMc|fhZ9ZYt*Q(IX^MRjNC+P zZeS6Uwv3C|VJj^2RQ}9USj0s995;9d-Z9*%xe;ZJ$!s-0G4fPiWy&#$Ma-~eglbFc zjsXRheKSwsj0QEMrohM(R>E@3z=CJk@~-mp3a?SSFnksFz}A?&7d7Ep_BoI69FthY zRPCy)+S0mXKx-aWoAMt$X2}j+jmRXyFVi0TZ$YL5w~Ij?=kmJ zJb&{Cnqwk|=Waa7sOBf573+BpZUC`*mYDk|y(UT~%gVVVOItti5D~j-}dFWv){gzN*K>_*yZt&pc^WdjyPl zjVaq46BwfQm{g?tv-+5M`QmGiNj7Uts8VflRc9%>78(m%d5S^KPiycGPQaKwW2~q1 zgeBtUI%x?!{DF~w##B+&lkGUs<~_Ruw#n>u$EK`+yeo z7I$I%+GAp7@GoL2OZHccj!}~ulks9k;7bg8zm_>T+PL z$@7eu!4R3qDP!Uk7*~jyF>p}rfwNcDmK-uB`;KkJlvdT23S^8(t%qz@p6Yb?ixKCYH=4S#mi>r8g4sPYsV zJQ1tc9+>8Xnp4fJCvdUPF%glx#8Xz$OL-!j`OdR0@T?vDImuBwv(z4;Lh{UdM)uH7 z7!buIMxZ3wv(0BIQmW-;8Z1IvMTjRv8a9IVMJKPEWuVdzA7d$zIIF+ z%UAxHC$QPOp;NHTdDM}p9f;<(##CjwCRnm7!4nv5h4Bh@wMX(sjIfQ$h$#cLsu=GD zPuVhZw$+`CamT10Snvt{HKr;OPuMX>^dl=AcFc>KYRt%%Z4rZa#FL(5ZoHT558?q0 zp1fu^)K6k!#+bzL3Oq0|&GQK>+Iud~A6>&8`72(w3rp}^|F$3@{f_|rFLK6DPp=;A z|I7dBDYT3ZvkQ1-x1dsBRCLAIwQT1dnxEWtXPopYCVR(j%1RTK^AKBk0!O#1Eu*ej zCwXKe1XTv8pjJ zJ}XaRc#Ya|1@DRht@V(p`!=4cA;+XnxyMp2=V4yYQ(_XYJb{r_U+GV>J6ijh;y7pH*JP-BXH_9xRk^Q?uhp)szhNRLTN z#Z--X&jjXmdhHp@vlKY;F~2Z=T`|p4*nw&A3>?)~e;_>%yOQk`p~oa%^8@qzsZL-$ zH@ms=1P=R}AKohl?Xw-N)|jla`m@T6v6`AkSnm<>S|_nuCon#PC$I5Z^#s!Eq!zVD z#N2;s3=GH`lM!p3oaHSn>nYE!C-ItJSoa^CCQqr}c>-q)=|oRd6g&fqJi<_a)>Am5 z)|j|swu2Mxs9wbepLx#HM)uj35o;b`sH)o0yYoctxRY&}J!AB(+=;nw;R%HIBBmxJ z$5fV%4LwRsa?CY_cho7&ISq}84L&Jm;L%TDvsH{TLd-r1iRB1jL^LEUq1 zqONg?iIWEZN^I!VzdlB8A~j( zEjY&B#G1a9r!rff!V~z&RM^6|$5fV_M|JkviC0d-SZ8z$nW%BbuvYCAU;TYX zY#)RjSE#3A!87AfrrU~p*a7Kw$h`8DPmW2fatGFP1M}Qt<`s*0_h*Ef2NBt$@I}Ne zp5R|=qUDP9n5?|VRK}cNSmsWw<|ek*4xG=Zlh=5y*r;>%omzO`ik`%~zpQgiMyz>& zIrkP*6CVEZ&UzB7xfyZok?^b?Fl4GR5zqZc?YizU1CN-(yr$-lY^sNLc(vTAcH#Ki zZ8J~WA!48l$Aa3^WwMWF51!3-+Y9~&niV3W97nWy%7_(=d#PJ%lMCNrCqngZ<@m8L| zJ5L~W=DFAMe5jIe~9 z^ImWwR(k}DNX!DUt(c0KV-kaPaKgmqeSmt1xn0sT(3~II)w$vI;50N+4xhOclQ=aL zCwjs%@(?5cY{%?XPjwTl!Z!M}V%d(hz&iS#*^5(PV8xiiGEZVe%x=~$Jl8G^U&VhP8MGjw;DBW2!wcuD~;6sG-NC 
z&NU{pP*3@@@`~wRk4e1NN#(o$=*Q$qHeO@VQ!KN;u$)KCtDT}&o~VfSp%XF8L}02i z#{@>a#zZ~6@4_-qsZnDJ%`wG2_gxsiiYGC?u2}86Y*B5tC0=84zAMJ+%{TRkTGraB zCYWb`?bnKN`-4E~iTGcjtmRooSezQJazM?6DVLHN^JXL9zf z+!2w}lxt@mu+RC4!k&55F+`M}fw}IpN5c6v`|D1{a&B56FIK@5SMtuBfU&BG$$0MX z`<*sGy~e^*nnX>JCGrSE1myvyXXZ(a9@Q4vb4=JEe~t;8;hT6yMe0!?xgXSx43wMd zAZp&5%tHo|pK2g>#mHRT5i$Nn?HL!(z_22xjCDKSTVrw_vp>1!nB*OLAQmy#cWad>* z;CSYmkfZYi2DI8SQ|C#RohQ)D3DJ7rhmi2MT01qW{Sa2UGgFPJOlwS9=b8}J^C#}adLB;L8guln__ND354x87&bf9yT92qw zw*|{Btny^$KA*_7Y7eZ|&RS|7VZHCfp;gpL46haIy2E1VUTO!<7}Xz4bDiW}XNj2C z(_%9F>JN{chm0zB&O+CdYIdDeRgTGuYHsA+;>oCC3mg6mp2FOJ%v@mRn`#%H>mT8|kRsD%op1{Dg^8^l7IX|)P&j{6S?UC}J`fj^& zN9LZJ)3Ea7MC6$Cu6h6ihi>P5THl$e`@_5Yj~=I40vq^uo}AAf6V^E2}L1H5ulrW%t` zS8Uaj`ErklbUoSOZZ~?_cv6w7C$Y*M7_w!a!0_xnVy13OO{zaL1;%UFxbP$nrMhj^ zn0B@Az=p<4OzM|o(z50OR%>U4h+Jz2PU{-8*A6>!&2~ggx5iTyRNGP0*hlomdMRM*H3`F4v)-^(gaaI+c#A|+B5s#R{vhKvz+KGc} z_3yQlcb!jSTQL!}$CP!>0}QqmgJ;)MJ1)-+FnDKw`mPx3tndbSmr5zs^4v+PT2wJJdru~C~Q$_j;T8< zw%UPF^oX8#?VLC@l@qH`kK_wM1_CM}b9;gk_$>kSp^fj@RgsFnrZ~1=jOmZffncWu?_0mOW41Tkut`&Qtet zOk$N2usk2M!RuN(JJkJwR-U+a?Z&yFyslg7yKsEXJgLSOR&&G7bGMkldw$MH8meuDx%$#dS4s_jex0uXU zV=`*@r)ABoEg zuDX*Y@Xk|8XMaYloUq&RPj<5LJc*}p&oy`kw)*=l7CW-fJn0Xn(GOwxS~&r!c8pkS zr`4FTlpnS|53rtxx>=r9J6Tpvnx@PX@j6d&sxier#}t`h{8j{?WN9Fr`!u*ws5o_qFPF{yu!DP|q( zJXy`EC+w;%D~dgX45~772S&X1NO-P^nAhZIfMW7%aEHa1nNqv(8dFiLKhcf>?L6yw zK`wahwTo?z$xif`^sX~ctZEF5&xpxuyjGsVbM4ITHBCL$;oKu+9cOCMQ+SOD)bo#+ zh|hnq4}^6+1CN-(z)W=#<7>zI%*%G=1f<8LCOM|^XFY)tuRMv@n812&PNLZ*J>`q9 zH9x#7Mx^FpHMQ@|jo0kY>n+T+n|-65b1yuB^BFvOjn|5G?%IRBb}3T*Wu0U4^v$|! z2lm>TyY`q^c7ug4@_c>+_9bo_8mBwW}T?!imfquj&z=!yq>@J2wC|ratrHv60iA* zfq(S}o^$gWua&3ptQ|2htud+Iif!@i+Jp7jb8~MIb50Ak#)Mtxq^Ow_v04u>{Oh#? 
z>)H{iwQ~}y##Cd~lX%S!%=4!@f#I3Cb2?U@RI}SMQ~2|`_MMn(JkELc0XSFHpDK5( za;Gm|tDZnuLH1{s%w1Jxjfri=MDAV_FrYOLyemee>#3a?w&Ixm1B+e?b3Kxt!kz!< zabVpaccOOMje3@tz*m2$rP}FVYY)tPmzeNg{X2K@%6Wj{*<%u~Jb`tdtg`A(Y~=|Y z|7!laUzNA(IrnR;ohsG*#42}SJvT5sd+nUu%99HBJj@jJfc-cV3s2&B?Y4CmXje4` zM!fPAp6eu5>m=rN&N&7K#A;l4vii&u)^#5t1CW}Vnbw%fR%6n-V?Zl5G;Tbhn}3c< zHPJHnjkDEbvcleXde=IIs$R_(xhe(G84 z1P0rho2+u*f#BI~$vpeRg8w3>Fymf&MBMq0nOD5Z9d>-yn1lNgbMQ|wl_~oxYQ<>T zc`{EMBwl#}>pVF%RS#k-PvH2MdE$EI38dSxpPeUFs`-gk z?u=Mt0>g8S$?7vtDxCAc2LEzQVtB1Fg=bF0s-D1zM{Z$VPvTWiVqTN)gYaE>Zt00= z`MgmMV3oV>WS+oQJrR@E)s|?*;9WH)Vj9nLiLroVmHk;!#^_yRDq@dG9Ip`*SDibt znj09N=397*QJxWE-CxS*-24o)@?@sqiCFv>d4%OWfz??O4raXCf$wdSYF86#5ju$q;pIv@VXz<(>2>!IDM zC-GVnFg|NcR$8%9PpO@DwI5O`>j8}Ub4*6;wF}KU5krLV2i9W(gK~X7rDpgm3lvq% zz(P-9;GgGHIKEc@sweGOeYNAA9+MSC?XY6Rny2eYJMQ=%lbNzVv*f->A>0X`%4}Hb zJ8TspY?USZlNIcKQmznI~1Qd4R2X7^Cw9+HKY6$Rn0Hx3I1!apymEB38Kr;x(o+Ra;m>!Qc+N zdM*&5WsND8IVOCWOEJSL^bm%qIVSPJf6ON^PEz$p9^8vO^z-+aq$ekda}u^h^SML| zW`iBMaE@}!ic`Ccq5AHR`xOIMvl~3aKeWpj*j9hu?U?6oF{w)RXZFg8nYXYwm&6IS zo-65uYuwMAh}7JQ(Q60ZW3o%#A67XJD_r$d<*RMgQ)+ZO;JtR*R-VLa?euj{QzK${ zt+lfrpRr^MJIyCGqg~IfOlwTs$=bn_F``H8P`6_*R-W25)t`GlFtV-nLv_!wzQT%uZI87T^PP*o`DBXVR@E_@wMgwR%c1Jxu3#NZS`mU8KdpW zlX&F`tmgsN^Kd$BX3`lVUwp0k;axEzF>}h{zAa{-CFVxN921`Sm+K+s^^};zYfZrT z4DP(fYsE%AEhg;p{y<(vsQ%)gV=|)IwV1&3%xf3cJoK(Sfss*-sfby7DI7c*k&23^ zu&yU@R$6UYX~xJXa>JLNwRYlz|JZZDphfO+FwZfSrN@-=xlUk+lKq*Z=AjMlWIcJJ zAwtX@*0AgTL}f>gRM>LQz6XvKwF?X^D#m*&W_gyFsB-o9np|V$p{hBi?(~?%YfZp< z9$-BWY~vjiJfS!1g598;C|n5w+mA_p`Tce1H^1XgEBc-Bc6Sk#!f6Fs6GHOW09&bqR{ z>^la$Vzlfr8L`@;@>)B6D^F!M-!z|OK5~s715|5o@h{&aw z;aTTX_-b3Z(~fa-%vn#W9k!SoSck1FGbifN?S$sq70>sA>nY}W76U^J<_-)oBBpR= ztG2kOnU@_IWII-r^`tFuJgY6OGlqyg53_Wh{!8P&_TWe?Tnx7b6tADSZ z%-whPJ$$>r?&O$^7Gq4;JcC@WAg|D`_6FkMTW90C?|KCFo_s18P zc4xmo{O&vb|A+3cUYq~-p+{HzAJTGla1`r(te;+aI`O9mX?=Ji{J zo^{|_{#eTgnU(v%EW#gGjBBfGGc^Qm8m1ikWVgpX@6Ewc=!1%8K3h*4^PIVDJ#*X2 z^oSg(MyMcttur&<-l|i5wn}*PY@aw8YjB!5a4lZ@q`N6Sc^z6l_37~FaOSfP%sQ|Z 
z53CPUKTHveKz$BR4y6g0l=ZN2hC?4;4}ENn4tgA4o4xA5wLRxOad&+1vNe>D4JsdN zTjll4hd$*T7B%eV%s47uuuvUfD|@UJ7oK77yk{MlpJ|tP!76ZMA$Qv;;E36?mDM+K zbc(pm4)V<$xE}p9kI(@fS6r;68+P4{g_#`rtdlbiSR22Y13hOf#FV~VvkqSm?p#wh ztnmbQuEXBcnK}Gk^4ED~ICOK4T+g_Mvu?-jYUHpeGY2%7T4AlZ!Wuh4KBlkFlH=nn z^3G1~c4zdA9Tz8hNGsffCFAmHuE%}`8hYZCP#s`5<60dvj;*bKqaWsvzA1;T*;_=_ z-o_dy0CQt1VvVU3Zak)q$C){hLHDqhKPHCr$+!EvCt320*j$LcHpcDRZkmR4{iO#wtO=OrbL?H)KyKGV)@{dP;SdGdH6Z)8er~B z=~!bXM-JT6{BTXaSZkKJp7@+S>{qCW-uN1E#~MD71G_%#bgogU>4ToRo=`3L0_9A) znzIf+-)7HYg?P-5cTwfTN^vpA2_L=n&Im_mgxlJfL=L;2aeWp?EMK!9;EHOUhpp8= zuvP!C#w^mN&J{Grjv4nb5v<9oo7_>UYW4Nd(AJtUKoWb>@$IGr3V+sucxcwwy`V6EKNrDShRMoqwj@;2pi()ZDD_lqZ(-ik~;+8F~Lw8ef=J+gWj&+`0o4#2G*2;&q=Gdy3byznh z53E&_Sx*f=yMh^uI&iIeZNjJj4dbYMSgQx7)Z~PQa)p3aUutUd(BNW&+_L;P%KPTZ!rXoc&ri{99T zPhO_<%x%?8?pTXE)~e5TL(vM?VTT%7uQ8mttz#*!RpmC=@yf?FHQ+ktir3*|*VA7! z3LDkRH8td#8e*;9a9wjqAKcSkqBM*P970azA zuJ;0~v5s@snT)x_IyA*veZtul z{(3Khz4jK?PsXfcjrSPsL0pH9IG3aw*2;mkdW$nk+_C1|L#faW>(i72=Ti7Y&v8aY zKE2-|R_KPaM>(+8yy5K0bA@a1#hIhHSckvPbAMNwvtTE|D>u%LTnEk! 
z>1dxOQmp8KeX1C-d|JzzxrsA3*D<@i=FDI%UR;wG*VLM8)`zw9mZti>xn2RSa{Llc}I zii@@Q;9A}2?ab`$2oEOT6^CI2{(Q#>wZmPFfD;C$RgKOsFnss9>J+MZP zr4_HkUp)Y>o^#E4<2vGMe*k|r^u!vo8ac3USvw|CXDZj?YZZ-|1}<&67C-DwY6U#M z*Wy}p!Zr29w5y+3>)iHTFk@lz*=OK-55*e#M;)k4bs&qlU@d*%6*ii6>?WMA*t5Q~ zk}qaG=g^r7JAHP>S~G^V&PrH>o)&55##$O+&&B&We2nqng7;+Ae#12-N32y7_L}Um z*Hkyw^2MHu_b9NniboFYyNIRFoUn;H@Ht<6@!qU_*h6_8`vWTAY$0E)l@EK0a|LzO zf3C>`YjMX~ak!4No!9Ie)~X3>al~5t6ll{ez9>lk1o(yfZ{Uv6c_AD<8bX(LDxtynl!XW?FS)9cR1mh-9bFZP*df zCp+zCKAicg1Lu5RD+kYc?Q<|OmV+10dGZ1i)r7TTan1U$mR4A+H(bZ;@|rqet$mKQ z-nX&VX9?fglLuy({D`v-tg&xY2d*_USZjY^t@9WsgL;OwW&vxRk63HgxsIPR;9Bq1 zI8{O)tW_UojJo^DlW?6qz>Sw{j&=SS8Lo8}`KN?whZ&RZm@(zRjB)n-bQ@=ve5KDDv2fnywQ|S#hEF2Wkn1=TdCgv7t$oBb^I>gI=Yfew`zN2l5o_%OtW^irdcVTjo+DqJ5!gYI8~w{Ua2~K%zSAaO^g#7tZ>uKkPUXN< zt3Iq{gD&SB_<5Ao`linoRWg2g z@)yrePS0MR?CyW@;Lfe%J6|6>-0iO%UH?cpkmFmo?>@S@yYlJ5{_1JIbFbYwJKZtd zvyc7PG@pNY@7|a9kMG?3>hAI9pXBWEi<)ak{5!J){h}tA==U|J?EwucL|rm=#U9=6 z%E9TJ$znTZIIt^MPEWf(A}*Lk+=_`zos)G2`3rYo)sC4h&zJ{sLhjB97|TR)M&=|e=LW`SjY$lzp`Eav z$Aq(*7CZ1MwYxuco^?WXGR^)Ylka8R1xI=Jn7UK_fmTiqG>r^H=UU%~Wvd;wc(yzi zv1-JK8!^q>bLHAyjqcCWXGgju~1oXI?d8HEZpxXtlLWrA}tBKB4rZWV~#B*Y;_JNnVn^@|b zV^FB)8G2-%w6)j8-}rY<#@_zT+Tq31qW0bKHs1p?O3mLD35{WAPuHXIB;Ndoec|L- zw{tmGo?d0G6E$`{Sovz_dK8|h8@goesRhvBDdlp^fnnyBn9yml#f+Dju<3bV1GL8^ z-u;0t81!g)2A{^$Trnfn)|szZuLpkcUw8svW4d+H(>U_6VoOZ7t@Ewv%5&p$;G}9y z+BTj(Gp19TZ}<0U>i$qEW6%PLdQ4zV`=L_N(-qpVqeity&Id}B{6mqBIVYW|=j0in9qpmTTuW|yWJe+IP9ra`^>lrv!jF~Xw+Au>FyQU}o^_u;Gb$|A< z#sr2()UId-nx-|T>r{BcY9r4(c)E7g-@R+?h`7Xb%ygf#GY-(>WbP2W>G6fe8oCX*rBG?ziRAunB<zhdLjceWloG*>!IZuQxU2^BXrC; zC5_>WN7s|+njd{`F|GDIbMDDrmcASADm7bIspWC+mPa$Q!~~-fb7U$0j&0e2RjVDg zPyVQzSqvmTYfUF+;O5N z?%=_P)st!$4qnPXF^fE}3W~@@*24>#1@4)%2G2xv#(PoVaWIJZn z-u+FvC1&e8?L1GZec(lVVOe8%B1^5^Em2R4Nqoz;$E0P}h?&;ffy27n0^PvU%)`PS zHa9Cz$Ga`G>oLuH$&E?eis=*DI4$#qdB>yJIkr)|Gg@su|D1W$f{ER*+n7)2vF6|C zhgaESBJ)P=Zrgh7YBW8GRo#uF^XaqX_!84ICGFg|_02KwTa7vJ!bw=>WZ0-PG#1wP 
zJ8^rhoXj8ESAVzd7%ba-T8&Vo#w1o_vTLgy*l)#bGvawVPxy8{f#I>nbhy+>q;oRV zvgO)=c%IfHS}y)e-`%e1PHdB>b)#wL**CDo&h=P(&s%D@yr_Grec-5J z>O`#8?z|Zz+cBc)mh~LAJtiwk+}#dn(tTj&Tl+5DzSjK6Qn>@!i0Khlo=rRELG48k zW@+4MyYlpIbX$v%djwzaTeg#$T~Fbe6EQ>$JqA|vaBQP?;@ux8nB|y^nstX4CdqP_ zz7O28RexG{4Csn&#YEiB2}rG-R*RjnS@op%;_v&WaAKc|#+;Z9p6q+o4wGb$8q#;o~!T^)nvf`L!x>E4a!sH*sn>X?alhedfJqg>2>D5cmVGHl6 zN5Xm@pP5|yv|aO?Eo$Ot2DjsGof-{{Xs_K_Lg(U-EGq`vVmDYVF`2P)LPUFZ|AjmI zw$u*0t{t&fJJeWs(sJbqJo|Irt3P$CdPw1NE}=r6szp!78Yg5xH8nTA*O-bB+QFZX z1uGg4Bh(oC$a&fw|JwYGXYP^vZ*X^w*O()6iRlqrOpn-V2WHQO=h9EkLFh3XnJ;=e z$IcViU|RhLr<`XnYCS@SJX`bks2y8#!@KH<%-+vb6ERd78V45KC)Q?Z;9h|^0n>BE zLQmOP@-p)iN3Nk0F@K$U3IqFY=X#{RPg`hGV}g6dpm2|AR(+Pp29{Mj)4j~nu=A)T zH#%gG+;?1ikL*s^dIa3+Sc|DlYfMJXJQ1zeIWUWwbIY>x1dbTp7U+VtddvjeTi@Y1 zG?_gbSm)&4Esy1Zb_-8ex$$&t_0RigvRU?#vjl@$r}uNogG_0@=!+^FPm8zo!*h5| zp~t{VOvkokE_zyKW=cJB%>7dyuisuePgKx35i5Fl-Al~Deu+6MD4Yh-)r4+tTA>qYabZUhC%1rBiP#0HBmn& zR(Zm%^CV_=ou^mS@;laJuC)VMYaf$n6+}&}8>)1y)=oQ%*=l#3n#|gv5F%!t18cd3 z*4lyckvSOx^$Jg5ohPxbr^kaP)!!p_3=y;LBO>Z4JyNE{f92$vi=AcWqs1I}(ay0t zH@Lzqbvxo~OxG^$V_xGmX>3=BSYwV}_P$$3i6#5nJj`ciZt?6vJ*OS8&Yc?1z9(C^j+%5Q#}txrUh|l83wKv@qjtx;KdNkbj5o9|dZH8dSa=$r4S&zn z{DI||?za)seOpbAwSEk&$Ha6jJiVeG6Ly^wFgy}>Ru0|kd;<5l%@#eX{$RRb$g=bZ zzKA>hVZ%qqV6}lEPvS{`Mio!58kIMH$BpY^*J`IfU#l%5c7I@%Cy}b>i0M6TF7+fj<%R)walGkjVCd1idixY zR?936e8bl6EdIu-VeY^3gd&SSYAT#ygGY`@%<|M8dAwdHY@2pw#|Sm1TlO9e+gcO+ z@#y{rmD=41^(;JnLc6W!YB77?{Th8*Vva5qe_*X2&=-|fJ7>BP(|Oi9y+eyXqDDQI z7Y{@&JRKi8YwiZ-wjImdX-AE+o`$oVHNR&u>shDU(XwM(Jb{(Dp})U2f6J2e75h{@UqCsd+` zcn0Qmx}F)!Ju3LJiZ%lipoVMX33Su2|;|JJY@Oo#>js&l2rw-w@9= zT4S2_OHAJhjpx!M#Pr-frgy#N_w21s&%9#JXNl=m)gC*`p2t~rJ7=`$$-TMQqN-H8 z`?mRFg=>$zN=#1mU%0#7nrE{|uyv(!OlZ{ea1x82R2cPCJzcq86Iqs7pu%>e=61`? 
zm_84&XW;Mro4>~`7!_&nM^K2BR$H&MXbd|%x}EdswL7cYBW035vqGWJBQVR8ViLz| zuVb#q%F{GmVnXMsCvq!)jw!s?PP}p&*b>wGG3G<@ zJa^*>?wIhJpB^0px?*dOsC=)}(58pmHH`;WW}a-uo-H-inU~!LPoLUVPpGu;^u18( zG~SCwm;}_*^>n<&v@CsQJ#*L7S#Ib_y!P0cw^{IsTQS#T;YkLyPE1JV4o^IC%!#!g z5%*lPN5WSBs{63BnyQ|_c`tYh>oFZq8qp4~TQM@$c7y>Dta>asuR5Po}t}F zZtvz+%$^?+@W|YWRh|P|cshf|iP-Aj^>Djo=24AR-gF|`c`{SwPVZhj{Bft-0^Pt$ z-(j=vO-Gl0(zeb#y_ekB*~@&wc7rGS*?F?ZX>Yjirk)ma)pOaKsIkwSpNP<{%_s53 z2^IA`#HyZV<1@F!^xoAvQDyI&V?8(Q)_nv%-n*=mV~anS_S|NLR+};1ezonjL-$_K z=tZ3+#Pcrqm>WF%-tZhg3oRy9sx=KP&$s*cJZ@)q+CCb#wN5Hu>m=55GwK?1^e$%J z^6-)M7*OMGsHwLZ6a0HjQ?Fr5FA)(;H)1-k?hosNSw!@+W8lBU^z0k9s3~)Y-9{e6 z`g@$uhtEUtNA}LsfUbvKJNG_I9(D1D9<5G9?A(F1+ResQ%KKwtDNkULN7!aebS~w0 zTU6d-8esRP#|WVNw8d-V zByQH(pUQ1uX1&H%%(F)cD=`BPJ%u^b!O1bsLXF8RMbBZ2 zJCzfWsa@FFs^&3E^dfpBTl>1iG@gsUE81duKbDvya%KCYqY$`dNr{K}N`Flx7>Wsf;}TxuGa_Lg&UY{~6B z^BE#r@=rCP9{ejjN3{zEyE04eO`UU0WU)u}@B7URT(!-Zekb`${~UAH)a_7x<7o(| zEZY(*Jlzh=svWA?is|rbYs{LzAq^Ys7oLMb@i)vKH4n9$dZ-rP*QrMdpZ>xSRCyA% z+O@t9Td$z`gK76Ky@W6QO^?P?adS-CSDua~jcMb%v&^S_U6V1N!tk}`ff_xJLz^*u z#(GTXRI$?cksG>g#q=&0e^5DwX)|fzgkA|%=&HjqIVq11=O!r@60?m3l zpM|^7UOVxg-_g{!VYTMo=n;Lvs@92Ju;c;O^RSn#M|?+Uw#eQ&5vwuH#`pCS)4A3j zIm@00EE{*&VP@Bu&Y;xp6)pZAVX-ss7Sj+s3Ma=JC*<`6`F;P+JoBQ`X zoVb-IRW5Z}4l>A`h}GO64*s2|7gT2ny!}yQI+XiPJJeHqH1H)qb(&`~^+D8@hZ^;m z?p?K`@+}V(?JtDCNnQ~9--9En67DwIdWBO%wposX)uj)-{I>KbADFjHBEn2nKX9A ziPxCSjk!60|ftMDOvz+u?_A9(x^$nh*_a!EC)jXD^&jM#=E9M4IXIZ!pMk@xk zO;5K4gT&KVqU+V49a`;tW@_KP$~HHn@*b1=)!e{Vo{nN3q92IqaTmvO}sm9RA?)Ldh-pF^kNZ+d#?Gk0RE z9W1Aw$saxG{?NH$(Kq>yuS-nFTTH`xom8XSfxYF{e9m03u6jZvL@hjBxz%pfle&2> z%%?}E!gC)b%(ZKF<{Nu6+ZOFSX44K?QBBsEnCB_A54^=xgs2?|b9D@O)f0$6l6FF} zcEUPOOh=z3#>GJt|sa zjtUCT!Kz_q+4BrNYTvzX*Cf|IP{l;eoTP|3u~s{9-OK*MD@L?q7`j|Ltz6OBqk%z@ zIg7&ZwZ%k5YfRU!*#d*@&;wjhea%B`jY$S(n|KmKoEp=BG;_EQKcKbtfj3S-@yIcW z)jaNPL3>O_?f$f`G0AMjwqnAz)=s2zg4JS&+BbL(O;SJTy>R!bEj)b}Y{c}bY5tBi zPQbeEcI~>SeFWaH>wS0TcB^uNZO=VwtT6}6;!l=UV`7yjFhiEw2m2b6T`!zQH5+#5 
zTjPm5Z5BPZ=a0S-v)81Du&tP0SF?qE-dlccl~!!1wD2@*)1~Gg5lS9Rl37m7;^|Rh zPCVrwSn2V^x*ZfUK3PwGZq5MX7m|DR{zy{p>MEwi45^(hvAEYK=+DH74!5t77`QB(f9?CaPZdvu@&RX`7vvN&hZU!b!oJ;VI`4C2x zf@k1nJG4)j)kW?Z<%8OlEW5T;~a!XSn^MsZC8-1TKU8f}` zdS{Q~Z@w`%!{4iHxgE7NC?jzYjsa(^r zEqXfUDz%uF*?iZSP$l(){{b&QT~Q(!SS#Q0icdXaZTT4duEbhY7G+y*v z<_3BBh?vB@o>Ke3k!#M9u&f6#;?<6L$-PZG$JYIX!efXl63(sD=+q$AHPuoYs zZkbQy9{VeCB7@4ESe^y?+H33kz`?)7WTq9{vfGNuxZNM<23GWh%{oht`pLHVL#N8U z%^V}5qTVB9!rkr5Qj5o}$RZtFTF ze|D(Gbj%{O+ASWV=9u8v>l|2~FV_0hROjQ|o1SiKs&t<4&UMlbuTi@&vRkEX5nj_*AK^Gda6g!$K+$NQW6K5ac$ zjfu`{zTM8JD>NP$(WK|ZlTHciF)bi^oN|K;9!pF{shkY!+*xJI!(QZB8qt`o=4XVC zt$Co3!=+F9}8xT zd+lyn=7vbG(@@Ji_mS7`s@shvrdxJfvM-#>4sjO@HP#+^?~PNob*%OnSFq}K$i37) zYAAgN=Fh~*uh%@tvh>I+M^V`y*G*4WUTYs}thJlF-zzbVmDe=)A=m(^wX>SFcH-R@ zOdIzRwdSUG?-3O#eRqvqr@@n*uq0NIVh)ToroK}(uc_Dy&;H<@^`H&W%ro$sAC{=9 z$L!o`ReLmImL4T+&g{h2JkY7kyxW=`Xay&rj-{9bD|#}M>rwO+9(_lAX6bqCM&app zsIi<&!=L;w>gJHSVUb_akP(=MB}j zSsHla>D8d3#A#xyozF++Ic$wn@N{hGBu*1s?Y3hgVzos-N}U!HnAe%|5HsJ#)A6hw z{GC^=omk5~Y?qjhcUv-;eIGVmW7G!*YfRqh{#6fD&HBTR`nVpY_JNn$9Yg*&1MEKd zfPIb$)a_bK*x^pK1={@$D7|zO=3ZkWf0`xQf>p`mTw88u-t|<6i>>9xqvUZ6)+x`x zwqi1FwH^JOv⁣Ce&DCqSDq6R%B?-m;id)9jp6;%?XeSYtA3 z;pCaBo`~0LCx+U;_~PE(<3~4tdH445?fbVr{^X;-IC*h?lHp(8{^HL4kKdlXyg1w+ zUtHRq{r=$m_?tIp&yK%&{r1xC{udAK+&aGV^})kWG(#j?mhB3qiN$F zUWc$8AEsgF`V4=pd5nC}#{vxvYaO_!Iv}(?hi<0mm`mWsdEnOlxmUb7qgIqR>vkJyg|1naVF&-& z{}~G%RBo&l%lxK|$$?OVhplX0lmqwdk?x5vVBr=w_JplvW2<2U683M_1YhOAJ^6r0 z*kPS&pZ<{>1~Kd4A$wz#>M)({I%$>c#2q%`VV#}1UBh9IwLQEJ*E8kdHFH2;ad)i; zcdq4cYzHUc;_i~qyKA-8ajugIkb3Tx(`5!`v&zcKTy28{1h4H|?Z{`-WD=D)tTv9(GpI zz!6kAxTX!(_8fi2S~&SBhvCT|YuN=y*vKEXYJM>Gnh~r`QO}1pWcBeAUEDJU^PXMIdM(RX zLn_%|EgQ2=xP0yGz{Z(cdF>+>*2c`^qCU+l*2;&5DIeFWk8xQac}pwUDZgtna)4~a z3huzgo$FDPHPs%h+t*Qt^~u^9_bkt_v(?109`$*sSj)zGKXYIWd)0w!tA+YttISxD z0~NYfZbB!!hy@4DBGwjS)Q7e7L333%jAVm#w zkF|2cQS*(p$v!l|I%{P*Xiu1n<`wR+Lp~2RZ4g&DVllVZG;(kqIlQiU5BXFFa)^8I z^_dy=T+2SVnr2`)wZgSc@2Jtjj=3}x^i{j9nGd)rH}SA{)d!C3xen$Wn#@>OC^y!M 
z%k|XC`O-HyfgU)i=e!Od@iMj$$a!I{IMb2aSN8#VPmXP4)5Zy19$m=yJAHh-v5~oO4HsI z81`I;RvtCo6EEQQ^xRyNqi3FY)aon}jyTea_rgDTMSWnaK5;F3_jVrWMx0%yhMfh< z;hh?12G{gQ*DUn#$2xGm*TP2c(_Zzox7=p09H=&(gSKWYS7DxG;H5rUw1g`MD5(yt zVIMhgt+-gK&sZz2X_eN{f#(PKhE4dIq9q5enFFfZMf7>D!BIFl&b>YIp-W~h}2PRV!^Orq1g}w3t%sHT_ z{9WDQ@0E|ZUi+ARt{E3=Q%!*J{ik*Q&|1n)?9jz!l5*;DhO`+p^5HWtll(XDr7(Tg9GAam{4l&;yT zuJKMd<9dCbI?i1;8*#C=|G7Wc%x5~yd=SpE0{w z&sj2G*;{AKK51a~%7J_Gfezt=SYsAEuKbHu;1;D&s-);<8PTCKdf zKVU8#Ib@F53H5X>NJ=+utq-h1GO76`f2`iZ9Zu&8wAzt zVJ&;C&13W*DIy0F7&7mLk8vD1tpDbdYE8J}qHfu|%a?N~L%g6ga=i8JX zxlx}uVQqB{u7;+$vMcPWc@Hy(^BK72&A6f=12=fuU@c$EkZ|})E1QSu579M$SZhW> zP&trTIk;92z)AfhKjkp@;I;aOs#4GAoIon! zSj!#+1Gfw3>OC&A>&UnVRb=NC4;)oRt;*+_hrMSW`M3_P+SX;DV1HOs{y7oVDLY}!ctC<6&aE*H5AUmur&%`bESi9L=n_XHfjwx&QsgB77 zUACOV2gjX!z(cXHrVYBGI&Rgl=AJOCxhKFwS_LN*6nhK&q%91l~hq;IxSl6E0ZLi(?pVr!gls)x)+E~6g7x2b8;#fAbQ61j%X=mAoKl*7~ zdgzX|`r!?kxN>-3r@bkYa`2w~A&>fob>NX39EF=!(;ocD$@`ab`2HMr-nZfJy_r6d z+xO$NM-Iisnz>DLBa*ekF`<5-ZL!vwVJ;Il4b5J4*o|Rteuhh1#!)|T&u$KVv6NPb z8@LHHZ4?*D#L2Mfi#2?egV*%O+MXWEYMB10UHi**82g6n&W3>=)I4h*8X&3oYw zzJZH}_h8^yD=va44i>o%UNaWf)&$Roey9iDn}jPjaOH5F=e{wmY^K$7Xr;M!-R7P$ zE@^EUr#?C}paEi0H$&&Xfp6q+j^mzbXphlzTigHK9}CSW*Q^_BD|Xn}#h7oa-|bZg zt}V~7Gli#*dCweXF>o`-gWPSc+`!Fy;4q4PZoQeYxE{0N_~^N;K~v8!AAVJ zj`vzzizC+&EBK;T;iyl1qi(A?aYfysmBpXDuof4ynQ^gJT&|gqYu1FdJ;z*8e`)1& zlJ1E+X2G0BpFl&su^VZvvj8Nt|GmfNs~ott|3wdV=Aev`!`!Do^i*!F!xwAmiFND8 zyk|Ae9LTNOBOf%eO*Ay|IVqQpX=~3wLAy#h-FtJhcQoa{bJa&rpSS7#f_Q_ zwYbWGwQQn3LoG+(b}@3G1JMuIMGo(X+nM*|i?#IOnsLo@)`7Kha2+{#9X{f2*^<9m z4UWLU(Z4sawd%u+nCgymoqKL~=6Q#8*jbI!$0Cn4QmE%xXI%CO6YsSiHQ7a6S8>KQ z#$ye&6c;KeAJ!Sqb$Ipe?+k|z*Yx3VPj0@{=hxy03y$E_1?J|iEr3g4O|YlSuKJj3w!3?nYrjEl82;99=aLwgX} z)kCc9%6W0ESir;I;!T^#;n@f7QD;BAipdxE6f5fUc~57h&UWIo$J#cF=ioZemDQW> z>GPyd&p6w$4u7m!AMo(STDa9@F{Kq04PID_m(?_LV;%l@|MBl-42&FD)5g1DYvu56 zqi2N4H&)B!t-G3}7wzZZ5~ajj>` zCg)JyW}di$M%jaQt+=?SxTf{My`O0p=z<7C@QKFmM+G2+4|{IQn3 zq0=Tfk#_Be>TunMziU1Gp_O7`E$&<=ZRdFf9x+!CL3LoKYTw}$(|+c9XyrtPhUO}J 
zV=?WGgKg_nhsPSV`oD(`VOdKb%Qs^|TGavbSO<O77+?m2 z!C){L0G>Z6z&}&Ieg8mF*6%1AmPh2oWS@1lJ|WLhU7ftLPF9YP7vUi7R<6{;`UC*2 zT%@zIMk6!$=O7p0X7nAk_4PpCit@8C%MXrv8KsDtjPlv)!6vYF0!a}}j03cK9DK%$ z(5TOrKhVhU0XLrwA8EC`!W>@@Hnh<~_81O*Rvf}MN3Vdpvh-PT@Hyn=b7*r-3vgCn zem5E+OUQ2cNR!gb?^Y+YHwso5J|l;(hrTNgX4kdnk+e;_r%E+R1-ME$|$00D~`=&z7IhmLEKE-=&>=eogWo~F;MQm3 z0XUBDfmf)N*At-7>jpk+S!9Um0zRWneVc{6{2t(0PQbw=OG6EPeiY_$2YShOgVATr z%V&e5%XVBuGQJSMkTERafhkr&v z9tU#+(xvgu>ckp`?|5-Atg3zIv%w320z)2j6ye~rm4(_leB_4F$7g;w9Kv;1 zu09*D0uFM@@*_JN!SfA0(my*{CklK{dF5mrY4U%UhtHNDSbVvBwt9d++CNlw91M9C z^+BY8MhB+^D2@7Tbt1a(-Ec^~ecnJH>x~Q%uL6xsv;4wcCzpIid9IyAnEW$p;+`QD z9tNC^WEam=CJqkO@^EN1pP%Z(`U>+~odRw&0gIAy_#C%6UXf;eM}7~7{Qf&~M9<7m zT|$EJzvwx9XE`QUgafdHBR+#8y6~NOv|N6-dXtXu-DnbtOmGAmnHKQz*|)jJjn?$I z^*O*8KJZoQXZ6r`gCmVf7rz@mv==@rjPTaUlA!bQ4G8}o@OU;H!c&*VXE!zouPu-8 zGxQ0Hd*GYT8*s4FaUUdqLO&vYrC0doo`ol_EU9<&Y_cwO_5G9`B2Ytb{{j6cY4FyLnNY~P7Eu7|!WEqum* zQkFj3_fQYeha`jHv%!RV5GJk%`D3!A&sKMKZQ~DcxW0nAvC$kqQ4ilgKzsB|_(-mN z9`X$@o+qIX6Q7m~-bCe6TC0==>P`2Haba`oeL zXmhE%lSNh|9h7Au^~U79qsJgPAv} zF4_n%22U0k44*@Lpc8p|(XgIgKI2Vr=(Co^XDbVg@_7kta>ZwW`FiNP)g82({1Bh@ z3F3>sgRa0I>np$)Z~%9LgYSj|HFRYWh2ao%{(HbpURk+(#{UCv0Uz3fX~t9V!Iwq4 z>^tdDUJGwsS^BInI!gq&{&qjyv+4>f7M9%>8I8Y{} zSIY$*_`k^p$+Eih+3Lj3VlXV%VEAk>s0U!I9^|9-5AZB13-r3OgeU%)`L$f(iS=K6 zGwq*f*~_-x;$Mg4c{A0!qO%6B|?cs>K8$AR2}Q}mC%xICd;ehy`^ zww^zDHomd8_FZZo;o!T`BDEJB_)6r_v%&zv!}B@7@LBQkJL$0YfSdr#(nNx!T;Zwj zYryB^jo*>O!yt!yCJOt`=eRwXJ*0)Q_&wwYZLTb-p??nWq&C11x79@aePW z(P!hQw6x$L6-K*08$7?qI3(5Q!CUlf<)TI&H=hHH@k${4ceY4q56O|FS-GL^b31etC(W6(3>npZ{*OlM3UNJ{!E0>f3{9ffjj_vPgN*KcL<9 zZPvo{ht$aRe?EtPjuHWHZ7wb7$5-HRd01N@7kpB4ua}U+eIxpZ?`%oK z$7jPQEesf|hrU~W(irj7mqj6qzXy0Ecs>Vwq%Y`QHb*46a1)Z59DJ}W%yig%BL@04ZgM3D_A<9x>d{de%#(MvKy`y;>m z4w^iSmTC3Ccb|uKBz_s&BYOGW@ac1?pWzc(@OdSJuZPG$lxFm@k$~Om0ahEm;B54a z7pn&z!!y5!dZ2ZD9+n&6D1S1pXruY(01rYUn2?uUK5la~t-}F60im@US!nejh~j4b zkmBkG@Ce3a4~&&^tR8%~vWVRB^V#wf0B@crc#EFn?^bV68p$r&JSvO$6b^0c%jL6` 
ztIt}lJ_mTCl|_jJ6$YS(g`I^>hAJLeZ5SBRjypAcLalXmzU*)XVB~G0a`sRzzy;9 zykbX^{QPcmsn7PEpADYR;XC1tSICV-Utc6CEub5OS>5>@U?2ikZ;yk87!E$iIP_if zqe>{t@)JyP2pJ9spP66v4WEs6N`U6Y6D2FEhe)sE39@)N;kyJh)=_dJTrrp#b<+Id*FWpqtBrpDA|8Uu~9u(T}$J$x4J|-Q-wG#CNNQzH5HL;PU9ReV5XtEWzwy1j>KMTl9>2WG)jqJ1UPq!lfeoTkPx< z0`k@O-+uf3cfDud{{3(Bl0N#+^6YSUc{C^ zJ?|e53Q7JPc#a0g<=LS4_RIgmtwa| zPV}eK%Mq)=gyLi}zC16c2-O}>X1(#`XfP>$l6Dj(KL7s5 z7rpO)eERi|y)U1Cr43w7c=|O{egSwvA|yg#y$q)U=9ICUp@UcQ7##;$ zI#P%n9VtYPjuawCN5T(N%guFx;`9xd=ZBdLbIQokkx-hYBZbJ(kwPrwM<+)|!jDJ> z8!Eeq8#y8p6ge7_dgN$G02Xp0M?+FVj)sWH@$vCA)RQ@Pi`mI}dGhk*>~u2dABpXm15!_=!+wM^&b%x6mAqjKumisCF}oA%BJlU@S9;S{TC5bIZEdzYG=bUyg! z=OVOKfUXC4mj~#0fNN-BFVUViWrsM3>0@Y<&Th7p4$+kDuEKQlFvL{0I||cr$eJ`` z?|e1^Je6dzMf96z2cUtET|Rc@vBSp>9@&11wV}XHD3I`|6|$mgd}M*B0Uudl2egq7 zt|UIj3RemOY0$j|^9X6X`pA;H`Y0vwQAv{$Ig+)|c*ukTW)zhN*Prw4F*UV3ZGAye zcmPA-C&?qoW!#XA9p*#^X^gfKN|4P~%h7v09QS9%=0*zy3az9ObhS4)>}TM};{=e9 zTMdxPN3x1#b^xk0{QMDYiOevF;d(s`u7^SPATv)Ay}TX(ajCvzoAiW^Oi<6%sAJh+Im!5k7{Rw+{7vw;eXx9-Fj48 zk7@%>i12gcqs@bsoU{G*Mzx)rjB2yN=zQ3p4Ia+YMm-4=P>7CGG(`rO2?SE|dcop-sx0|rNs zdfeK1j~hH=08xiq`!*eJ$y}t`&ny11qNJxy{9_>3{9_=C?xW)Nlx;h5jJ30lu@_4? 
z#RMjCh>7bgb#qZ;Vs#D9wh*LWdoxtkbZ+tT<>?sSIjY^( zACla?s0UWwy=)1) zWade|Qh=5?i}o}|Nz^b&wHb&{e` zh!^>M{(oHJ}np^D$ z?PCPBc-`Je_q042A!y}}klq2=#ZJ+ZzgxxN`n+{NszhTgFnK!tkX4AIdrVlblfl(l z%+bJmJ{cUA)4^w-3GoQgqGWlo*HZrc_22%h_tp1bf7AO?G{qMM8g_5*lg`8A&$F;U z{MWOu{wg_u{eI`cC+%(>^7#*l%j)*N|8E3l{ry=1$o7LzKK*2WfA8U=_I|s&|LEcF z!-xBye7gTBq6I(Qf3*L}Bb1UekxsnWC(G_ieH`LcH+f=px00wY!)#r@kGS1_@bGx^ zdebB;Wq!(#2rS}1zFu!|u{+=Y$p{+*>a0lhf1MYJ?tedK>|uPU; zeqH>>fBc7id0xT^^sdlo#XnFI{{RQUbcTQz2Gji^;nN?AwPvlb3X}V7e_D(Wri01V;0PIKr^Pq@A>_OGW+H5B|}@i*?wIScG9~eGQ36IDB>Bq{9*x>%HC};jprusSL12&9~#P7=q+w9b{8*P>%-;>(esoEPX|RV#0lr< zNFj1`B(=>36{Qe4I#P%n9VtYPj)c-I9VtYPjuawCM+%XnBcU|cED1o4iUc4>L;{eb zA)zuyLjsVaApywIkN`w9@H--Sm-C02_`%^WQJFrN)+dNVpHa3hh}I{FC!YxxJc~^v zEtK;FQJe#K@&ENN`snqoD{me5)-i7!yTW8JyPTX^B&AwHHFd6fhgt<>TC}HhA~!?c z ze_a#+Qo*Y81yrnu9VeWR*moB%E6PD)W;I4uqGjcAYb#-+D*-XIP}ijSW!Z)slVrhe z=O54{-`jM4I*g*j%{RYjJ#;!gp7l7#mg1zyxTS2-nZrd;bxET{Ad$=`oHr0q8w&CI za+wVtN$1RJ-AXCB-o@J4e753viiim?U`y|y>`${aM_>_W^5sN)2>{ZkySlBVcZw-W zZ#X`Q$c_T$4cBG?Z#g`}QBtd2_nY4&)W6X9Z7X}(Q2(Nsp2!Y?X*qJ63d*yi!8Laj zL{!#X`^Bs*r@^`J^Y~)wIo@YaK}$9jY|tEq9>cQTpxtpQLK?)27lZzxjRd^7k4ST< z6Q2RunNd4nH6K-HKA)81acZ}X`v*c!!okQmO}) zW_E)Y(7xxR{`3v-0d1%F54Sl1oAwg)3M<$<%qTyG;A{DCC!SpnhuGaPN8z4}<_s0n zLVU%XUR}9j!bvS`tUVF;=u`Tb`mI$bl<| z#A>>tI_P67-xtLx_c(-ZOXq4E%osW82)Qt};sx_tdZy~RPBD&kkg>bjI)$J_MRL=E zt#Y$CtK&J=@JPfq3w7=5Mg!FOypJu1Lu@ldbt481@e2I+pMCeuv+th0_@By%g!=j| z9Nf0yc5uj;1o4tR95KUOaoqOF$L&I77o$`A)uw1x(B4w{`lAxQDQ^OS+DZe|XvbF4 zqhfZ7?WG|0sz1EM-pn)ka4;MnzKN;>fF1x)BPt9TzetBGk*$qQCurzs7idgA0Yq;V zUtn~OQ^;uO*q5T7IHpD%V;X;s*?f#tT<@SinUq+`91TXj<6-|~`q^iEj6gu1)PXrk zf*@%mK?o!XuCm&}nD&`&ZnR4>|pRby*zkuI3DesoD3!>{j>2Eo<_r+bEwswN8QeU9+lI> z%PBVm_a>da?!(T*N4vWZrL;)4n#2z~ga;;fPFIReD$R-}YO&r1aGM>6T|BVxRz!A% zK7&f#GIAB?Twl=?&rszjMaoq=7U1&i%1sJ5RP6|v+G)aGx|MoIkj+huT3Wi+T(+>S zIt^&_z?liZo(QmIgR_S(+OO`x0$4OtSWg71F+S^wKsZT3=iVV$@$2xU26ORZ)IMPp9`?WW0WkjfjylL>{X@%D!v=A*<{{1XI_w+ znDOu#%{y%FvG=n5YFn%Ivh!+VTh5}9TE6W1gxyzMc#34h*?%D0+qdfp#A-8`z`~qS 
z6jD8d-KVs5<0&hwq0mGSRlHNMf+e|V3eha2kr3^96Q70qS;TvYYvX)kk5gxxP&n;N zMMtAe@hJr|r*Y{SqtQ*JgddQ0C`NtkgvV}p+=Yi&*a+6HCnf1@K8O;VVLd5P%k{>S zlIH#vNqBx<(y-^imLvSG12mJ{Ghj*FdeF3G!Ncrn0efNJLP%P?cfrFRyQqC5qE1Vl zzQ@aKHV7Epc7v5LNV6^4co0x@9`*JzdnG&o&(^9kc~7dbw>*;F?oE)WQNXPZNR{@+m;RGXx8yIK>)5rV7k%|EY{`}Yt z3Rb~-%P%Piw5&HqHY98RbAjl$L3GZ~a;4Ctun1Y?(O4uWz5RKT!pTS0g~1Ah83-pJ zp#g=Xa*AAKk+EUEZ>_gLTHaS&WRS14HtdP+rpSwLZE1xqX)t9qp4~){t-J**Lf3xe zSyQYAB~7Hg%`z`pvSn{?hf8YMRwheo1#;Y%uo*{j-F8Czl00*SKQRIC6S@_H+ zkF+g3%fWQHMeQ3LUw6|b?Im~7CVmtfWVp(faofL>9m zZh{2!{oAj<{LW1I+A<5c#+bdC?0XA_t?v^nF6l+ZVbXFflXvX|DvYvnadde&5Q3t1 zY+8AUKwOQ4Gzw2qf=gJT+v&ooKtBxs>_Bx@d#%(DE3IXmz^<55>)+U+opK{o=(-Ux z%hRjbRl{ogjt2cRuHzCRG^)e{Sagpt=n%)J;@DUoVHo=|`(A+xv#~Z6*k<$uJ+zSXYzLwD)@( zap1AKOC!$BVyJEqLbcqfto4D7W^=>HCjR;rhBna)qSE%LMvtknQ~*`aA$p|}HVMUA z8~hfHa?PFz!Rbdh)~hzu1ri9X9yfUshnqZ!zfGR#ZsS-JWnxGQ(pv75>$EfuZ!0P# z=y)CGgaSUUJkI%?ByY)p6jv{;_pK2S7?|@Xhg84Np0YRNH?`5)0q$^~qD0TvxsuQH ztuQDaiP;x86q}XjWSItDb#z`*T{jD>SJ-YOhOMl7x0UO8g{@~h&r#WCP%N4VqoOyT zBrM}INv0Z6=&Ll3Ot{mjMs~iPf@GaDDM%I($%P9`{WrqFXx0+w0pFE-_c$B}7eUnT z!n|fZ${a0y_BSq(kUa+KNTA;!dkE0UGy!W#I4>+>o?ng&9j35V;mCqr?%vS@r32Gg z!fgVR8M13|?`Co0j@V`z?#n>a!ax(4ElHD%?t?qt+De12WDeN~5l>C{k=0Nw<5ZFg zj9y+Likr7lU__ghh+xDxA(3$^K#)T0G@YEgrAmtldTo?_UcAO^2w3=lK@8(bjGj2~ zRopzU-MHk4hy--`36Rq46)s+XcVijqbj#zIGA%cTHL|m5?1I#xMRAjelT{u$x7kB+LG@pBz|&O z6_ok*jn8H_4y}omMajttmiMYQ3Dn#1<=LTJXv1P*vK(IWa7L&%T#3V5Y~T}^6$gXa z+ri+h;DQ-~WiR%6?Ls2G#edRyq$BARH~b6{*8xrWj)z}8|F3WH;br-1gC}5>Llk<% z>vIrr`}TA&8A!nR#a?l72@3)@;0#WnR9o!=g#7SVpxD`yjBf{ev=-rU!2rX~-wprI z6vSK&kO^ej)jYQ0pCJ8n5Py#OJSwA;_2d5GY&;<;QqJLcf^!7U$7e^ljt6)5Fq*wY z^e%#c4@!(dr{yt-YT**8&Q9kM|Ls1MKYQ&@A8mY8o*`a%;7kbNMem|_);k){dhg8O z)W&s*tXAm;e5gBR9fHQWuv7 z9U`^;z&2GRGecTcSX!JBKZoVYBrqgzxZ*KR%VibAovw|`Uh1fk#0~hk*oQZ8>x;w} z2s&gM25%Eu9_7IKwpp|;rg+&YFd4gt`?+wv**6=a7Aep6+p;W}yw$UOuUGF?6aJIvBlWNakI^ zcBc|aXc%FuC^*EXC|HTDAO6a2fU;4EeP0RSm{$oMkSiEuL{2fV5($RRBdrAM;49HQ zes><-{s8{4cY6bPqhjg&-W1DkU;tlAMT&o;d_G^{n&Q#GU!xDDP`Q>aTJ5Nss 
zBYG7%5p|2Zhy6>0>GiSm;pD8um9KQP<;1mGo1A|#dQvESW!%_$7lF}8TznJZqYp$ zkPyAT_h1Mw!aJW|cnBW`Jp!l>YlPzAuHg0n-2Myd!$IF<58I2U84gB>C=tpZ_+!z- znJ(rbU_7+W?$6)^;4J@adg315KKV$3YT?f4T@4P!pMX=@dn~;nicH{Pon;q@V&9U0 zL`yEtKcH;aIZtH`$q5nvfJ`|F2M2p#VJ@ttG=W5Tnn0qZod%Hj467!P#icB!Hcu=K zMa4SFBZ&ZZ14km~J-N0oK9?!(T^=0q7=+6~v=S9nO3YGS?$~w*;qlZJ_bfpN<_gkA z6~Q@x0m}y25pS8Bq=xi~#1qRMtvDo#*rGHEMwYbd^{fzPlq*XwyxX;s@wlAmK}LXS z0mCNcm$Ne-F0;W`oG%jP5jNqbQ-GJFk8zJ{vRLF&dHhv5!m2M*tC@L?>0rjX1l&M# z0HiGxyff);Y_`r0G^|xW?(R%>!$(=7G7~=Wi^ZK1q}SsnQUJf5(~+c1O-j_B_-KhUm{`Z4&r@3Lp=h{{p^-jU>MFi&KV zFGH8dMxU@=){lk~1pY&R>-ntCt1QfZAk80~jl@y%s@ek{<8mMmTuyt-$;D3s%o_O0 zbG_e%4#;;~n5%d#={ZaaAh38C=u$j@&l*+XUj$u;wG{%g-6^j{$g(>X#~o@m=6bPT+VTWPG^AlO#l3rxK0yam4lp!65-ZZq)f4TM?2~9G z>8tFjK_5wevLAHBwbr_$a+zp7j`E|c9@kQM_e#S6KQ~M~=D_L$yyVu+5>O0w4rVgI zH-SWBW)ny>Y6cMMdqZ>(TM|tmSwv|mSw!AsVQhSCNd!bGNET5Fl0}rzl1G$+WD%tx zSwsmfkyX=Fpp%M!h|8)4&#Xo3N{KF@C*aqh&yW8$8wQaGJ zwxyQaY6CSL%p3Z9XWid}$R(nm-=dO&EbbloZkTkSPPQASAX!8SEqO#KNET5Fl0}rz z@|)@JD|h!5dpnoCBXCp+?AB5$Y-Y_c7;;a6-?So%`85d+o6|j<^eE4A7Ss@cHiPX2Ahx7RGtd zf0B~KFrZBUryO@Xh*hher6>uWcP9CXZ4Kb9<|Tkkc5B{XxY7L@0))3(z{kR~`*z2FxQj)QRasg+5T+fS`DONCi=ofU=Gj#icq0+4VMO1A9hI1SE=H2eh z$NZZ~9T2ikDE=ar>4JbYELQg3D%c%g67Z`4$=bjQO9Hv8*eiAfj{=*wbb#)(_AP`t z^m?wb_AShCRSlN`b!)Z1ttPVa!gciXjCTpBx#dNH|2!Jt;*0?-wr?;;z<wp%AegX>*9s`3a4z49}TzXgX~>jZS*E>3dSeKo!WwI3}{SxTTn+2_kj$W;-8@ zgm>=f!MnyG@m(em^DO0h?2+r7IIM!Y2nD*RBa38`Q%bN}4_mI&-H$D}CL=U(3&w3F zn1XFdwoDipHeqlT0D`?H$=FMeWZ6pJsf9@pqH-P|KShQfUlViJ)K7dyhYCYdcZ<0= z=*XT##77-q*Megv3*}F9&;S*)JJDvI9}EH#*@F$tco%SwPcT+@Yr#M)REk4tOG@A1 z0W`8_(7oc!kD580BIbs%+<7xBI0{W6t~5L)Zce81=L1pX&_%Y8e~>dH-Qg)|qXh$9 zpXx_{)-E`cBZPRIWV(%2D$O&5TreR?@|>+qckZU7aSlU=NTyx|>LnemN!VCh`_+Y5 zPMVp+vI0St8Oc~(T;%|oW`XtX!DBWd+c-h9z;ysyTR3I2Cd@e3T+X!wSKMP&bTTAo zPWQQyFndbbC-)G*iN1*4Epf+`3Ao{__riMbWBc4rC*c;uIrB4Tpe>l5IjE+m8&&~P3X(;Xf@BdTwB!+` zAX!8yNET5-OJu<{RVY|+5d`s2{O?j=myKcRG%K0h$Sh0ZnYZNtD;biv`7c&_=yS!t z6es@6FR-Ne9nHai1qM@Ie3vgU5ad~46l9x$ZUy}P&MwvAA44xXcr+cu;RxU06bnqp 
zngca)!h8?^t-G@>U?!5iUfBKpFYMmZd`-LBZ+5;WdlhmkJ(_U4vo*)7yD?jHHLp+B z;+i*^&lyW{b>nxrxf;m{Q#F07+lrNWsZJlw*sSa%xQjD2)~jKn=8~42r`-jWi6ihe(9CnZOeF}*+@9V z0`1o{TGLw_xuNULwv0oT4bETUimWUT%<;GSY>R!ZbI0-9ZW2$|EqYdO+RTgmsC22g z(Rz_HH3D6`VAd`eIsP5%fLU)S##I@t#Ou8fBI~&I!Mnc!k80F4)aJcQZq?=}7f1G- z(`}* zVMch7Y<|xJL{@qQfH5xfqFe%&+l+ESU-@P?IXcHU@|%cq5wp9|9(RWHZN<1aUfm6^ zshZggJh=HJ!Xz*^neb`xK{pj6b~(SPlpY+X?zQg2S$j@-Rt_Ru?x39IU8gEH>j)Rz z2D^@MaTD<6W@gJx%GUF-^?dC2Hy@Kh)Gj;5z4k|gYy8{un+uxu5!e!h%xgaAObjRR zeL01D5GSNEK2wfQl|Z`hW-KWre`s#0MA!R|7LDTjG?}nBg1&Bq)Umh_Dt}!5al58MITsmT| zE_C2?SH!|-6SaKP2JQDo4|o-B)sKxGEDwyJ{)O-_wiE?8`{=OS`zUcolRLjXCWJ+S(OlxCTvA ztVO|uKM$bw?jOOH&|}VdCnk8ylhoxdwZj=W&z_}q7~>wfa(pIu;~GLz29tAW&QLi_ zTjScR6hwIdtm&Fp)7c?HZHxcl&i6KgW>f;Dmy6?0NbomTJXgmXZ{0 z+5#8*NngQbYY#fdz%?A7NS8rJ3ru!VY&!qj^i7YN;JGE#vdUVfR(I2j0if_)htfXy z%?9|4s&Sr2qd)t3aSzQr-sWz`4T!cD0NOLS+>QNkF>6f(045LEIY~|7*QaWh;n- zVZn*oEow5w;g03+9nP+MP%k)+UGoVk_97)ap>v8Wq43OSPY48&-}e6gtoKchPSI^n zEvE{j=cQCt$6{hd07#@Km}=&@k1fBhI=I}i-2|1T`=MAcHqUNjb>I{3jfhC32GrJb0xXE+fNhQpj@P(M zScZthK{;F%k*j)w2y$)LScE$qUOdT7lB8O5?(hZ^qz&g@(tviv1ZxJVpji_ZI10B?1Zv(ur2B`B zaBc_@!K08hm()O0imrjCluvnV2}}b;2}}b)2}}b$Dbaa)5|{>h5|{>h5|{>h$a&I@ zT}NlVb83`qgCt<=y-R?3mD^OJ_5_^aZr!%A&;&BFg-BK`H+VJBFzhcZwLhDknZdP) z)}Ypwf)u^vA=w~ukQh(6P&+O-rIO<<0?o{W0K@Z;C3DMr4V<@v7z#Z`14DI5ZBPH# z$)r3At}kJrm)(*VdC1;;Sf^@2Q~-DnhyvKX3Kak!eu!ISovEa);s-uobY(oDWVzu0 zyTJUYi&4&tNIbAaqnP`&(a8)-z+kQNX%3ODdHONM5MU?3r|Gaktab~ zt!dom*K~EE0@7uTD*jD?*s}@5j_o!Xnoog@;Aw0F#BA57!mJU}ljtArGrj>)WaU9#?4Te@? 
zjODLQSsTX~r^DPJ7&W4UbQgxibb6Szaf~jlMujt@*Tyj}*M!@1GU)61aj#wlIdZo2 z+Bi;ZoV9UmY6mMQYvZ{8A&uihn#sSmaV$u0&p6g;;0~vZjW)8_Vzlj7H?`JXN;$NKqi7keQxU%_t&~nG(c=z#R8H=B6xXYihe1er&`jvaQQjj^C zbHl#Q=SBi^dtY{uJJc#5!d@&!{Mq%>RSMysu6}Z8LEr$F6!&L?MHo7dR>i>B@h8jG zfz_Q?2VqBAhrqg}>L3{A)S&@`uY+qwSrBNIqwe8&*qd!MOpeqzL)d(~Ja19LPJJ)xAzQ}%%dyLM-bHI&DzR&E zbuQt1#adh^ZbjhN64xZc$aGU#^0ozAdEhE4Hxp@(>GH_dqc);5&a3yEh+n@fPkmmv zy^xn-Wtvysj~Ll;p8EU^v7X7>g9a|X@CI8;Gs9*;TnoFn6eCX{24%}6GW)UniZK{a zlf_n4oWM(QEp}$f$p=Cwr0Pfi4e8E~bmIoyo99Ao;*G}3bY`HkBmkZ~F51Pv{#B4Y zIP&+|U-X50U)=Y%^8Fw2ngPpx8F)f}E`xH^16%XHs(ZM?N*Gye4pDYeWsY5*Y4*j1 zT+$TI;Jh{(SQvcSsKRFP3_Qi-TemRvGvmj+;>+1ZZg{R36gFs^8KLjX*+EufYCtrU zXf{LUM8eB+an;b)QC)0ifpWBVwv8q0%%N&Eq!xqu$NW9eP04NyY)W?P%1z16(;1Sp zKF+4(8tycA4R@Mc!|fL^qYdK$b&^`coTSzeC#f~ODOsvP1KuRHhBryA;Z0I&cp(So z_?sR(V<+HEQfqjV)EeF-)!`*AD;;y=GOS|nXqJVx4h9cVt5R9tHeuQN!9P`1Rnq;c z^#h#aj4D>wMg5}n0~|_{1Hg(NoI|@i)8pN20XQ%o>0PAf_xQGnq3j^thM^F_EOF-K z6_D4~d@&%T3%-?_;rMv*T0smP<*c657Yb{$5a`l&?|e2v23yHY0K%C%Wjm~5^5}cF zjM4E%iHb6m7)=CYgp%Z$^m5-Y2Q%pPXbm%3#Ee!k!5I=_gB%NSrR%bz)y}VB z%J`7cz32U-wchooohEt)hw}#EmnD}pxvqzx8Nzr|&FuRpwQ^k##d&Lple{&&DOpv# zNooyml3K%?q^|X!YhEY>s+qaEFRvF>@1;g;qID#2;x^P0G=nGwjRxSOS2Lz5;&_S# zU`ah>%1110*$snpu8Vv3ps_Skrz;$23IrSEx|OY77#3cOV_{wzQN|Uj~Xnk7~JQZ*0KqIlMe4s`e!GDx*Au~mbH(o zdc)aNJcM1srRSy(&NZ0|-%W0JP7T7m9$BeCX^Yx}? 
zCg%RTej%5heP(HWr}mRAlZk4!+vnQL1cH7);9X(VcB|QY*XR=xm@J z%bHb0LHMkpWp!x~Q;e1v1IN6oL#dp|_Mlk6B5Z+h2Cf&F*GQ4sKF!zHP<;Zo$WCHt)!8E$qMtj(`wPzf@|768yG+DFJc>AmTF=?pf< zs>yUIAVj6R9>>xmKRN9~+i-yh%)wFbsz1cuJuF{EP^~sr!vdm}bv2Q?Ov_DX*=@`x z_4wpuH0%jm$CIPMq>z8JTTqPs@CWru^9d1oW+Er&YKA%`^RB>@rdB}r4kmkJGt{6K zw*y*!dY2l)xtX!`Def(z6u|)q=8$^8c_~!^@H@|0z<1~1b%P_jFEjAwH1dqx*g#*J zT~5xLOZxnfrNiLspKQ4<WW6biY^eJRr3zmr+Fw z;5#-hCikxS92^wh$Y23o{De1IK_yp!Dc@vyc}-~Y_x}~IgDRF}Mz~v+1b3TiT<6@Xvq#_HVuC z-~afc_x+Dgzy7iJ<@2wKKQP z|KrQQ`^u9rkjY?rIh+-T zip6j?J?+W1AK3wj1O6IgbL`yNi$HwBB};#^ApL4V`lvsx7opu|5tqTs;9PZcSLVzZ z)w1s8oSE}LoJTVSN#@A;UAZYp7Eub4Ma1FW*XYJqOQfY_5hb+b5v3rDYt&hcsGDu0 z1W|V}qV8fuyIDjjUxHXRh)b3d2!=`}dNz+;$(9meIO{$r`_n~Wk)DaDiOR`Vh@6u- z9;CZ>ikSW$Cj1fE(P#y_Ip6n_kU)?W;?Da`=7vcGRz70O%@1t1?g}AW3u3k+@}{Sp z#4kRpu9jtr6K|kq{c&~;n~7=6V?f5l|1*D9PJeP;UOgC3PIk_Zj(57d?N1)G_aA-Q zd4N|xm_4@6`ZOF<^8*a^{qXt~UU$1s3Y5?0kpt?s3!J`#3C6U*UW|CgjssfyhZy%u zjFz>;++bGM<_&Rcrm8OjkJhSOZ`|~@Ug&m-KC%_Rc>_XLAEnAB+kI1zsNV)!Qjjd7 zgxs|%KZA=MCRXC08RU5z)a!3rl`rAZbN-sx^Y`zf-IHia9ZSxpbaB)3{_bWNQcxqf^MVY0!SfE?Z<(@K$vs!g*n$%K zi3Ze#ER$!F7$7A6NU$RLwKtzCgHOl9qp8g9l?^<^KpMMR+MXbi+o3W^ixExxqXE}5 z`Um5yL2&?=0@_b%RzqjAtm(8?FP9Xf>u!o&g+@jtJ65mh;iNx1JO!d-te?m!XP9MT z&4nju@ldU!vVSr@!-0CE{%lfSs|>LFl@lc&7kTn$uFTy?6pP7f_mrwU?0~(0HnNvq%piQ-aALx!r`hqY` zoVVJy=#UjY=!)JCwwgCRNnA7W!3)AmHtU77YGQy{8W^2Mp6RmgMsWdVw^}7s$X4-s zgK=JbDlbCp*@Z@=@z_Wqmhu-7Tsa2xDY`_-^~dj$=w@c3tH~kKFpcCKQ$p}Jz1VyC zdK3K%RM0;ms$ma=@gEwp2S+lZGyHJf=B%~h>s|72If&X@w%td`Iv&TUF<*M37)6Umz%+z zh)D7LFI7q?CF0qCTtGs$wqD1CG&c_QXG7ymr{wxK#`QE1CE-z|HJ0;K3h|+lwCg{A z`2$<`r{ZP1c<|ss@xy(D=A%+zOYw7PD+T^G4}AZdzrE;v`{n=L=`xFNQS`gMoZVgg zRW!tZ!?}pVA*r{(*Ykk~j1~IXpvzGf(QWRLZ`ZU$)RLWnVRwEtE^ySCz6wjZ*pA1Xr|i+DR~H9w!RjH%DDfk1irL zO*T}zOminEa43&-wNO}zMH&p=&{&(8qrZsOXk6cBHt_#^3@SM{AInfe2c@SnQQKk> z;H*!c^_uE!O@TURX3^9P4c_%pBgCYgMqIVX&(j@KiT(0#<)2EAm}=R)Nn9`lc&trW zpo)NZ3USj9nldUh04Z*FmKS%zME)aqmPc(IIyH*6poE-Y4PFpk!s6>WPf^(pGIuvy zJn}dymK%92wwuNAXO-viStN$XQ=OZ_B|zFRIArs@fIUOE$)Nb_f2+g9vj|N$fCnYC 
zJPe+*qaloh1DFtRrnK;X;2JZ`BA6s!aiQdLbf!zg#3u5^3YIv4O9v4=@Q5*-fG!^~ zVtz#%H_v*_O12~079iC`@B|Wv46lc>QjSuqQx!kG^y7iA20w5vwYeXidnbfSAAi&&+AUn*`h_Jtn#=Qa5*?uEVBalXFb~Coj&z&yulkoIQ zrE8!@mpp=>OZI~qPKmbR<@gfwR*W(CpMCdDQb{+`WB7*nG4^Il_S=`QE7QIl zy6 z-Ud{0)vw?)q1S}jrXgDv-E+mjKbwjUJ(2X{_Kw!ov`KqHE@18b!1PUdF1^n6)1+}omx^1|GjsHz)o10| zC88?kWBVS&(%IF*asQj&C%``?Z!9K^uF{jl&>E!}QZTh4Mv!U_!Cx&NeXz898=L=~u%phLi1gLW*Q||s1?;IY>E~$aXR|S{ zNNQ?hcE!fLqKyeR1Z=n~+L+xtX=9RnndgGQ)XG?Fu%$8=MDwaHngr!9h>VKpl@n2# zPK%{f%ZN)c!HbPqlR=bCTee9KSy(EI^)Dek(if^};i{w~>TH1HsPwiM1kYa{+J*s* z*K>|yDm-6v_@XBRSO%i5(LdD}9<*^J&kKj-pe~z2c7r|_JCvhJy>$YR*oO5%I_86N zNjfDsA(hs;yzNiUr^R#tw+?h5ob&GlD5{Svs9}nXznCpgaWwbVRys*mOW`>>;ZIH8 zLrH6J7T(O8$r_<56{mnqWTptSfI~nAtD3ie8@0d0(Qzkm7kMQmBLzcB7)6~{j(8$& zET?F_-1Ky2<)k}uP$eCeIP4ddU^<))c8mTK*HOU_%*Cb>Jew2h& z$=)5-AmY@*en9xx-g-{IOpt0O^H<%+$!|$EF1?0qvPKF0=nvp0y#3;Q-{#TfCPPWK zJi6Ms;6(W?D5HJ*EhhfoCjP-3K_i?Lqt+%!gO!>YhGRlTIO)fM^N1gUdwH?q*(aa3 zVUf~h^}K_Js+W$Cixm=~0*h5C)iqKIrCKRNR0>8*@HId2r`Dj`IH{k)zkvYF0}Gq$ z(VC~?&t$I9lk_d&4phhYJ@VVxb8Q%Oc;B-34&OUTmbw+=`;%XN#E#NHus#K#FxXBr zRm{TWnkxY6aEw#DyvbEBs+8&o&_}6LTK(;s(ot@|6gI7ktlX5Z~ zANWOn;pjt3AqTHt~vIyc$kS`Zi{l{HtkN30*I`+~}-4F?te$^_IcEH|&869@V7 z607K#9t<#B7|6Wgf>Q=tT8GPe0R4f8ADHP>Pr6MPE9l9p_|EfG$bx`byFjSs?yjf@ ziS|w(?fs#MPC?Cp_`MeOw8xM37VYS$XX>5qx8Dcne)}dkKYjE*I6r-K6P%q+a>8te z74vwD+&4nn-H#8QT@`6}A4qq0R+gjg!{n^n711I$(0b`@3a5tB#=4s9iuhH-sjmiy zJ=}D@4yg`1Qre3Hkk`9o@+U{FcgOg^Y|H>*l=4nJ%zN(U?wEB*^<;c`-n3!nj)qjb zX{6eB8m8GCQ2iSTjuuN;yNkujM1exEtwWFy?PcpwnE)YW%beh#*e+f^Fi3OxpdhY= zy-!e(RWb<(auY3?KEX-}x7X;FHXb8^*&%J{Tv{BwOxxW5ZW5F|FDjdt+%1a#UO6nA z2%F-vQ-rq^m`(P?Y_vHY$f!l5TUuQ21~QfpN9VH4p0w$thS1VtgqZHrE4%rWr(||E z5!OeaZzdp}K2GPuR4#U*aMt+pQ!h>PX`isLvtRu|5E4wZz z_8GZn9=8_`5=saQF=C4SBw7*a+WX^f@7=AC)0JYh>8c!^$DT?xTeMyrw&#Rti>6-3 z9kY4Bid@|Bf1XeJC!>CGc{ah>dcKvcK_fA##Bo4Q=7k}qzAfjZHbnS{j z+j(uC5BrEhi}#iZEnZN0(iSlQLa%YK@G5C=rg1}3WBJue(P9`ID#_`_^_?RS;8DqT!ZONPq9gIIOrXX-=1kab>~rL zEs6Y|pRL%xS+>2KW$WH7TXs#Gl>>jZYvbQ#1F~sliitJ%C_7_a8tdIsEw6Fne?)AT 
z>X!`;4G7KN;Zn@u;j0ae2ge>34nk2kg zDHu3b5$B*o&CvQ{zrsnyixJ?yty3F=xOm-cpe_R!M4lPFAQ22$q|NTOh)cxDCK`|+ zLADu*ih&VbtH2Uk3*cJc;sV}tge;)xv~M+r(n8v3|GZBaEmjK8*~pBYtxkLp*ekLpn{r;X}!gE<#e zh&+3r_GA8eZCGm{X5h~^Bu2RNt7z0lj3-8I@fN*g4LO?68N_qt$UvScCl~Fc)a2=r zu{~3c9hzZz*b7?Z>7PcZsSIN7Bta7hwK5t}4JLe8z_$%2s!MM_p?I zj0h;*!EqoHdbQ;bjzLX!PLj#u1cdi#QguDW1B!(YI<8iGNCN6&_$GSNMJ+ZYhP5z3 zS8TPzB%o=rwUEFB7Il|_H5(cjLoNslOhM;|1jev3y3P#^oG;$wM}vsFq^g+nbJRKc z%7JyoD7;91mXds;;ctEqP#8d2PtXS!IVmsDx)a3oaDPuF*f6M~sCp}R8j|4h-;GeL zsa#@FWDFvXS~qIK$8P!)7DNFbiKuapv3}&2u=NvwsurV6UgFo^zsMx z`4A#G2X=`tmMksFQG35yq)V2Rq(Y0il0N{?;;P7$C3uo+!GtAQIueL19SKB42fwd$ za}Z_DWjdF$KUKjdN8Lp_w|?`*GS@2-iK;zgpI-izI~guO@JyI+X+o|H4$F2roYSBw znlk{%#e)N0If(F9{=;zevi7b?*;3>x$TO@Fv|MN^OL6n?6d&&Ff*b7&`vyuM$11>@ zS#abtvMQO3EJ>natrTEZBeH^+G53q|voIq;rb})>3<{@ujgO|l+3y{{IIE3o=ul4N zBKPv`@b z#bj;;UTpN{1M7W1l(8{qeJ2oE>pOwS5|Thf)^}5Fa&#mXe3p&`B1=aC5z*lcdbx0H z#RIJM_Oct?PJpwa?J5M^Wj2_^mw&x_WyW09YTczJD0G^uy9%+M1q`@tc|8lL1gu0l zG@1t3XmuO&0KcMy$IFKr1XOFP1JM5 z+XjfN@=eOfD&GVmOGl!fvved7SvnGkEFB3%LE z%Nb+@!<5d@dS`}#i~U}^VOwlegGfgt<3J%-(XGdU^*F#Spg5gu;e=v64v4E(Ot^GL zu^tEVmN@0$Qj^0>Mr=(Gebh0{!2QDHkG!;Slz{<|L{=pQ-rsG6Vd=FuewkU$LS0uolN3ghxzTVo3Yr`7FRpG&mT#cxn zU+3`R;)oU>w~J$7Ze7I(z(%1IafwGLQF3FEtA!nDkBd(>U?VUPnaKHz!Ars);naKX ztlc?6;IGs*)Xjv2YbPpMuO3LAYM=v*)J**soI!KV1tev_W_c79Njh_=FWgbJ*rI)Q z)Vj9CJAy~y3{5Ngn8sb~t;cUWjUbtDaRz`J_QTk%6k~IgQd>EcI2&jg5BD6onRTbQ?=DSbfic9Y1_4;HtU&-!hB7Z} z60}`iairZVF%GQmRg$kItQc05bR68Y<#TwGCuHG(w%?l-*Cl)9x6i(7bslbG_r84H zfJK4a`wt!-$EH>;H*wZ;GhoJYC&(+djBrhD8I%KB(iU!p{uiOetcz|CS=#dHtUVWc zL2hc+EcVyt!)nq}qLE%YR3^wXH{17qO)hk1DM=u*CRhTIr6VOZM@IsYr6YmJ(vd(! 
zbnyGygY8>s?YU_5TRircj*I zw{*^?X|{;k?`08L?gV*`k1JS2;JQM;k45xw?F(A_g18P7t=9$HEwN_S7SYRfU>Bl; zf8FgJ@C89fWqm>TKG!0W^k3K_B5m_5BBb9Pi^#cD@@qLZx(E9RL;6y_AmPD#7)N)( z7u0za46gTa27wpeUh;#zO#ZrXxm@tp> z5_KM}U=*d<=_c~oJ)SyT_or78rxSyn75ZEHDP|l6y@2OkzMpCn`DXPrOu?=7;BGaH2y@qI{&*H#Gm8 zI}GLU>5s(~!f(Wl#1YPZbdA9y1_v6S98mu{7;H;>$abS+S>DgeTj&)7S8`sll}*9U zvZmmByURfFP26QIXXm(;L-BNlrePZ@ryZCq-oY3Oxo8TyTwcDl$7FYct z!tvx17k{z=UD$A7qnq}J2zS}Qd0w;2$(c=dG#LkXG5PF%Cjj6@rG?l=L(Y$ISKlT6 z`joe)$Ipww5Qn}AKE6s8 zwUQm-*YFvWSYX8|Rq)Kq^DBI%3gTC)s>{zmg7Ogo#~tvKz$?dH<^CG`WU5A+1xxohH|Cr^z+kX>tvBLRt-Vl3K%@ zq}C88sWrR_Sv9;#Y7K9aTEm;9*6@-l>;U7=+oN)%($5?#MW(t8=PMkssacjeTuqd< zNWJHv99by`>pb|UYN<-PKUU|#yKFkur{;&xP!L#G8^7F>nK)by?*Eu2Q$>tlf7o{$M!GJB3B z%_Jo0ag)Wpd-TQY?R)al)t9ck?CQ&IYvCQ|9Qz}`MZNzoE_;e8pu+{GO&IGB^?Uy>Al>gp#Wg`ePJ zB9xKBPjE}|Q$k9x4qPKS1{Cii)_A61$N<)*j|ye|H(Ai8b5_epCkG(taMv4M4tt}k z-bJ_fS01m^D@W%|J{UVVAq;vfJ}X)PacUz1;iK*JsT{wcN%F7t4D?l;E*qlcn?NEw zO(0H3Mag`O5t-JX2eP=7#nk4BrE*ftlRT0Lz-d&1M<)rhGP=^QcSQEo-Q2P;LGsv&m^SzRTG5Qfgl4JNip1u%YB32Kv#L=G>O+?g z+PH%9xSYU+JQ=?QObZw`i|=~RpMCrE&p&?uUw=o>aisF|)8{X$B$Wp)Yba(DwMei? 
zTa+ED{Bra$wu?A-uM+Q4c@kSW+R@aAL`fbWA5RA}*1m;ub|&2o-Ydp4iLp3GmD1VW zne2v-Oreus%1OGLJ2C)$7Zt7RuQ+)_Y*p$Gt>aeV$WR_6#j3WE_TNm@P^_UfS&F5K zo6uj=p~5alBp4?a56n>4^4<+!?ppYwet{W&{8Ad?)fYd(qx|!BfvY%r_?&Q@y<<8X zoP%N0@}Gn8aVu180~N0{5W!0_LqvCH|8u#*q5a^&gWael1ZrkbA4gE1RG}^aRnB;q zDu1{RqFsz9MW+fj5wMRVupG)Mpj!Kz06Qf9$5|t+cqkg6 zvGYD(4?Ra zsF#b)jq8`?!K(+;!~Sr<+rl6MY`ikM3@hn_)D*b;ehv)G@A@F#kaL;QnWT79y_hc7d=zMBalp7klQQzhlkYO^Mxse zr!rjv9=%SM1nmw>6+pS@wOf@zA|NFdkwcwZh~+DHARUa`Yqwz~2R< zovF&9_F+TLZj$q1Ehnm3%^5-*$!XP%N4zK~a{}fwEpD?jV{%YxL>U!y1 znAXyrgoe_wI=g)7Y}9(`n5MYW(Oi^&Upg7HeCcfXdg+)Kxze$`oRt0uv}03PGi#36 zRIE2fBJwq5Fe2Dtx-m%_lDs$Wgd}{?TvoM^1adXW=xY0!u{8`8DC`TE8S(l(Q=!&f zN-8+eotYfbIDvs1093`n{B_9-^y}6Z1TxlW&Mbnk`&%Zx+(xQ|GxFx!NilbPDP~%* z(rBcCwZYtlA;+$O^NY+7GX_Nwyf0s#j$vh=^+$u&AM*3aKWw~umGh`prkf3`{lDN$ zQ^w_HS1W7V1QJc&nn0rITL9ruUh}J_h*FR&q7)>H$WLiwCu)i)1<4{xL9&PvTH?8L zL#54Qo6?e&l0}rz5>2lIo&+LaAL1eAdqq6Gj*5r?qUkliuH=>^4lPRhpdH}t1Dbm5 zyC*m`3#aNY?t5?gE~_9FFtTC$$!ym z{8yg!ahvQZCcF54IUJtPCfm+Ipc5&q8lJ%63T*=&mh&L(u02SUfb?fwpWZAmJ2wEv zAd!ei<-`z?h({16!9^nCd4Nd7BiUjr z=D1rvBJ@reB8@&GN%lS>Zx6s*4f9|bPZ1nNvBO&1NX>8&LmUi2jfB#c_Y?_7cXqTa z@>C|oZ$|y;8_qD>*Sqaq7s8N-;=B@bykndk88;6~f==Vou)2ucF+N_d=xMP;_;aF$ zWzOeY}V0&<*F1q7$iGEqmh1{y6a}673m^{Q?e` zN{%5GIpAc4L&;T<*Oi_Pinrs-;SpTK<=Gqj+ed|Q`S`bkS$_!PM(2oUoj91|^5k+d zD8`qB0U+wvKRYT)0QS!c+yeiuIPG8Q#keSZuQ(r1r{w_xVi{sOn8IlX@bUSq9Kk!s zInsD?RGys--#sY)F&H9J_iX&2c!nAb21nE4bTB;EEN=(J(HNdJ*UZJ+a&}5G5f(dp zJ1#)#VEVu%8qD7fH-%s-ehOCXv^+U24qQ=AUgR3jy<#+SOKd;MY#&1){&ujkz`tz{ zhWOiUx8Z!GZx6MH%xtsz_$z3oee3^dd3%z7&zH9^@B8`kw7Kg=$rmY)!*cr?uD2DReF<1mY-=gA;!c7I)T5*N&%hzz48wf@C<( zhH&8|fb1mG)AE>3T>uT^p5hgegwK$7Xk4t7a!UPK!P#jZnhJz42st-rwv#Sk6Kyo@ z2~O&xJx~@G<29CpVPPJeHK>t)(Nk3mz+G3*p;K_4)y2C21TV01gU~~8h0tlsbR`Z(Wf+t?D+bL@?=z0q5#O^B;83xevZSxR&0X}U{^j+qQ($gGs^Pk0QQ5>HaLvxQ0l2Bga25Kt=&crpf|0V4_X zc_emGI(CP@IlfIEE=`D-2Uo8b4r-I02&1I04ERFYZVs z34YL8J>VP8(DcyP-LDPAm50kWlj%{tBJ@0`G(O7IgdXG+>XRyh>_Pw~M>`$;yqsmk zZhHvSeQ+01rxjvdZ>7A2++b6h`yq2b+PsQ4+DUuKc;c51xck?2Mt17^7{V*)H~@8D 
z`V8GD18gUkA?X;X3Ty^39M~D8CrCG#{*rxQD;_PpM2B90k*OvG zfRcuOupdj02dnjjPpjf14HtTXU$%)=gqFfc+i4mWx_2Jhk6LmvLJ^dKoSZ1<3CmO>YT+m&cmVG)V`HQ^2c*s4 zyzJwLMIZ+yejPtYIcu=e5>cQoG|iH$M9x>dXqCu;tyhU|b5E%EoUT`i;-x;$e%J+m z$E!rHJAKeqqP_Jh5v45sLG)xTV!cX~@DSs3Wa+TExo&$GeF?FNKlW zrCSsrf?PQ=^Mp6@u0ogYas>jh@zNu;yy z)YHSxBkw*-{KW(@hw(lIG@rV`65Oo78_Wos{;=`W;+NC#X@pX{=wy6US5#T^*1@q; za^?t6mBd_U!>6kgd$cu6vP@V{Fidbr5R;|{`=0$@J_bykN1i+v56Vn2rf8bgtHnilDm7O;R)9mqlr$#Q`bH>`y|HbTiN z9KFI^_Jm6qQIPivfrcNHN0ugduptjHHAfY@6dDs83@HKsE~9cyS-3jJ85`^9L(f%V2v_5b?WQ{V z_;F@*b&N7*>KLgY^2s@VSBM|4^hb_}-jT~@eVhG0%Ur)PuK9$A>tH)#1}0R zMYo`qCCsL$dzI*IZRcvSY(y&-iepIC1ro(oY9y{00#0@b3*c#Tq2K&O`Sx3U zlo43ZaG#KO-_k`Cebh$P42He=8k zNgmaJyeNGjcrD2Xf>*L265Im8*#!e%El3Z^j1#3NZEjTQBLuP z9WE988vKQzbU}n@g2U>FyIIK2dPrrMb2Q~+;G*WkPE_BdWOR}v8|O$U{t`&fTI_U2 z+4H<-(MbmM#!2(m%JY1rC@{c+$5w=TIY$pCcSxVV|KIaDitm%dC-Tv?>e6K7YsPn4 zk93rvIQ3{+IIZ7qtj?QZ;?QJGp?V}95H;&2c%0uxlS6HN9>v2v!A0v_Hq6uDIQ0ye z6UpIQq1FHjF!@YPn9PTG zib_zk|Iyd(GX}Ayy}|Lp+=rTnC)incoR>#?ndaGL9@ib`<_FS_5bC0FaIUx5BA!^PYYU)gmPz@Z%%-PIcIiwWI0Q5Hc zDJ9waDOo|;p8V?{K0*7Ol9TY0E;;)7kf29KxZTalZE@J)$OucVAKrj1>*ng?Nu%zE zefIgIL~&+>ly!u2NyU&1cGn|+4wG`O(n@~6(6>F@Mv|Hxb5ji z5q?v7-AQPk#2&pcbQ5wX{v0q)fa8tqQWx*Z;4zLhmziwV^u}||J*DA%k;{y`-UC^| zIBm-5-5<6=o&0P9PCaZMO>|pwk`v4(eUMxUXA|F??vMqMJn1uEFP&HFNxQn$9wp@W z+LKt7g=8`+n3CPgdK4#Qc?u@4?pn+HHdvg*ufS=cE-w(;@;7obB+5AzaI7Fh&BXcP zeMD|kGUj{Pt&vj;BT~;;rRd;#$P^8sfrq%usboXZ>MkJFI3TuCv`|eK6?{uIs^L;X z(Ks)#v1sQeLD4uxwotHO0(SRzd`K@Z_4gRAY0Lc6Icoi1Oi zy=)!kCZ`cHua-iiY{~slX9qZ#2h*|x5Y-Z!en%}-nF{EBBsmgRTp!Yb<$7ye){Ja! 
z!%`JsBe~>7=RDP-Y$Af2<_0)6pgcLt7AW(njh0>)14|JnojZH)CqG${?g0o8L8Eg|NOyyNcd?)B7t4YWjj?;Z3flX5U=jRvFM@vwg~{p>S7 zMj(#2iQ!E~hE1i;G*9N0eu8M*;~-PG-vL)TNgD<3s<6qNnvcBq1Jhhw0fHAyB5)v$ zZJs65LOw9lwFQ~?vW~d9Fmr7|jc|2$;K(f5zL-zmxQIGJU@z+}XIt4xVc5)rNg*b`vDFt!I@K95T~sBwd3=>Vni;u&a6QG^FctzHYCz zZm;dozt`16|11Z_4j1&Xze(Q}Zba3J@orR%5lW2HxA5lUJHI?@liTSgR@dEQ*A=V87# z-JQD-z&bg+CvaoA6>!H#PFWT>HX_KiZ)5G-xU0U6SUY2s(LpM)Nef%*7SpxW#%&;B zcaMxrdbdCV)`P`W&dTu_EwA`1O{Y4SBD}+Ub!jaKn|ic6toQhi^C)>|H}W{Bk}In-Ft~ z$g520DGA{Vcgs@}v^=fPp`MfMZ)B`&G^hg8Bdg2jaAw3lO_~%6to{r=Pd9dg08?f9 zoxro?#Az~|QnPraLImDb!p$f=*8CD1+?Mj1h;J^t<3x#w?Q(0(RlO$k4oMKl!%l}@ z9#S0tQeNhLCt3t}`{~AQ`Aj5o9{WOmEvJchN~ojxS94n3QTE$u_P-YUQ=b)2R>v1^ zAjC=4@(h<$BARnNSkqM;Sao1M9VYATnnwL&?ds}{uG}ECtfx!nvTH}z+R?Rkbfw<0 z59H{Id`s_rhv<9xxpvnfKFR3j46wDMOC7+WvqY9D4PJd+hxmXG!A^%8j5}S*(GIVm zt$WGb?VbQ>VaLeMq`R@%g1N)%g5^>YWZzjW#7DX>XR0hSS+utGdk1l2R~HJOd)p`& za7Ue8vLrt*#76=C9YTC|*C9TKgT&vk_IAyRii%j)-9zlQYG`n*Mp|#83hhDR z%)82bOx4O^MiN3bFU-hxYI&y_q*jiZVHj0sUbqoYqgfDe#QLGhZOc}SL-tn@M|Um` zfGxn0hB2jYse=f8k+)*`4N)Kq?U9(!a1iCAmc$gPyNGt@f~Sp5^^uK;s+avcbQ3*{ zyzR>dL~AV1SJ%W?>kU@td8Tmy33JNw;V$M~hv%iNJ?oqkafC zk%TkQ*iL$Em;t^=G}Iyo(eKYa^pjUQ=qkT{=+bUuk83D0{y&hM!DMg1zy6+P=83O;XyU85CQiHDD{4M|_qX)v2E?^=m5(Yexu3C^ka#H1o79uDA{@5FHx$N53i zU(#13Xb?bSXtKT{?F24(_CVn^Id0c(SWI zP-f9FKUuGl78g}_dB4%ljx&{YCd!(v57#MHJY45JJx6m-)(QR`OsSl^>BgR;M%Pck zVsRI8=8wehMB6zNPtzT67tsxq>>u;DwL2G4JbbH!J4aJ(=b6hXJKS!;xzxP(Y-cC$ z^jXCfFpEq|U1JEp=VFJ!~4HWV_f9r?hOC5-gm0kW!QT zion6gLS{6&uj-(U?yC%Fv->Iq{(ZTxAU`)g2x>Vu7P!agsI?nw?Z#TWu|B*T%XP=Q z5b@MmyRkT)rN7^}8!HHSR4rPM>wqUcQdJ^bbe16#`h%XH73r@t$B2A}%@`3J+4o@% zfMOda^Gb_nLf2(P2G^yV(|_kqtf&#m2w9PE?UwMwLNagr^dGrB9+o+G4@T$I`dLim zxmxWgWVX7Li;Vy{r{qqYlI#R-g1Rm~Bbn26h-VrDw9u2|Hc&0;$`KC>IfX!4I)Cml zl;?I%R!(NOGQA6%g_1N*xH~%G?wy=px=A;tm5Ik;U&C@f$@m&ppJ><7A{lkYiV>iT{_eb4k+v+j?wuzMM{Epbo3k`h&rSs*)iLZ zYoR$NhDB)Kd+qi)wZ!ehO-LGHg{DT&M5^8ZfQz+P-X z@~MlwOnHHNNtNmt5C*>X)BMDRrXb^XBnHtcfJ3=y{}u2<2csTZp7a3;_ThI 
z?(hlH_#=TCfHX{zbl8^d>hcRc2LXW@Tn&WqtS8ufO=qx8MB7-ym$<2@pR1;=AvaJB)(*sahGQ z5U_1l2HxLV8RutCq(mDaN0hrdb&OC9;;LmElm zv`Y57TN6l7#5>bUQn98EgZOq+&O(qf?d`IGlJYld;OX{um8OKeA6yO|VanpxF*P%) z^2nA7B7v&r?4rHBlfCc;gdi%VsM&a-Z%_quEt)bk!>U@>U_^aNHXG7_SgXb;EqV$f zvCG6Fy{65Ek;fi*e~a{*Hk*R0^e*rzyD1hyJerb7F?rwDaEo$ljkX&wxxV=p1rs~O zKiEAn+r^vhwV7~e+M&5-D^A6z+SilmY&j-#p%?Et+KK75cW4Z^uWN>zdqVR}s)In1 z>W9|uYp$LSX$?EBqtd)u(=ZMJhYZ#eI|v=c&QT|(l(S6R~sqqEtqIbEf2YubN`X$nT=;m=}Ho3Pkm zz$DpLsljCmr6~o0y%KQLw(S+T2czd-Ro}1Y5?qo&gc*16fHVMXA+ee<%-zj%vAnwr z_Gu9-ehOQElmO^kvsJDIu4|T~=@scPyQMoY;~aM9;0`Sc1Z3|$c<^BGP!=vzYzbnM zO}x9lSOMj4pg!D$DvzWXEyA}N?Fl#I0p+5;xEpLpL%Q9_g1B9KC+Mx zUpy6Kd#4!BW+Oam-E!pRobN$}h#TK3{@0ge1^!y%7kEaX(E^J}_HeJJ@`|Evl4@uS z?8RYZ$?D5Z$pGj53hE!YIKfffixZ!#HJEcHF;icI-0e!I18s2J<5~P8y3Okwsochs zoz&EhbYr%)`Q9Wl#0Ww^6uX^n1^k5OAwOxu<0n)b`H9#?{)E0IKObTYf?5+$u2`TF zMT}<&zLW?F&VIvOnBrb>#cp3w8{2}2VvYghH>g#P#&K~W7Esc3e2c7(FCR!y_~BN$ ztg1nc;>CYOjfePvK(nw8 zvh^QkpFYYy{Y?#)%!7KH#CSnJQ)E!U-%){K#Wr@}9-_*>bcR60Izu4e+ZkGr8rC*L zV!*Y{keFzh86vY`k~x<~mT>ZN<`~$QdA&AB8|0sDRveJ!+&AFizU?RJAgvkVmxzAfOgaaWo*=Ww_P^AgejGFf0%#hKS z%8*gxyfs2r<>lvOB|1-3r}X|f$c|JGObdLJM#$iivqIFoU>_q*KsEPPNGO-JRz!bZ z`T!5pfNV1@VZ;2A442WDTt#?$9X2WPjTY$7ZNSJt%Wml`m@qRg`fzIgGEErpT+W1< zt+HWU&a4I-M&CvoX7Ff>wqOBUX0~=)M!fJ@d&3$sYWJ+NVs0%IF{oEtD9~d&k2}iV zjyoL_9M)#bu)p0Amx!6%5V1k0f$Re`#jZLezIq7ioWLb48ft?1lr%E#d&~D zn)?vUsTmtg28u+t?d0qnAg~r=hj9ly2LB7ObdHW*pD&i9v+?P4<5%@C#%{+g5g>Yd z+)`+rK?^ar$UU5@*)yrCn~IqH&#_2|-9#%>o2t60NET4qN)`}(dXmM6=;i^XMY4d> zB3VEQE++SOQ-`8GWl9hZ)z^E4O`DxBV8T0DZC`U=9~~LnYlQSH<+40K8cv z7Ce&*OKdw|3B@MUD;B1o;??~8@?E`#Sb9fCp+u}?AisDKRJI1bThn>Tv&-4+-EzL; zGJiW;)kSZ9eh1E)6+%E_nOCL`bTQ9AeYmrkdqc@n8}8gxBumLDnPw?DA@)3=v`7|E zS|kf7!6na|X^||Tv`7|Ef=eDyS|kf7Es_P4;BqSj+Jt~PLvZ8Ve>W5Y+)CmSzfubL zg12JeHub`7>V=9b(OZ+asUqRIaZQ>;vIBZued1uG+bA7Gi59#SCAXsFR+RkwqQvfg z-dG>$DoS>DpSJxK%PEdnj+fI#F+Q6V%h%I#UM${CkIV6_{Bcaf4vwd`zdLsjob~*h z@Bj1|waZ0;|IW^r*a5n@gfUN+ z?|X+@gpEBM918Y3Q{mOtCK~kl{=$yD${hI-;0CMUfN;ZCgk&C>*{6KIZnug_LWZ= 
zp$^rC*P96X($nB^t4~Oxm@vW#ds)9L;9y1eh+-GLjYMF}I z6hK9~dy|TCsk^WEU-X{z;R4M5pn)oo;qa%S3V&6Qox%@56$MjLkQ@*#%0X6eW`2J@ zazO5cloXIpMdx)DJ)|{J2Q{(v3R2RV{R!e>gP6ecpNb&%U4T_W5N`4uhuBxj?i3m; z_#I7%psBIDr3d*ye)t6a4hNXL6 zqpbldM;I!n1YDr)3yj}Bjf&YWRwNlqe80ynV#TXIeRvs z(S^?;J_oyx4xm6i#usmX@fDAP4|d%fSY-~wV;nt^g*4O9Z-;~rjRk1wx6qJ#;Ubd|wTf2RIQ{5TkR$Ki&#gSS|B{Zi6IbKBcx-SCPu2Vk_EqQ%8{9Y5s#>2AM) z?(n=n-6?O4Z~_3GcKpSmyCG>cRA<_fHp+ZWisB3t8u&LzDlK3tp4$4%rd|dpW zpfd`M=Kk4mRfPpWxsMU)6&W-_q&I?H0;hn2ofpCR5CU#893vJD%zXEdXSkT_1?pIM zeu(~;Gx)h28@s+0lNo|G7#FQr-4ot*t=^NKhtw|8qCSH`h|wB0WJ8R-eoQ7~z2U01 z0vAf=RpH=r_Jp0E2>oJy9jc6mdvHQOPW6p=cupi-4h6KMiIgQ996UGyc}yS^tMUw@ zV&7I$u|`3CW5K3a1abo(0F$bu>?Po~o(f(q)*IK$>&UK+*G`s^@G#1!3>4;+iZEL& z;cPxeaKns7)FlP>uy{3Hy8Ks=tfRLFqc@j}<#2<9^=5~G5XIY_iqH{B4{gDV*7Hfb$+i=0ZWke=>sx|c8^N|vtc7>+dW56iu`ty16Ogyr1Gy1>LxhIS z2-*`&=PM5|W50tpRDgqc{gw)UN~6S8wL%wQ0T=&H`)Wnb!K?r5eu# z)>L^~L&3k#sLjOp0>`O}M?8_>3_Tis{4v~w;w-GjcR`7cJlJ9#sz{clK(RVP6WEY`$VbI@>$pnhlcC9ig)T{S7GiFr&g8WtF@s$yukyynU&-#cYj$`N3%6~<66*L2Pv zcnn$&FB-rQTYcWMp$@Op=i3!}X!Bj0!m$;_oWoPyXGc$|pxnSr=1s3xSBsQq{P9$I z1qj`D8L=&Zl>!kFn!HAKGkEp|V+b)=JbeZo7SZs<`AH!__ysx=qbR^HhH9PD1@%N} zvBUr}#34&f1B*)f%B@d4P|AT)JmB$?)2k1N6o0&%pC3;b3&ehKb3%^zb0L(#4WMvV zj4fJJwNPy!g%?xM!~~^%`_u6;wrpnS$8TLH&Zg7J0^CW4i}^Vf?9(q>Jj`F(qGnrfin{sSCp@`sorU&?lhoDCcy(Lr%BWPS$b+Za|eqGan( zmqk&RqhrVvGq6gLuLeTb`YJ4j(&rZdy64;#S&w@bCnk(wr=~t%XLz zAr`RYzBhqePwt5}8ZJl}f}-VU2AU?4*xE07$EE9zw;ugPrEld6vLg+z7DpD90S0@< z9jU?aH%`*#R&cp}akXoUXGJJIpr0w^^twa5`5jXCi^_n19jxmDj*pJq{hU~xv-CABIpXpO%Qa2S_P+Fyc*Z{3A|=fY%$*fU0+y8hy*FphN(#U?-Ahzecbc<)ymyWV*BR+08c4H^<&+{)G- zvXz1|h{IOds>0Y)C+p~UZ@#Gzb-t`9LRn3`LjncfJ*!Q%nNf^Fqti(s$>kJM*Cj=l zWEuNIc#kw7@Wd-k4HB@8lZ|?^@bXkQb6!)%OXwXe{mm61+8DkL0_%rtF$oCVT6T}l zga9IElI7XCNqpumq=)RR*k^I&SS3L_ws+t~U^gLJ9~ZUP83l@=M@rgsTviFzdV4VY z<2cI=iPszFCb5|t7twCTb>wT{y%D@xmbmH!hwH6YDRQhlPqb&P@4aO?OxCwzZ$E*|B`B{cExCX?{(1 zH_QF>9^Wo3T;o&6ZnDRl_9u|H1|q9()rZ+-u1MsZN}&&Wom`cpxg()aaYK{CuGYz$ zt^Q_NS7iah24BeI^}WIwW1eJJiK0nvh1wq`_dE5! 
zAV)9oP`ub*R~`Tw%j0T~T6x&#jpcFaN3A^U@5b`D4n)gei>#U=JeXRi7g?=sleEi{ z`JtmYYCIw|4b?f%Bl^~z4Z<>{Id5Sr%Uc^=agw(-tVDHZocLUf4LSK5NGr&W}k_myIo( z*5&f-crz#QFN!T!GbD78IcNTpdz;VhXaZ1e(Io?A$T)5)SzJH+RpHJOApeU=*66r| za?ssG3p&HaDKWoLFobZbEoO#o$fwD*BP5UgkhyLXKx;}+nd@@`$7HaSMN&sC{=0nj zu6*TF!%8yP4*6tbb@_&*aLdp}L#Eh>XdaDs%U6X*V=;O+osVjXW(jL|HwZSFGiCvj zE~qf7zMkWCII)U(b`p~riUyky1Bu%1>7X1i4S=-z;K~ZQSb*xd7I>d+q6u!mMG0Fj z+TdPEIl!GiF9-S+H~t`r@#ZGffo$XD9@TeLac{k%q631MK)DKI`3gB6@Bb--^U@%9& z_N0q@Lw@D<54wIf*`+MP=Usjh1-Dob3@j%vjsliB7~uafj$)DxWEee&O%6)voTw}< z66qiO9qj)eim zT5H2qtwo$&Yk&LGw<8%koJl&YBF>lJe)FFSY>3E4Nd!!LRUpGT1dBL5=v^czq-Evjg zr9;WO9M9ar^o1x}j!R@~Mv|(t84@m+uU;<~A-U=Y^u%#7^o}_3>DlXePBnFYs-gO1 z5@eLhWfHz(H;VWez{zflPOn89w%3I$P9(fGRK3%fmy^|0GmR=yGC!oI*RNBg&Q}fGiWRL z*cZHCxR41Q4hIWk(gq8PgTj)rYk?&N2}=qJEbW-TEN?{0hgMLa+ebd@T2LWILb`l? z`V!~Y-Modm;qCEgK3yCgx&q#XUzl4BT#n{{2y`K7se-*rCY67SD;P=;;3VGSH_~Z% zA~1G}!)F_N=GeBG9myiFA=6RZEh-aIXRZ<%r?KB~dKcv8LP-pFm4khGyQ^;njFUF! zCnt;PQgSMzjotZhQ{4Vw;|o$qgMZEzo83rr9;h;>r6Dj64f7|winRicl${Y=pQ z6^i*y0>v!6gAh`G;6|Mrw;hF(OnJiJ?M^tXxi{gk&`C^ytAN7xMDe}`JiHU&F=zHR zFQ;U(l=3vpsXQb9Zq&Ir;0H>K={uwbCCi54@h@?;ftg2Y$taMSVNy4b!UzI^>SpKX z??y~qhlH%|yKWnWpV&6C;OyoR#AOr=yuxI$JjX>NQ0A?y*F_-o+iR@clNpV5fJx)k zEvGByywFXd(b#MYnWjfC$MbnPoo@gHxNrPw@%S;nnj*f#gz#_g=HpkVb#J?{wgd*;jd(H_G!|e=ZdMXVZnKrzUvA1oN^UVN2>C|a z!Zq%_8mwZp3BHE_lLjU3P#V-NveCGDQwq_^+ALxV1Ozm zV3J>QgC*n%I>fG@V>e+YZT`KTMwMasm-&Z4tVoOTJElkB&&T|7*XO_^+E)z4L@FVa zL+E#SEHV7G2s2=7c|B|fg@m0Hx^%KY9S2)gG8MIt1LP0IReI*aM7Iz+bSkQu z%&TehIi{~d`r0NgSP9aFjaHcLAjfM??v!s+wS^*$OW@n|c9iy3`8LPJ2vM)xcfgYT ziwie}*anvL^2LsfWtF>2BYs#@56STx|6)50;^^%BY+7ldGCGx{tGRv*JNM(I>jyb9 zovM020$s5q+V_M3zQ$A-*GDq5P_;n*LaGo( zYJ6QhdCJg>;@&-3-0LNvr?UhqMjg*eJ+${naI|_y?P~uu-Uv*{Qpizdy?X^vs4faR z7|Fj7doZRsb{wjR@!?LgN$)8sbx?=H8B$$uPZ1XF~J>GxmQ(+p-W~ z;Hho#J0YS=m=k1b#Sb$JZrqFMY%w)3AXkHx=4>)Z>G2tSd$8Abc<&ihN?0((SX^U& zpX0J@+P_1UyQQ=DojrhTN`Aj692!!NRxBWAT%}V*QC1m zu3`9nLs8K2N&Mx-9t0m)Zo$g|k*THbXx=GaUM?lTPBa-xFWx%{>B~fSlj4VR43FSM 
z_EJP5Oz^XS306)%3Sap|-Q`t^QoAy`3=_Y3Vj9{q#@%UXy3i}c$hO+==z(`FxyevV zvN^alajoVraj$sPjh9z?#3MZFYh4inE3ULyCf`nikM%+uSZSGjJM+DXfp zfo?35hn3*d;L-Nz6ksd^$<2~{*dlvLDLe5EKIb_9?A47O%Fg{_(ZhFJ35Nm6*zf%g~qJ4{UY87Z~N{Il~2uKERukNzwV)V=JYpBaUXrBx9(#kF4ehr z%Ga&?IJkg6P4_Wnfx2elMS+_k<6ESo%JG1^BQx*CwL(FJHFBcDTX?UKoT9oS9@Ux6 zxaRrsAz%&d0h2cWZ-V$Se+K{Ccjwef)B9 zUFzs=yNU12bjigGIc4Ua9*;>`qsu1=V8FkBw~^(p-Ru4C6(HNDJI6vu9 zVAq1ZMeId$DKoyVkSD?HhD}35*zbzK62aj267InTRGJxUE@UvWg%ZhGHWW%lUKend z6dS==>TND$h|?BIaI*B=T&Pn^hO%u-$v9bNYA)0;m}yI0aOyJPoq$DRvN|m&v;Q#% z_M$HAHB8>`Agw;vz*anCn9p_WL-pQ>l8Gc$Nd_3rpc10~iJbc(-UtnKOxQMzA)|?r zW)dH~g9HspNORo!f%_U@WP4j&yLK+B+rDTh38f70D@P{)=y32`Mo3GOa5jDa%@^N1 zF5ZpjJcBSRe?+(~k1H^CSGTL&{^sNa+od(w zbNXt^J*}bN(;9>mhFR_i^O2nXDGzN!_Hpz|X1ys9pmKyZE9Mk!Kbl~O07r1R3HTud z{_XCvi0(t=qA|1-a^*>A!4Zdoq4RE#Hvkc_#QZbeGk$gYrS2{Um4N?Ib$Y`%cOlNI-IaXUWr zU)}?Y`SQvoOqti`i{(9CU~Hux}qxp#~Ap`ata2$A_bOAMva^#KXRQM1@)(@vJ+<;poeL^N0#NynOBX zS1I-2d3UJ$eW9X47Al^1hkDQ#Dk@~5;(2$d5Bowzg)CG&?+z8$qxYLvRLDZb^UhG+ z?ajRayE-}8?o~hV3xV!r`)Wz3SGGd_)=64l^?)wdL-y>v)X8^VUiAX+`rf1SxO0tb zPpbl$}TgPV7XbEtwI+ixZjG+AAO>|HueJfs-tbvvx2K>rMqp4NH z>;WTBes{oDodn1X;MH2C159<$LU#1k&||As0&tzSx@eaX^57bfwX<2sD>X0=8R}tg z$f9QE``kg>jF9{HS=7xY$esHPb+LD!MeWQ(?x218K0{^f4O!GrK(5P=iw+v7hYYo` zH)K&k^N>4ep&l|+$KH@d4b4ODpox0OP#=3k7F9G4xq~+9Awz}i4O!IDJmd}H40CnzD%p^CgLjnNsy+tTUN34DpjxM4c?9t8utj+cz`J$)rn+btb*PR3^s2Q| zSB~=HW}0XK4aKoH=$q-G0W{Rc-k@)$g&NSveG$}?ZnKTs$*7mq1t~8jT&aTw(9|X) z>~5fMsDT>TS7yk&!tSVlavNtpMHU8d?7Y0bB8t7|Zn|hW>8y^TD5g6OJLWO%J=;o^ z(i9C;$f}2(^-^Ncs+#68Sfhdp0PQ+z9)opis)|C3;bMPV(vtD6xO2F5`|XHwiSi7=(j|nRmE*ZVJ(U-QE1h5TTxhx z#!D1hmEKkq)}#7j2O0DTbv3LKT~-LMjY-Wl=)ew(h(xD#%3#stE9#>R6Nxr`xNSCE z&HPYw^3Zy1MX@j=(Q7EyqaN#YTo;G+=*h&PO;v86jrAzZibJ!`yb6bPXwJl;O?hs| zVNEKuB4LC?FQV-0%l!fBG;*hcBbbvZ=gZYA=~PmUgmZq7gdEK=80NYq6SQE%!XlzlC1yuI1HwMutB>`MGTy#A|}Ei770NT0Q52sC@qo&lorVX3Nas&)WVEh z#3e0~1(X)a0!nbPp(7DcS|kf7Es_P4;L@Q(F#;uGq9OV6+lEBv3z%rqN&yX?tVBsy z%IHtTuPt6NoFxrCN*G|ciUU@MA$jty1f!ccq?LzTeku+~rZG=1X_1eP7)$^lG1V)_ 
zUHsY)FE#n(Gc(kbYn!D z;jvWQ+Cqk#6M>6xRAgLS;y_$-B*KhGOkLC>bSs4nwC0xb1QSm}HH%(72O|M_Qm6r3 zBCKq*d?QW`be{Ik2EsSD)X=+>XY!&e9fBsDlHN7ploslOlcA>O&SeR{f_A zXprmz)fX`XlwoHker{lBHqaD1A2D-a1f=;#qM5stTA1TD4Q(m&yTED3;*t$}M-~Bc zov1SSXTWd69Z@*s+>snNLip_-USNjt58v`9PZ!}I{G7_m9SkN84Czwm7c+bCCy_XQ zCA$?lI_aFcC`}qTAT4w&a&ARVMTa1PeB>f$LxQ{!Yb1g1Zsp%>6huz)RHaNRa*fQ{ z@tO`LkBi|CQ9-KER8$|gdm<{(GeiUNXt}dAPms;dLWLUGZU%7NSt15`9qo*vDsmXVul z&?ARn^))apC&{HyUZ#{%llR?7KN6v8Ea1`cB{O>0&x3MM8!8QAC1zenM{o)fz2R3L zC-s-?xaw@l+R9QJFrmOp6((lq7ha>|vb5_NP_^o#A-}5!)ru{oZ^rpmwiCHZU*aYj zc_}C+FA0q=@mZ?~1dz3Tx8fN^i+CX|@-t}-50L$M7xAgPpfD@DSqpMnnoH!hzR>29 z)Cl@QniI1{I?pi^1aQ~7U}oi>zL2IA0jp8!OM{&~+Ndx0BkBVqbNLcBlA>OP#|b7P zNp=RU;ZU!^*^>ds@KlCwAkQ7<>ARQu4p5Tk^C5;qhT9I#oI)^tnkrbKaK({Xu~J1^ zMR;whSYiw^AZ4{~2IZ_;D^bs~hyQ{WmPW}qeXZVJ4MZ2jRvgYVIM_Iu8{?pjsp8rx z5)o=Gwxexy#7%MVjrB&thLrb5f+LC@q|wFbOy-qN4Szyf&NDtAeevD*JH>OPHv9UE zzkK`6fBen=_|Lz7@%_I)d)8yT4n{GcW!}Q>*+VG$CqAKA*>qYfLjjDtZtrJxn z%hP{UwT9op^npRW8OBEQ+!zVhSZ^e3JkO1haEtK?J{<#q*+Froq&hBTQ zefBT^4ga{mO?>{#fA_yW`|Mxw^?%~`|NFoHLmsjye}})(t5>J+&rX*}cT~Q_eMaN? W^0Ugr>DlD7+4A(=K>poh-Tx1^cUnUL diff --git a/llama/ggml-quants.c b/llama/ggml-quants.c index 0eda1ffc..dde7f1d1 100644 --- a/llama/ggml-quants.c +++ b/llama/ggml-quants.c @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml-quants.h b/llama/ggml-quants.h index f6d07143..16ad861b 100644 --- a/llama/ggml-quants.h +++ b/llama/ggml-quants.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * @@ -24,136 +24,136 @@ * SOFTWARE. 
*/ -#pragma once - -#define GGML_COMMON_DECL_C -#include "ggml-common.h" - -#include "ggml.h" - -// GGML internal header - -#ifdef __cplusplus -extern "C" { -#endif - -// Quantization -void quantize_row_q4_0_reference(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k); -void quantize_row_q4_1_reference(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k); -void quantize_row_q5_0_reference(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k); -void quantize_row_q5_1_reference(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k); -void quantize_row_q8_0_reference(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k); -void quantize_row_q8_1_reference(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k); - -void quantize_row_q2_K_reference(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k); -void quantize_row_q3_K_reference(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k); -void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k); -void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k); -void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k); -void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k); - -void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k); -void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k); -void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k); -void quantize_row_iq3_s_reference (const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k); -void quantize_row_iq2_s_reference (const float * GGML_RESTRICT x, block_iq2_s * 
GGML_RESTRICT y, int64_t k); - -void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); - -void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); - -void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_iq3_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); -void quantize_row_iq2_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); - -// Dequantization -void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, 
int64_t k); -void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -//void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); - -void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); - -void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_iq1_s (const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_iq1_m (const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_iq4_nl (const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_iq4_xs (const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); -void dequantize_row_iq3_s (const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); - -// Dot product -void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, 
const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); - -void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); - -void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, 
size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_iq4_nl_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); - -// Quantization utilizing an importance matrix (a.k.a. 
"Activation aWare Quantization") -size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_iq2_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_iq1_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_iq1_m (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_iq4_nl (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_iq4_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_iq3_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); - -size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * 
imatrix); -size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); -size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); - -void iq2xs_init_impl(enum ggml_type type); -void iq2xs_free_impl(enum ggml_type type); -void iq3xs_init_impl(int grid_size); -void iq3xs_free_impl(int grid_size); - -#ifdef __cplusplus -} -#endif - +#pragma once + +#define GGML_COMMON_DECL_C +#include "ggml-common.h" + +#include "ggml.h" + +// GGML internal header + +#ifdef __cplusplus +extern "C" { +#endif + +// Quantization +void quantize_row_q4_0_reference(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k); +void quantize_row_q4_1_reference(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k); +void quantize_row_q5_0_reference(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k); +void quantize_row_q5_1_reference(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k); +void quantize_row_q8_0_reference(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k); +void quantize_row_q8_1_reference(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k); + +void quantize_row_q2_K_reference(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k); +void quantize_row_q3_K_reference(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k); +void quantize_row_q4_K_reference(const float * 
GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k); +void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k); +void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k); +void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k); + +void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k); +void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k); +void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k); +void quantize_row_iq3_s_reference (const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k); +void quantize_row_iq2_s_reference (const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k); + +void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); + +void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_q8_K(const float * GGML_RESTRICT x, 
void * GGML_RESTRICT y, int64_t k); + +void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_iq3_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); +void quantize_row_iq2_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); + +// Dequantization +void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +//void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); + +void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); + +void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, 
float * GGML_RESTRICT y, int64_t k); +void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_iq1_s (const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_iq1_m (const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_iq4_nl (const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_iq4_xs (const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); +void dequantize_row_iq3_s (const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); + +// Dot product +void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); + +void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int 
nrc); +void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); + +void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq4_nl_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); + +// Quantization utilizing an importance matrix (a.k.a. 
"Activation aWare Quantization") +size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_iq2_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_iq1_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_iq1_m (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_iq4_nl (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_iq4_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_iq3_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); + +size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * 
imatrix); +size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); +size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix); + +void iq2xs_init_impl(enum ggml_type type); +void iq2xs_free_impl(enum ggml_type type); +void iq3xs_init_impl(int grid_size); +void iq3xs_free_impl(int grid_size); + +#ifdef __cplusplus +} +#endif + diff --git a/llama/ggml.c b/llama/ggml.c index 398bf731..790915e8 100644 --- a/llama/ggml.c +++ b/llama/ggml.c @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/ggml.h b/llama/ggml.h index 00ef4582..9dd8684e 100644 --- a/llama/ggml.h +++ b/llama/ggml.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/grammar-parser.cpp b/llama/grammar-parser.cpp index 6e69d073..70da76db 100644 --- a/llama/grammar-parser.cpp +++ b/llama/grammar-parser.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/grammar-parser.h b/llama/grammar-parser.h index bb25744d..c3f07348 100644 --- a/llama/grammar-parser.h +++ b/llama/grammar-parser.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 
+ * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * @@ -24,32 +24,32 @@ * SOFTWARE. */ -// Implements a parser for an extended Backus-Naur form (BNF), producing the -// binary context-free grammar format specified by llama.h. Supports character -// ranges, grouping, and repetition operators. As an example, a grammar for -// arithmetic might look like: -// -// root ::= expr -// expr ::= term ([-+*/] term)* -// term ::= num | "(" space expr ")" space -// num ::= [0-9]+ space -// space ::= [ \t\n]* - -#pragma once -#include "llama.h" -#include -#include -#include -#include - -namespace grammar_parser { - struct parse_state { - std::map symbol_ids; - std::vector> rules; - - std::vector c_rules(); - }; - - parse_state parse(const char * src); - void print_grammar(FILE * file, const parse_state & state); -} +// Implements a parser for an extended Backus-Naur form (BNF), producing the +// binary context-free grammar format specified by llama.h. Supports character +// ranges, grouping, and repetition operators. 
As an example, a grammar for +// arithmetic might look like: +// +// root ::= expr +// expr ::= term ([-+*/] term)* +// term ::= num | "(" space expr ")" space +// num ::= [0-9]+ space +// space ::= [ \t\n]* + +#pragma once +#include "llama.h" +#include +#include +#include +#include + +namespace grammar_parser { + struct parse_state { + std::map symbol_ids; + std::vector> rules; + + std::vector c_rules(); + }; + + parse_state parse(const char * src); + void print_grammar(FILE * file, const parse_state & state); +} diff --git a/llama/json-schema-to-grammar.cpp b/llama/json-schema-to-grammar.cpp index 40212c0d..eab7dd1d 100644 --- a/llama/json-schema-to-grammar.cpp +++ b/llama/json-schema-to-grammar.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/json-schema-to-grammar.h b/llama/json-schema-to-grammar.h index 9761e6ce..45f4a14a 100644 --- a/llama/json-schema-to-grammar.h +++ b/llama/json-schema-to-grammar.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git d5c938cd7716b9a2ace49a43a469dfbffcff4d28 + * llama.cpp - git e95beeb1fc4621826ddd616776dbdf717366bf5c * * MIT License * diff --git a/llama/json.hpp b/llama/json.hpp index a858728c..e020407a 100644 --- a/llama/json.hpp +++ b/llama/json.hpp @@ -1,24766 +1,24766 @@ -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - -/****************************************************************************\ - * Note on documentation: The source files contain links to the online * - * documentation of the public API at https://json.nlohmann.me. 
This URL * - * contains the most recent documentation and should also be applicable to * - * previous versions; documentation for deprecated functions is not * - * removed, but marked deprecated. See "Generate documentation" section in * - * file docs/README.md. * -\****************************************************************************/ - -#ifndef INCLUDE_NLOHMANN_JSON_HPP_ -#define INCLUDE_NLOHMANN_JSON_HPP_ - -#include // all_of, find, for_each -#include // nullptr_t, ptrdiff_t, size_t -#include // hash, less -#include // initializer_list -#ifndef JSON_NO_IO - #include // istream, ostream -#endif // JSON_NO_IO -#include // random_access_iterator_tag -#include // unique_ptr -#include // string, stoi, to_string -#include // declval, forward, move, pair, swap -#include // vector - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// This file contains all macro definitions affecting or depending on the ABI - -#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK - #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) - #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 3 - #warning "Already included a different version of the library!" 
- #endif - #endif -#endif - -#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) -#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) -#define NLOHMANN_JSON_VERSION_PATCH 3 // NOLINT(modernize-macro-to-enum) - -#ifndef JSON_DIAGNOSTICS - #define JSON_DIAGNOSTICS 0 -#endif - -#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON - #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 -#endif - -#if JSON_DIAGNOSTICS - #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag -#else - #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS -#endif - -#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON - #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp -#else - #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON -#endif - -#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION - #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0 -#endif - -// Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) - -#define NLOHMANN_JSON_ABI_TAGS \ - NLOHMANN_JSON_ABI_TAGS_CONCAT( \ - NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) - -// Construct the namespace version component -#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ - _v ## major ## _ ## minor ## _ ## patch -#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \ - NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) - -#if NLOHMANN_JSON_NAMESPACE_NO_VERSION -#define NLOHMANN_JSON_NAMESPACE_VERSION -#else -#define NLOHMANN_JSON_NAMESPACE_VERSION \ - NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \ - NLOHMANN_JSON_VERSION_MINOR, \ - NLOHMANN_JSON_VERSION_PATCH) -#endif - -// Combine namespace components -#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b -#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \ - NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, 
b) - -#ifndef NLOHMANN_JSON_NAMESPACE -#define NLOHMANN_JSON_NAMESPACE \ - nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \ - NLOHMANN_JSON_ABI_TAGS, \ - NLOHMANN_JSON_NAMESPACE_VERSION) -#endif - -#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN -#define NLOHMANN_JSON_NAMESPACE_BEGIN \ - namespace nlohmann \ - { \ - inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \ - NLOHMANN_JSON_ABI_TAGS, \ - NLOHMANN_JSON_NAMESPACE_VERSION) \ - { -#endif - -#ifndef NLOHMANN_JSON_NAMESPACE_END -#define NLOHMANN_JSON_NAMESPACE_END \ - } /* namespace (inline namespace) NOLINT(readability/namespace) */ \ - } // namespace nlohmann -#endif - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // transform -#include // array -#include // forward_list -#include // inserter, front_inserter, end -#include // map -#include // string -#include // tuple, make_tuple -#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible -#include // unordered_map -#include // pair, declval -#include // valarray - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // nullptr_t -#include // exception -#if JSON_DIAGNOSTICS - #include // accumulate -#endif -#include // runtime_error -#include // to_string -#include // vector - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // array -#include // size_t -#include 
// uint8_t -#include // string - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // declval, pair -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -template struct make_void -{ - using type = void; -}; -template using void_t = typename make_void::type; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -// https://en.cppreference.com/w/cpp/experimental/is_detected -struct nonesuch -{ - nonesuch() = delete; - ~nonesuch() = delete; - nonesuch(nonesuch const&) = delete; - nonesuch(nonesuch const&&) = delete; - void operator=(nonesuch const&) = delete; - void operator=(nonesuch&&) = delete; -}; - -template class Op, - class... Args> -struct detector -{ - using value_t = std::false_type; - using type = Default; -}; - -template class Op, class... Args> -struct detector>, Op, Args...> -{ - using value_t = std::true_type; - using type = Op; -}; - -template class Op, class... Args> -using is_detected = typename detector::value_t; - -template class Op, class... Args> -struct is_detected_lazy : is_detected { }; - -template class Op, class... 
Args> -using detected_t = typename detector::type; - -template class Op, class... Args> -using detected_or = detector; - -template class Op, class... Args> -using detected_or_t = typename detected_or::type; - -template class Op, class... Args> -using is_detected_exact = std::is_same>; - -template class Op, class... Args> -using is_detected_convertible = - std::is_convertible, To>; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include - - -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson -// SPDX-License-Identifier: MIT - -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - */ - -#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) -#if defined(JSON_HEDLEY_VERSION) - #undef JSON_HEDLEY_VERSION -#endif -#define JSON_HEDLEY_VERSION 15 - -#if defined(JSON_HEDLEY_STRINGIFY_EX) - #undef JSON_HEDLEY_STRINGIFY_EX -#endif -#define JSON_HEDLEY_STRINGIFY_EX(x) #x - -#if defined(JSON_HEDLEY_STRINGIFY) - #undef JSON_HEDLEY_STRINGIFY -#endif -#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) - -#if defined(JSON_HEDLEY_CONCAT_EX) - #undef JSON_HEDLEY_CONCAT_EX -#endif -#define JSON_HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(JSON_HEDLEY_CONCAT) - #undef JSON_HEDLEY_CONCAT -#endif -#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) - -#if defined(JSON_HEDLEY_CONCAT3_EX) - #undef JSON_HEDLEY_CONCAT3_EX -#endif -#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(JSON_HEDLEY_CONCAT3) - #undef JSON_HEDLEY_CONCAT3 -#endif -#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(JSON_HEDLEY_VERSION_ENCODE) - #undef JSON_HEDLEY_VERSION_ENCODE -#endif -#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if 
defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) - #undef JSON_HEDLEY_VERSION_DECODE_MAJOR -#endif -#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) - #undef JSON_HEDLEY_VERSION_DECODE_MINOR -#endif -#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) - #undef JSON_HEDLEY_VERSION_DECODE_REVISION -#endif -#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(JSON_HEDLEY_GNUC_VERSION) - #undef JSON_HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) - #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) - #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) - #undef JSON_HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_GNUC_VERSION) - #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_MSVC_VERSION) - #undef JSON_HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) - #undef 
JSON_HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(JSON_HEDLEY_MSVC_VERSION) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(JSON_HEDLEY_INTEL_VERSION) - #undef JSON_HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) - #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) - #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) - #undef JSON_HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_INTEL_VERSION) - #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_INTEL_CL_VERSION) - #undef JSON_HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) - #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) - #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_INTEL_CL_VERSION) - #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= 
JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_PGI_VERSION) - #undef JSON_HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) - #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) - #undef JSON_HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_PGI_VERSION) - #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_SUNPRO_VERSION) - #undef JSON_HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) - #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_SUNPRO_VERSION) - #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= 
JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) - #undef JSON_HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) - #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) - #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) - #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_ARM_VERSION) - #undef JSON_HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) - #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) - #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) - #undef JSON_HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_ARM_VERSION) - #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_IBM_VERSION) - #undef JSON_HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) - #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) - #define JSON_HEDLEY_IBM_VERSION 
JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) - #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) - #undef JSON_HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_IBM_VERSION) - #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_VERSION) - #undef JSON_HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -#if (__TI_COMPILER_VERSION__ >= 16000000) - #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif -#endif - -#if defined(JSON_HEDLEY_TI_VERSION_CHECK) - #undef JSON_HEDLEY_TI_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_VERSION) - #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL2000_VERSION) - #undef JSON_HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) - #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL2000_VERSION) - #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, 
minor, patch)) -#else - #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL430_VERSION) - #undef JSON_HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) - #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL430_VERSION) - #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) - #undef JSON_HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) - #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) - #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) - #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL6X_VERSION) - #undef JSON_HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) - #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if 
defined(JSON_HEDLEY_TI_CL6X_VERSION) - #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL7X_VERSION) - #undef JSON_HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) - #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL7X_VERSION) - #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) - #undef JSON_HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) - #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) - #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_CRAY_VERSION) - #undef JSON_HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) - #if defined(_RELEASE_PATCHLEVEL) - #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) - #else - #define JSON_HEDLEY_CRAY_VERSION 
JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) - #endif -#endif - -#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) - #undef JSON_HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_CRAY_VERSION) - #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_IAR_VERSION) - #undef JSON_HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) - #if __VER__ > 1000 - #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) - #else - #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) - #endif -#endif - -#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) - #undef JSON_HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_IAR_VERSION) - #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TINYC_VERSION) - #undef JSON_HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) - #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(JSON_HEDLEY_TINYC_VERSION_CHECK) - #undef JSON_HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TINYC_VERSION) - #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_DMC_VERSION) - #undef JSON_HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) - #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if 
defined(JSON_HEDLEY_DMC_VERSION_CHECK) - #undef JSON_HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_DMC_VERSION) - #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_COMPCERT_VERSION) - #undef JSON_HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) - #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) - #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_COMPCERT_VERSION) - #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_PELLES_VERSION) - #undef JSON_HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) - #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) - #undef JSON_HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_PELLES_VERSION) - #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_MCST_LCC_VERSION) - #undef JSON_HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) - #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) - #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_MCST_LCC_VERSION) - 
#define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_GCC_VERSION) - #undef JSON_HEDLEY_GCC_VERSION -#endif -#if \ - defined(JSON_HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(JSON_HEDLEY_INTEL_VERSION) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_ARM_VERSION) && \ - !defined(JSON_HEDLEY_CRAY_VERSION) && \ - !defined(JSON_HEDLEY_TI_VERSION) && \ - !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ - !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(JSON_HEDLEY_MCST_LCC_VERSION) - #define JSON_HEDLEY_GCC_VERSION JSON_HEDLEY_GNUC_VERSION -#endif - -#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) - #undef JSON_HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_GCC_VERSION) - #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) - #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) -#else - #define 
JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) - #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) - #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_IAR_VERSION) && \ - (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) - #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else - #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) - 
#undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) - #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_BUILTIN) - #undef JSON_HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else - #define JSON_HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) - #undef JSON_HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else - #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) - #undef JSON_HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else - #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_FEATURE) - #undef JSON_HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) - #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else - #define JSON_HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) - #undef JSON_HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) - #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else - #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) - #undef JSON_HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) - #define 
JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else - #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_EXTENSION) - #undef JSON_HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else - #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) - #undef JSON_HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else - #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) - #undef JSON_HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else - #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE 
-#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_WARNING) - #undef JSON_HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else - #define JSON_HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) - #undef JSON_HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else - #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_WARNING) - #undef JSON_HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else - #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - 
JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) - #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_PRAGMA(value) __pragma(value) -#else - #define JSON_HEDLEY_PRAGMA(value) -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) - #undef JSON_HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) - #undef JSON_HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) - #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else - #define 
JSON_HEDLEY_DIAGNOSTIC_PUSH - #define JSON_HEDLEY_DIAGNOSTIC_POP -#endif - -/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") -# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(JSON_HEDLEY_CONST_CAST) - #undef JSON_HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - JSON_HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define 
JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_REINTERPRET_CAST) - #undef JSON_HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else - #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_STATIC_CAST) - #undef JSON_HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else - #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_CPP_CAST) - #undef JSON_HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") -# define JSON_HEDLEY_CPP_CAST(T, expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define JSON_HEDLEY_CPP_CAST(T, expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else - #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") - 
#define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") - #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(JSON_HEDLEY_DEPRECATED) - #undef JSON_HEDLEY_DEPRECATED -#endif -#if defined(JSON_HEDLEY_DEPRECATED_FOR) - #undef JSON_HEDLEY_DEPRECATED_FOR -#endif -#if \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - 
JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) - #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - 
JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else - #define JSON_HEDLEY_DEPRECATED(since) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(JSON_HEDLEY_UNAVAILABLE) - #undef JSON_HEDLEY_UNAVAILABLE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else - #define JSON_HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) - #undef JSON_HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) - #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - 
JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) - #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) - #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ - #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else - #define JSON_HEDLEY_WARN_UNUSED_RESULT - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(JSON_HEDLEY_SENTINEL) - #undef JSON_HEDLEY_SENTINEL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else - #define JSON_HEDLEY_SENTINEL(position) -#endif - -#if defined(JSON_HEDLEY_NO_RETURN) - #undef JSON_HEDLEY_NO_RETURN -#endif -#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_NO_RETURN __noreturn -#elif \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_NO_RETURN 
__attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L - #define JSON_HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) - #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) -#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) - #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) - #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) - #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) -#else - #define JSON_HEDLEY_NO_RETURN -#endif - -#if 
defined(JSON_HEDLEY_NO_ESCAPE) - #undef JSON_HEDLEY_NO_ESCAPE -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) - #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else - #define JSON_HEDLEY_NO_ESCAPE -#endif - -#if defined(JSON_HEDLEY_UNREACHABLE) - #undef JSON_HEDLEY_UNREACHABLE -#endif -#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) - #undef JSON_HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(JSON_HEDLEY_ASSUME) - #undef JSON_HEDLEY_ASSUME -#endif -#if \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_ASSUME(expr) __assume(expr) -#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) - #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) - #if defined(__cplusplus) - #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) - #else - #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) - #endif -#endif -#if \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(JSON_HEDLEY_ASSUME) - #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) -#endif -#if !defined(JSON_HEDLEY_ASSUME) - #if defined(JSON_HEDLEY_UNREACHABLE) - #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (JSON_HEDLEY_UNREACHABLE(), 1))) - #else - #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) - #endif -#endif -#if defined(JSON_HEDLEY_UNREACHABLE) - #if \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) - #else - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() - #endif -#else - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(JSON_HEDLEY_UNREACHABLE) - #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) -#endif - -JSON_HEDLEY_DIAGNOSTIC_PUSH -#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") - #pragma clang diagnostic ignored "-Wpedantic" -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) - #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) - #if defined(__clang__) - #pragma clang diagnostic ignored "-Wvariadic-macros" - #elif defined(JSON_HEDLEY_GCC_VERSION) - #pragma GCC diagnostic ignored "-Wvariadic-macros" - #endif -#endif -#if defined(JSON_HEDLEY_NON_NULL) - #undef JSON_HEDLEY_NON_NULL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else - #define JSON_HEDLEY_NON_NULL(...) 
-#endif -JSON_HEDLEY_DIAGNOSTIC_POP - -#if defined(JSON_HEDLEY_PRINTF_FORMAT) - #undef JSON_HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(JSON_HEDLEY_CONSTEXPR) - 
#undef JSON_HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) - #if __cplusplus >= 201103L - #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) - #endif -#endif -#if !defined(JSON_HEDLEY_CONSTEXPR) - #define JSON_HEDLEY_CONSTEXPR -#endif - -#if defined(JSON_HEDLEY_PREDICT) - #undef JSON_HEDLEY_PREDICT -#endif -#if defined(JSON_HEDLEY_LIKELY) - #undef JSON_HEDLEY_LIKELY -#endif -#if defined(JSON_HEDLEY_UNLIKELY) - #undef JSON_HEDLEY_UNLIKELY -#endif -#if defined(JSON_HEDLEY_UNPREDICTABLE) - #undef JSON_HEDLEY_UNPREDICTABLE -#endif -#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) - #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - 
JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) -# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(JSON_HEDLEY_UNPREDICTABLE) - #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(JSON_HEDLEY_MALLOC) - #undef JSON_HEDLEY_MALLOC -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - 
JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_MALLOC __declspec(restrict) -#else - #define JSON_HEDLEY_MALLOC -#endif - -#if defined(JSON_HEDLEY_PURE) - #undef JSON_HEDLEY_PURE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && 
defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PURE __attribute__((__pure__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define JSON_HEDLEY_PURE -#endif - -#if defined(JSON_HEDLEY_CONST) - #undef JSON_HEDLEY_CONST -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_CONST __attribute__((__const__)) -#elif \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define 
JSON_HEDLEY_CONST _Pragma("no_side_effect") -#else - #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE -#endif - -#if defined(JSON_HEDLEY_RESTRICT) - #undef JSON_HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) - #define JSON_HEDLEY_RESTRICT restrict -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_RESTRICT __restrict -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) - #define JSON_HEDLEY_RESTRICT _Restrict -#else - #define JSON_HEDLEY_RESTRICT -#endif - -#if defined(JSON_HEDLEY_INLINE) - #undef JSON_HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) - #define JSON_HEDLEY_INLINE inline -#elif \ - defined(JSON_HEDLEY_GCC_VERSION) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) - #define JSON_HEDLEY_INLINE __inline__ -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - 
JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_INLINE __inline -#else - #define JSON_HEDLEY_INLINE -#endif - -#if defined(JSON_HEDLEY_ALWAYS_INLINE) - #undef JSON_HEDLEY_ALWAYS_INLINE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif 
JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE -#endif - -#if defined(JSON_HEDLEY_NEVER_INLINE) - #undef JSON_HEDLEY_NEVER_INLINE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") -#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) - #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) 
-#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) - #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) -#else - #define JSON_HEDLEY_NEVER_INLINE -#endif - -#if defined(JSON_HEDLEY_PRIVATE) - #undef JSON_HEDLEY_PRIVATE -#endif -#if defined(JSON_HEDLEY_PUBLIC) - #undef JSON_HEDLEY_PUBLIC -#endif -#if defined(JSON_HEDLEY_IMPORT) - #undef JSON_HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define JSON_HEDLEY_PRIVATE -# define JSON_HEDLEY_PUBLIC __declspec(dllexport) -# define JSON_HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define JSON_HEDLEY_PRIVATE -# define JSON_HEDLEY_PUBLIC -# endif -# define JSON_HEDLEY_IMPORT extern -#endif - -#if defined(JSON_HEDLEY_NO_THROW) - #undef JSON_HEDLEY_NO_THROW -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define JSON_HEDLEY_NO_THROW __declspec(nothrow) -#else - #define JSON_HEDLEY_NO_THROW -#endif - -#if defined(JSON_HEDLEY_FALL_THROUGH) - #undef JSON_HEDLEY_FALL_THROUGH -#endif -#if \ - 
JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) - #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) - #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ - #define JSON_HEDLEY_FALL_THROUGH __fallthrough -#else - #define JSON_HEDLEY_FALL_THROUGH -#endif - -#if defined(JSON_HEDLEY_RETURNS_NON_NULL) - #undef JSON_HEDLEY_RETURNS_NON_NULL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ - #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else - #define JSON_HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(JSON_HEDLEY_ARRAY_PARAM) - #undef JSON_HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_TINYC_VERSION) - #define JSON_HEDLEY_ARRAY_PARAM(name) (name) -#else - #define JSON_HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(JSON_HEDLEY_IS_CONSTANT) - #undef JSON_HEDLEY_IS_CONSTANT -#endif -#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) - #undef JSON_HEDLEY_REQUIRE_CONSTEXPR -#endif -/* JSON_HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ -#if defined(JSON_HEDLEY_IS_CONSTEXPR_) - #undef JSON_HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) -#if defined(__INTPTR_TYPE__) - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -#else - #include - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -#endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_IAR_VERSION)) || \ - (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) -#if defined(__INTPTR_TYPE__) - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? 
(void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -#else - #include - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -#endif -# elif \ - defined(JSON_HEDLEY_GCC_VERSION) || \ - defined(JSON_HEDLEY_INTEL_VERSION) || \ - defined(JSON_HEDLEY_TINYC_VERSION) || \ - defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ - defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ - defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ - defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ -((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(JSON_HEDLEY_IS_CONSTEXPR_) - #if !defined(JSON_HEDLEY_IS_CONSTANT) - #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) - #endif - #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else - #if !defined(JSON_HEDLEY_IS_CONSTANT) - #define JSON_HEDLEY_IS_CONSTANT(expr) (0) - #endif - #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(JSON_HEDLEY_BEGIN_C_DECLS) - #undef JSON_HEDLEY_BEGIN_C_DECLS -#endif -#if defined(JSON_HEDLEY_END_C_DECLS) - #undef JSON_HEDLEY_END_C_DECLS -#endif -#if defined(JSON_HEDLEY_C_DECL) - #undef JSON_HEDLEY_C_DECL -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { - #define JSON_HEDLEY_END_C_DECLS } - #define JSON_HEDLEY_C_DECL extern "C" -#else - #define JSON_HEDLEY_BEGIN_C_DECLS - #define JSON_HEDLEY_END_C_DECLS - #define JSON_HEDLEY_C_DECL -#endif - -#if defined(JSON_HEDLEY_STATIC_ASSERT) - #undef JSON_HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(JSON_HEDLEY_NULL) - #undef JSON_HEDLEY_NULL -#endif -#if defined(__cplusplus) - #if __cplusplus >= 201103L - #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) - #elif defined(NULL) - #define JSON_HEDLEY_NULL NULL - #else - #define JSON_HEDLEY_NULL JSON_HEDLEY_STATIC_CAST(void*, 0) - #endif -#elif defined(NULL) - #define JSON_HEDLEY_NULL NULL -#else - #define JSON_HEDLEY_NULL ((void*) 0) -#endif - -#if 
defined(JSON_HEDLEY_MESSAGE) - #undef JSON_HEDLEY_MESSAGE -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define JSON_HEDLEY_MESSAGE(msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - JSON_HEDLEY_PRAGMA(message msg) \ - JSON_HEDLEY_DIAGNOSTIC_POP -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) -#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#else -# define JSON_HEDLEY_MESSAGE(msg) -#endif - -#if defined(JSON_HEDLEY_WARNING) - #undef JSON_HEDLEY_WARNING -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define JSON_HEDLEY_WARNING(msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - JSON_HEDLEY_PRAGMA(clang warning msg) \ - JSON_HEDLEY_DIAGNOSTIC_POP -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#else -# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) -#endif - -#if defined(JSON_HEDLEY_REQUIRE) - #undef JSON_HEDLEY_REQUIRE -#endif -#if defined(JSON_HEDLEY_REQUIRE_MSG) - #undef JSON_HEDLEY_REQUIRE_MSG -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") -# define JSON_HEDLEY_REQUIRE(expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define JSON_HEDLEY_REQUIRE(expr) -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(JSON_HEDLEY_FLAGS) - #undef JSON_HEDLEY_FLAGS -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) - #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else - #define JSON_HEDLEY_FLAGS -#endif - -#if defined(JSON_HEDLEY_FLAGS_CAST) - #undef JSON_HEDLEY_FLAGS_CAST -#endif -#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - JSON_HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(JSON_HEDLEY_EMPTY_BASES) - #undef JSON_HEDLEY_EMPTY_BASES -#endif -#if \ - (JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else - #define JSON_HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. 
*/ - -#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) - #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) - #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else - #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) - #undef JSON_HEDLEY_CLANG_HAS_BUILTIN -#endif -#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) - -#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) - #undef JSON_HEDLEY_CLANG_HAS_FEATURE -#endif -#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) - -#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) - #undef JSON_HEDLEY_CLANG_HAS_EXTENSION -#endif -#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) - -#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) - #undef JSON_HEDLEY_CLANG_HAS_WARNING -#endif -#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ - - -// This file contains all internal macro definitions (except those affecting ABI) -// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them - -// #include - - -// exclude unsupported compilers -#if 
!defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) - #if defined(__clang__) - #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 - #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 - #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #endif -#endif - -// C++ language standard detection -// if the user manually specified the used c++ version this is skipped -#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) - #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) - #define JSON_HAS_CPP_20 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) - #define JSON_HAS_CPP_14 - #endif - // the cpp 11 flag is always specified because it is the minimal required version - #define JSON_HAS_CPP_11 -#endif - -#ifdef __has_include - #if __has_include() - #include - #endif -#endif - -#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM) - #ifdef JSON_HAS_CPP_17 - #if defined(__cpp_lib_filesystem) - #define JSON_HAS_FILESYSTEM 1 - #elif defined(__cpp_lib_experimental_filesystem) - #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 - #elif !defined(__has_include) - #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 - #elif __has_include() - #define JSON_HAS_FILESYSTEM 1 - #elif __has_include() - #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 - #endif - - // std::filesystem does not work on MinGW GCC 8: 
https://sourceforge.net/p/mingw-w64/bugs/737/ - #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support - #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support - #if defined(__clang_major__) && __clang_major__ < 7 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support - #if defined(_MSC_VER) && _MSC_VER < 1914 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before iOS 13 - #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before macOS Catalina - #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - #endif -#endif - -#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0 -#endif - -#ifndef JSON_HAS_FILESYSTEM - #define JSON_HAS_FILESYSTEM 0 -#endif - -#ifndef JSON_HAS_THREE_WAY_COMPARISON - #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \ - && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L - #define JSON_HAS_THREE_WAY_COMPARISON 1 - #else - #define JSON_HAS_THREE_WAY_COMPARISON 0 - #endif -#endif - -#ifndef JSON_HAS_RANGES - // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error - #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427 
- #define JSON_HAS_RANGES 0 - #elif defined(__cpp_lib_ranges) - #define JSON_HAS_RANGES 1 - #else - #define JSON_HAS_RANGES 0 - #endif -#endif - -#ifndef JSON_HAS_STATIC_RTTI - #if !defined(_HAS_STATIC_RTTI) || _HAS_STATIC_RTTI != 0 - #define JSON_HAS_STATIC_RTTI 1 - #else - #define JSON_HAS_STATIC_RTTI 0 - #endif -#endif - -#ifdef JSON_HAS_CPP_17 - #define JSON_INLINE_VARIABLE inline -#else - #define JSON_INLINE_VARIABLE -#endif - -#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address) - #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]] -#else - #define JSON_NO_UNIQUE_ADDRESS -#endif - -// disable documentation warnings on clang -#if defined(__clang__) - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wdocumentation" - #pragma clang diagnostic ignored "-Wdocumentation-unknown-command" -#endif - -// allow disabling exceptions -#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) - #define JSON_THROW(exception) throw exception - #define JSON_TRY try - #define JSON_CATCH(exception) catch(exception) - #define JSON_INTERNAL_CATCH(exception) catch(exception) -#else - #include - #define JSON_THROW(exception) std::abort() - #define JSON_TRY if(true) - #define JSON_CATCH(exception) if(false) - #define JSON_INTERNAL_CATCH(exception) if(false) -#endif - -// override exception macros -#if defined(JSON_THROW_USER) - #undef JSON_THROW - #define JSON_THROW JSON_THROW_USER -#endif -#if defined(JSON_TRY_USER) - #undef JSON_TRY - #define JSON_TRY JSON_TRY_USER -#endif -#if defined(JSON_CATCH_USER) - #undef JSON_CATCH - #define JSON_CATCH JSON_CATCH_USER - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_CATCH_USER -#endif -#if defined(JSON_INTERNAL_CATCH_USER) - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER -#endif - -// allow overriding assert -#if !defined(JSON_ASSERT) - #include // assert - #define JSON_ASSERT(x) assert(x) -#endif - -// allow to access some 
private functions (needed by the test suite) -#if defined(JSON_TESTS_PRIVATE) - #define JSON_PRIVATE_UNLESS_TESTED public -#else - #define JSON_PRIVATE_UNLESS_TESTED private -#endif - -/*! -@brief macro to briefly define a mapping between an enum and JSON -@def NLOHMANN_JSON_SERIALIZE_ENUM -@since version 3.4.0 -*/ -#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ - template \ - inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ - { \ - static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ - static const std::pair m[] = __VA_ARGS__; \ - auto it = std::find_if(std::begin(m), std::end(m), \ - [e](const std::pair& ej_pair) -> bool \ - { \ - return ej_pair.first == e; \ - }); \ - j = ((it != std::end(m)) ? it : std::begin(m))->second; \ - } \ - template \ - inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ - { \ - static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ - static const std::pair m[] = __VA_ARGS__; \ - auto it = std::find_if(std::begin(m), std::end(m), \ - [&j](const std::pair& ej_pair) -> bool \ - { \ - return ej_pair.second == j; \ - }); \ - e = ((it != std::end(m)) ? it : std::begin(m))->first; \ - } - -// Ugly macros to avoid uglier copy-paste when specializing basic_json. They -// may be removed in the future once the class is split. 
- -#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ - template class ObjectType, \ - template class ArrayType, \ - class StringType, class BooleanType, class NumberIntegerType, \ - class NumberUnsignedType, class NumberFloatType, \ - template class AllocatorType, \ - template class JSONSerializer, \ - class BinaryType, \ - class CustomBaseClass> - -#define NLOHMANN_BASIC_JSON_TPL \ - basic_json - -// Macros to simplify conversion from/to types - -#define NLOHMANN_JSON_EXPAND( x ) x -#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME -#define NLOHMANN_JSON_PASTE(...) NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ - NLOHMANN_JSON_PASTE64, \ - NLOHMANN_JSON_PASTE63, \ - NLOHMANN_JSON_PASTE62, \ - NLOHMANN_JSON_PASTE61, \ - NLOHMANN_JSON_PASTE60, \ - NLOHMANN_JSON_PASTE59, \ - NLOHMANN_JSON_PASTE58, \ - NLOHMANN_JSON_PASTE57, \ - NLOHMANN_JSON_PASTE56, \ - NLOHMANN_JSON_PASTE55, \ - NLOHMANN_JSON_PASTE54, \ - NLOHMANN_JSON_PASTE53, \ - NLOHMANN_JSON_PASTE52, \ - NLOHMANN_JSON_PASTE51, \ - NLOHMANN_JSON_PASTE50, \ - NLOHMANN_JSON_PASTE49, \ - NLOHMANN_JSON_PASTE48, \ - NLOHMANN_JSON_PASTE47, \ - NLOHMANN_JSON_PASTE46, \ - NLOHMANN_JSON_PASTE45, \ - NLOHMANN_JSON_PASTE44, \ - NLOHMANN_JSON_PASTE43, \ - NLOHMANN_JSON_PASTE42, \ - NLOHMANN_JSON_PASTE41, \ - NLOHMANN_JSON_PASTE40, \ - NLOHMANN_JSON_PASTE39, \ - NLOHMANN_JSON_PASTE38, \ - NLOHMANN_JSON_PASTE37, \ - NLOHMANN_JSON_PASTE36, \ - NLOHMANN_JSON_PASTE35, \ - NLOHMANN_JSON_PASTE34, \ - NLOHMANN_JSON_PASTE33, \ - NLOHMANN_JSON_PASTE32, \ - NLOHMANN_JSON_PASTE31, \ - NLOHMANN_JSON_PASTE30, \ - NLOHMANN_JSON_PASTE29, \ - NLOHMANN_JSON_PASTE28, \ - NLOHMANN_JSON_PASTE27, \ - NLOHMANN_JSON_PASTE26, \ - 
NLOHMANN_JSON_PASTE25, \ - NLOHMANN_JSON_PASTE24, \ - NLOHMANN_JSON_PASTE23, \ - NLOHMANN_JSON_PASTE22, \ - NLOHMANN_JSON_PASTE21, \ - NLOHMANN_JSON_PASTE20, \ - NLOHMANN_JSON_PASTE19, \ - NLOHMANN_JSON_PASTE18, \ - NLOHMANN_JSON_PASTE17, \ - NLOHMANN_JSON_PASTE16, \ - NLOHMANN_JSON_PASTE15, \ - NLOHMANN_JSON_PASTE14, \ - NLOHMANN_JSON_PASTE13, \ - NLOHMANN_JSON_PASTE12, \ - NLOHMANN_JSON_PASTE11, \ - NLOHMANN_JSON_PASTE10, \ - NLOHMANN_JSON_PASTE9, \ - NLOHMANN_JSON_PASTE8, \ - NLOHMANN_JSON_PASTE7, \ - NLOHMANN_JSON_PASTE6, \ - NLOHMANN_JSON_PASTE5, \ - NLOHMANN_JSON_PASTE4, \ - NLOHMANN_JSON_PASTE3, \ - NLOHMANN_JSON_PASTE2, \ - NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) -#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) -#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) -#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) -#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) -#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) -#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) -#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) -#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) -#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) -#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) -#define 
NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) -#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) -#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) -#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) -#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) -#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) -#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) -#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) -#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, 
v18, v19) -#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) -#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) -#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) -#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) -#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) -#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) -#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) 
NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) -#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) -#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) -#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) -#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) -#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) -#define 
NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) -#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) -#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) -#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) -#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, 
v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) -#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) -#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) -#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) -#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) -#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, 
v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) -#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) -#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) -#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) -#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, 
v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) -#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) -#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) -#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, 
v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) -#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) -#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) -#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) -#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, 
v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) -#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) -#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) -#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, 
v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) -#define NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) -#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) -#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, 
v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) -#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) -#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) -#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, 
v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) -#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) -#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) - -#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; -#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); -#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); - -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_INTRUSIVE -@since version 3.9.0 -*/ -#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) 
\ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } - -#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } - -#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } - -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE -@since version 3.9.0 -*/ -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) \ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } - -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } - -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) 
\ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } - -// inspired from https://stackoverflow.com/a/26745591 -// allows to call any std function as if (e.g. with begin): -// using std::begin; begin(x); -// -// it allows using the detected idiom to retrieve the return type -// of such an expression -#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name) \ - namespace detail { \ - using std::std_name; \ - \ - template \ - using result_of_##std_name = decltype(std_name(std::declval()...)); \ - } \ - \ - namespace detail2 { \ - struct std_name##_tag \ - { \ - }; \ - \ - template \ - std_name##_tag std_name(T&&...); \ - \ - template \ - using result_of_##std_name = decltype(std_name(std::declval()...)); \ - \ - template \ - struct would_call_std_##std_name \ - { \ - static constexpr auto const value = ::nlohmann::detail:: \ - is_detected_exact::value; \ - }; \ - } /* namespace detail2 */ \ - \ - template \ - struct would_call_std_##std_name : detail2::would_call_std_##std_name \ - { \ - } - -#ifndef JSON_USE_IMPLICIT_CONVERSIONS - #define JSON_USE_IMPLICIT_CONVERSIONS 1 -#endif - -#if JSON_USE_IMPLICIT_CONVERSIONS - #define JSON_EXPLICIT -#else - #define JSON_EXPLICIT explicit -#endif - -#ifndef JSON_DISABLE_ENUM_SERIALIZATION - #define JSON_DISABLE_ENUM_SERIALIZATION 0 -#endif - -#ifndef JSON_USE_GLOBAL_UDLS - #define JSON_USE_GLOBAL_UDLS 1 -#endif - -#if JSON_HAS_THREE_WAY_COMPARISON - #include // partial_ordering -#endif - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -/////////////////////////// -// JSON type enumeration // -/////////////////////////// - -/*! 
-@brief the JSON type enumeration - -This enumeration collects the different JSON types. It is internally used to -distinguish the stored values, and the functions @ref basic_json::is_null(), -@ref basic_json::is_object(), @ref basic_json::is_array(), -@ref basic_json::is_string(), @ref basic_json::is_boolean(), -@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), -@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), -@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and -@ref basic_json::is_structured() rely on it. - -@note There are three enumeration entries (number_integer, number_unsigned, and -number_float), because the library distinguishes these three types for numbers: -@ref basic_json::number_unsigned_t is used for unsigned integers, -@ref basic_json::number_integer_t is used for signed integers, and -@ref basic_json::number_float_t is used for floating-point numbers or to -approximate integers which do not fit in the limits of their respective type. - -@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON -value with the default value for a given type - -@since version 1.0.0 -*/ -enum class value_t : std::uint8_t -{ - null, ///< null value - object, ///< object (unordered set of name/value pairs) - array, ///< array (ordered collection of values) - string, ///< string value - boolean, ///< boolean value - number_integer, ///< number value (signed integer) - number_unsigned, ///< number value (unsigned integer) - number_float, ///< number value (floating-point) - binary, ///< binary array (ordered collection of bytes) - discarded ///< discarded by the parser callback function -}; - -/*! 
-@brief comparison operator for JSON types - -Returns an ordering that is similar to Python: -- order: null < boolean < number < object < array < string < binary -- furthermore, each type is not smaller than itself -- discarded values are not comparable -- binary is represented as a b"" string in python and directly comparable to a - string; however, making a binary array directly comparable with a string would - be surprising behavior in a JSON file. - -@since version 1.0.0 -*/ -#if JSON_HAS_THREE_WAY_COMPARISON - inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD* -#else - inline bool operator<(const value_t lhs, const value_t rhs) noexcept -#endif -{ - static constexpr std::array order = {{ - 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, - 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, - 6 /* binary */ - } - }; - - const auto l_index = static_cast(lhs); - const auto r_index = static_cast(rhs); -#if JSON_HAS_THREE_WAY_COMPARISON - if (l_index < order.size() && r_index < order.size()) - { - return order[l_index] <=> order[r_index]; // *NOPAD* - } - return std::partial_ordering::unordered; -#else - return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index]; -#endif -} - -// GCC selects the built-in operator< over an operator rewritten from -// a user-defined spaceship operator -// Clang, MSVC, and ICC select the rewritten candidate -// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200) -#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__) -inline bool operator<(const value_t lhs, const value_t rhs) noexcept -{ - return std::is_lt(lhs <=> rhs); // *NOPAD* -} -#endif - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 
2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -/*! -@brief replace all occurrences of a substring by another string - -@param[in,out] s the string to manipulate; changed so that all - occurrences of @a f are replaced with @a t -@param[in] f the substring to replace with @a t -@param[in] t the string to replace @a f - -@pre The search string @a f must not be empty. **This precondition is -enforced with an assertion.** - -@since version 2.0.0 -*/ -template -inline void replace_substring(StringType& s, const StringType& f, - const StringType& t) -{ - JSON_ASSERT(!f.empty()); - for (auto pos = s.find(f); // find first occurrence of f - pos != StringType::npos; // make sure f was found - s.replace(pos, f.size(), t), // replace with t, and - pos = s.find(f, pos + t.size())) // find next occurrence of f - {} -} - -/*! - * @brief string escaping as described in RFC 6901 (Sect. 4) - * @param[in] s string to escape - * @return escaped string - * - * Note the order of escaping "~" to "~0" and "/" to "~1" is important. - */ -template -inline StringType escape(StringType s) -{ - replace_substring(s, StringType{"~"}, StringType{"~0"}); - replace_substring(s, StringType{"/"}, StringType{"~1"}); - return s; -} - -/*! - * @brief string unescaping as described in RFC 6901 (Sect. 4) - * @param[in] s string to unescape - * @return unescaped string - * - * Note the order of escaping "~1" to "/" and "~0" to "~" is important. 
- */ -template -static void unescape(StringType& s) -{ - replace_substring(s, StringType{"~1"}, StringType{"/"}); - replace_substring(s, StringType{"~0"}, StringType{"~"}); -} - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // size_t - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -/// struct to capture the start position of the current token -struct position_t -{ - /// the total number of characters read - std::size_t chars_read_total = 0; - /// the number of characters read in the current line - std::size_t chars_read_current_line = 0; - /// the number of lines read - std::size_t lines_read = 0; - - /// conversion to size_t to preserve SAX interface - constexpr operator size_t() const - { - return chars_read_total; - } -}; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-FileCopyrightText: 2018 The Abseil Authors -// SPDX-License-Identifier: MIT - - - -#include // array -#include // size_t -#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type -#include // index_sequence, make_index_sequence, index_sequence_for - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -template -using uncvref_t = typename std::remove_cv::type>::type; - -#ifdef JSON_HAS_CPP_14 - -// the following utilities are natively available in C++14 -using std::enable_if_t; -using std::index_sequence; -using 
std::make_index_sequence; -using std::index_sequence_for; - -#else - -// alias templates to reduce boilerplate -template -using enable_if_t = typename std::enable_if::type; - -// The following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h -// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. - -//// START OF CODE FROM GOOGLE ABSEIL - -// integer_sequence -// -// Class template representing a compile-time integer sequence. An instantiation -// of `integer_sequence` has a sequence of integers encoded in its -// type through its template arguments (which is a common need when -// working with C++11 variadic templates). `absl::integer_sequence` is designed -// to be a drop-in replacement for C++14's `std::integer_sequence`. -// -// Example: -// -// template< class T, T... Ints > -// void user_function(integer_sequence); -// -// int main() -// { -// // user_function's `T` will be deduced to `int` and `Ints...` -// // will be deduced to `0, 1, 2, 3, 4`. -// user_function(make_integer_sequence()); -// } -template -struct integer_sequence -{ - using value_type = T; - static constexpr std::size_t size() noexcept - { - return sizeof...(Ints); - } -}; - -// index_sequence -// -// A helper template for an `integer_sequence` of `size_t`, -// `absl::index_sequence` is designed to be a drop-in replacement for C++14's -// `std::index_sequence`. -template -using index_sequence = integer_sequence; - -namespace utility_internal -{ - -template -struct Extend; - -// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency. -template -struct Extend, SeqSize, 0> -{ - using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >; -}; - -template -struct Extend, SeqSize, 1> -{ - using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >; -}; - -// Recursion helper for 'make_integer_sequence'. 
-// 'Gen::type' is an alias for 'integer_sequence'. -template -struct Gen -{ - using type = - typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type; -}; - -template -struct Gen -{ - using type = integer_sequence; -}; - -} // namespace utility_internal - -// Compile-time sequences of integers - -// make_integer_sequence -// -// This template alias is equivalent to -// `integer_sequence`, and is designed to be a drop-in -// replacement for C++14's `std::make_integer_sequence`. -template -using make_integer_sequence = typename utility_internal::Gen::type; - -// make_index_sequence -// -// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`, -// and is designed to be a drop-in replacement for C++14's -// `std::make_index_sequence`. -template -using make_index_sequence = make_integer_sequence; - -// index_sequence_for -// -// Converts a typename pack into an index sequence of the same length, and -// is designed to be a drop-in replacement for C++14's -// `std::index_sequence_for()` -template -using index_sequence_for = make_index_sequence; - -//// END OF CODE FROM GOOGLE ABSEIL - -#endif - -// dispatch utility (taken from ranges-v3) -template struct priority_tag : priority_tag < N - 1 > {}; -template<> struct priority_tag<0> {}; - -// taken from ranges-v3 -template -struct static_const -{ - static JSON_INLINE_VARIABLE constexpr T value{}; -}; - -#ifndef JSON_HAS_CPP_17 - template - constexpr T static_const::value; -#endif - -template -inline constexpr std::array make_array(Args&& ... 
args) -{ - return std::array {{static_cast(std::forward(args))...}}; -} - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // numeric_limits -#include // false_type, is_constructible, is_integral, is_same, true_type -#include // declval -#include // tuple -#include // char_traits - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // random_access_iterator_tag - -// #include - -// #include - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -template -struct iterator_types {}; - -template -struct iterator_types < - It, - void_t> -{ - using difference_type = typename It::difference_type; - using value_type = typename It::value_type; - using pointer = typename It::pointer; - using reference = typename It::reference; - using iterator_category = typename It::iterator_category; -}; - -// This is required as some compilers implement std::iterator_traits in a way that -// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. 
-template -struct iterator_traits -{ -}; - -template -struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> - : iterator_types -{ -}; - -template -struct iterator_traits::value>> -{ - using iterator_category = std::random_access_iterator_tag; - using value_type = T; - using difference_type = ptrdiff_t; - using pointer = T*; - using reference = T&; -}; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN - -NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin); - -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN - -NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end); - -NLOHMANN_JSON_NAMESPACE_END - -// #include - -// #include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - -#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ - #define INCLUDE_NLOHMANN_JSON_FWD_HPP_ - - #include // int64_t, uint64_t - #include // map - #include // allocator - #include // string - #include // vector - - // #include - - - /*! - @brief namespace for Niels Lohmann - @see https://github.com/nlohmann - @since version 1.0.0 - */ - NLOHMANN_JSON_NAMESPACE_BEGIN - - /*! 
- @brief default JSONSerializer template argument - - This serializer ignores the template arguments and uses ADL - ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) - for serialization. - */ - template - struct adl_serializer; - - /// a class to store JSON values - /// @sa https://json.nlohmann.me/api/basic_json/ - template class ObjectType = - std::map, - template class ArrayType = std::vector, - class StringType = std::string, class BooleanType = bool, - class NumberIntegerType = std::int64_t, - class NumberUnsignedType = std::uint64_t, - class NumberFloatType = double, - template class AllocatorType = std::allocator, - template class JSONSerializer = - adl_serializer, - class BinaryType = std::vector, // cppcheck-suppress syntaxError - class CustomBaseClass = void> - class basic_json; - - /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document - /// @sa https://json.nlohmann.me/api/json_pointer/ - template - class json_pointer; - - /*! - @brief default specialization - @sa https://json.nlohmann.me/api/json/ - */ - using json = basic_json<>; - - /// @brief a minimal map-like container that preserves insertion order - /// @sa https://json.nlohmann.me/api/ordered_map/ - template - struct ordered_map; - - /// @brief specialization that maintains the insertion order of object keys - /// @sa https://json.nlohmann.me/api/ordered_json/ - using ordered_json = basic_json; - - NLOHMANN_JSON_NAMESPACE_END - -#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_ - - -NLOHMANN_JSON_NAMESPACE_BEGIN -/*! -@brief detail namespace with internal helper functions - -This namespace collects functions that should not be exposed, -implementations of some @ref basic_json methods, and meta-programming helpers. - -@since version 2.1.0 -*/ -namespace detail -{ - -///////////// -// helpers // -///////////// - -// Note to maintainers: -// -// Every trait in this file expects a non CV-qualified type. 
-// The only exceptions are in the 'aliases for detected' section -// (i.e. those of the form: decltype(T::member_function(std::declval()))) -// -// In this case, T has to be properly CV-qualified to constraint the function arguments -// (e.g. to_json(BasicJsonType&, const T&)) - -template struct is_basic_json : std::false_type {}; - -NLOHMANN_BASIC_JSON_TPL_DECLARATION -struct is_basic_json : std::true_type {}; - -// used by exceptions create() member functions -// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t -// false_type otherwise -template -struct is_basic_json_context : - std::integral_constant < bool, - is_basic_json::type>::type>::value - || std::is_same::value > -{}; - -////////////////////// -// json_ref helpers // -////////////////////// - -template -class json_ref; - -template -struct is_json_ref : std::false_type {}; - -template -struct is_json_ref> : std::true_type {}; - -////////////////////////// -// aliases for detected // -////////////////////////// - -template -using mapped_type_t = typename T::mapped_type; - -template -using key_type_t = typename T::key_type; - -template -using value_type_t = typename T::value_type; - -template -using difference_type_t = typename T::difference_type; - -template -using pointer_t = typename T::pointer; - -template -using reference_t = typename T::reference; - -template -using iterator_category_t = typename T::iterator_category; - -template -using to_json_function = decltype(T::to_json(std::declval()...)); - -template -using from_json_function = decltype(T::from_json(std::declval()...)); - -template -using get_template_function = decltype(std::declval().template get()); - -// trait checking if JSONSerializer::from_json(json const&, udt&) exists -template -struct has_from_json : std::false_type {}; - -// trait checking if j.get is valid -// use this trait instead of std::is_constructible or std::is_convertible, -// both rely on, or make use of implicit conversions, and thus fail when T 
-// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958) -template -struct is_getable -{ - static constexpr bool value = is_detected::value; -}; - -template -struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - -// This trait checks if JSONSerializer::from_json(json const&) exists -// this overload is used for non-default-constructible user-defined-types -template -struct has_non_default_from_json : std::false_type {}; - -template -struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - -// This trait checks if BasicJsonType::json_serializer::to_json exists -// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. 
-template -struct has_to_json : std::false_type {}; - -template -struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - -template -using detect_key_compare = typename T::key_compare; - -template -struct has_key_compare : std::integral_constant::value> {}; - -// obtains the actual object key comparator -template -struct actual_object_comparator -{ - using object_t = typename BasicJsonType::object_t; - using object_comparator_t = typename BasicJsonType::default_object_comparator_t; - using type = typename std::conditional < has_key_compare::value, - typename object_t::key_compare, object_comparator_t>::type; -}; - -template -using actual_object_comparator_t = typename actual_object_comparator::type; - -///////////////// -// char_traits // -///////////////// - -// Primary template of char_traits calls std char_traits -template -struct char_traits : std::char_traits -{}; - -// Explicitly define char traits for unsigned char since it is not standard -template<> -struct char_traits : std::char_traits -{ - using char_type = unsigned char; - using int_type = uint64_t; - - // Redefine to_int_type function - static int_type to_int_type(char_type c) noexcept - { - return static_cast(c); - } - - static char_type to_char_type(int_type i) noexcept - { - return static_cast(i); - } - - static constexpr int_type eof() noexcept - { - return static_cast(EOF); - } -}; - -// Explicitly define char traits for signed char since it is not standard -template<> -struct char_traits : std::char_traits -{ - using char_type = signed char; - using int_type = uint64_t; - - // Redefine to_int_type function - static int_type to_int_type(char_type c) noexcept - { - return static_cast(c); - } - - static char_type to_char_type(int_type i) noexcept - { - return static_cast(i); - } - - static constexpr int_type eof() noexcept - { - return 
static_cast(EOF); - } -}; - -/////////////////// -// is_ functions // -/////////////////// - -// https://en.cppreference.com/w/cpp/types/conjunction -template struct conjunction : std::true_type { }; -template struct conjunction : B { }; -template -struct conjunction -: std::conditional(B::value), conjunction, B>::type {}; - -// https://en.cppreference.com/w/cpp/types/negation -template struct negation : std::integral_constant < bool, !B::value > { }; - -// Reimplementation of is_constructible and is_default_constructible, due to them being broken for -// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367). -// This causes compile errors in e.g. clang 3.5 or gcc 4.9. -template -struct is_default_constructible : std::is_default_constructible {}; - -template -struct is_default_constructible> - : conjunction, is_default_constructible> {}; - -template -struct is_default_constructible> - : conjunction, is_default_constructible> {}; - -template -struct is_default_constructible> - : conjunction...> {}; - -template -struct is_default_constructible> - : conjunction...> {}; - -template -struct is_constructible : std::is_constructible {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_iterator_traits : std::false_type {}; - -template -struct is_iterator_traits> -{ - private: - using traits = iterator_traits; - - public: - static constexpr auto value = - is_detected::value && - is_detected::value && - is_detected::value && - is_detected::value && - is_detected::value; -}; - -template -struct is_range -{ - private: - using t_ref = typename std::add_lvalue_reference::type; - - using iterator = detected_t; - using sentinel = detected_t; - - // to be 100% correct, it should use 
https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator - // and https://en.cppreference.com/w/cpp/iterator/sentinel_for - // but reimplementing these would be too much work, as a lot of other concepts are used underneath - static constexpr auto is_iterator_begin = - is_iterator_traits>::value; - - public: - static constexpr bool value = !std::is_same::value && !std::is_same::value && is_iterator_begin; -}; - -template -using iterator_t = enable_if_t::value, result_of_begin())>>; - -template -using range_value_t = value_type_t>>; - -// The following implementation of is_complete_type is taken from -// https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/ -// and is written by Xiang Fan who agreed to using it in this library. - -template -struct is_complete_type : std::false_type {}; - -template -struct is_complete_type : std::true_type {}; - -template -struct is_compatible_object_type_impl : std::false_type {}; - -template -struct is_compatible_object_type_impl < - BasicJsonType, CompatibleObjectType, - enable_if_t < is_detected::value&& - is_detected::value >> -{ - using object_t = typename BasicJsonType::object_t; - - // macOS's is_constructible does not play well with nonesuch... 
- static constexpr bool value = - is_constructible::value && - is_constructible::value; -}; - -template -struct is_compatible_object_type - : is_compatible_object_type_impl {}; - -template -struct is_constructible_object_type_impl : std::false_type {}; - -template -struct is_constructible_object_type_impl < - BasicJsonType, ConstructibleObjectType, - enable_if_t < is_detected::value&& - is_detected::value >> -{ - using object_t = typename BasicJsonType::object_t; - - static constexpr bool value = - (is_default_constructible::value && - (std::is_move_assignable::value || - std::is_copy_assignable::value) && - (is_constructible::value && - std::is_same < - typename object_t::mapped_type, - typename ConstructibleObjectType::mapped_type >::value)) || - (has_from_json::value || - has_non_default_from_json < - BasicJsonType, - typename ConstructibleObjectType::mapped_type >::value); -}; - -template -struct is_constructible_object_type - : is_constructible_object_type_impl {}; - -template -struct is_compatible_string_type -{ - static constexpr auto value = - is_constructible::value; -}; - -template -struct is_constructible_string_type -{ - // launder type through decltype() to fix compilation failure on ICPC -#ifdef __INTEL_COMPILER - using laundered_type = decltype(std::declval()); -#else - using laundered_type = ConstructibleStringType; -#endif - - static constexpr auto value = - conjunction < - is_constructible, - is_detected_exact>::value; -}; - -template -struct is_compatible_array_type_impl : std::false_type {}; - -template -struct is_compatible_array_type_impl < - BasicJsonType, CompatibleArrayType, - enable_if_t < - is_detected::value&& - is_iterator_traits>>::value&& -// special case for types like std::filesystem::path whose iterator's value_type are themselves -// c.f. 
https://github.com/nlohmann/json/pull/3073 - !std::is_same>::value >> -{ - static constexpr bool value = - is_constructible>::value; -}; - -template -struct is_compatible_array_type - : is_compatible_array_type_impl {}; - -template -struct is_constructible_array_type_impl : std::false_type {}; - -template -struct is_constructible_array_type_impl < - BasicJsonType, ConstructibleArrayType, - enable_if_t::value >> - : std::true_type {}; - -template -struct is_constructible_array_type_impl < - BasicJsonType, ConstructibleArrayType, - enable_if_t < !std::is_same::value&& - !is_compatible_string_type::value&& - is_default_constructible::value&& -(std::is_move_assignable::value || - std::is_copy_assignable::value)&& -is_detected::value&& -is_iterator_traits>>::value&& -is_detected::value&& -// special case for types like std::filesystem::path whose iterator's value_type are themselves -// c.f. https://github.com/nlohmann/json/pull/3073 -!std::is_same>::value&& - is_complete_type < - detected_t>::value >> -{ - using value_type = range_value_t; - - static constexpr bool value = - std::is_same::value || - has_from_json::value || - has_non_default_from_json < - BasicJsonType, - value_type >::value; -}; - -template -struct is_constructible_array_type - : is_constructible_array_type_impl {}; - -template -struct is_compatible_integer_type_impl : std::false_type {}; - -template -struct is_compatible_integer_type_impl < - RealIntegerType, CompatibleNumberIntegerType, - enable_if_t < std::is_integral::value&& - std::is_integral::value&& - !std::is_same::value >> -{ - // is there an assert somewhere on overflows? 
- using RealLimits = std::numeric_limits; - using CompatibleLimits = std::numeric_limits; - - static constexpr auto value = - is_constructible::value && - CompatibleLimits::is_integer && - RealLimits::is_signed == CompatibleLimits::is_signed; -}; - -template -struct is_compatible_integer_type - : is_compatible_integer_type_impl {}; - -template -struct is_compatible_type_impl: std::false_type {}; - -template -struct is_compatible_type_impl < - BasicJsonType, CompatibleType, - enable_if_t::value >> -{ - static constexpr bool value = - has_to_json::value; -}; - -template -struct is_compatible_type - : is_compatible_type_impl {}; - -template -struct is_constructible_tuple : std::false_type {}; - -template -struct is_constructible_tuple> : conjunction...> {}; - -template -struct is_json_iterator_of : std::false_type {}; - -template -struct is_json_iterator_of : std::true_type {}; - -template -struct is_json_iterator_of : std::true_type -{}; - -// checks if a given type T is a template specialization of Primary -template