| #pragma once |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
|
|
| #include <stdint.h> |
| #include <stddef.h> |
| #include <stdbool.h> |
|
|
| #define GGML_V1_MAX_DIMS 4 /* length of the ne[] and nb[] arrays in struct ggml_v1_tensor */ |
| #define GGML_V1_MAX_NODES 4096 /* length of the nodes[], grads[] and leafs[] arrays in struct ggml_v1_cgraph */ |
| #define GGML_V1_MAX_PARAMS 16 /* NOTE(review): not referenced anywhere in this header -- confirm its use in the implementation */ |
| #define GGML_V1_MAX_CONTEXTS 64 /* NOTE(review): not referenced anywhere in this header -- presumably a cap on live contexts, confirm */ |
| #define GGML_V1_MAX_OPT 4 /* length of the opt[] array in struct ggml_v1_tensor */ |
|
|
| #ifdef __ARM_NEON |
| /* With __ARM_NEON defined, the half-precision type is the compiler-provided __fp16. */ |
| typedef __fp16 ggml_v1_fp16_t; |
| #else |
| typedef uint16_t ggml_v1_fp16_t; /* otherwise a 16-bit unsigned integer; NOTE(review): presumably holds the raw half-float bit pattern -- confirm */ |
| #endif |
|
|
| /* Conversions between ggml_v1_fp16_t and float; only declared here, defined elsewhere. */ |
| float ggml_v1_fp16_to_fp32(ggml_v1_fp16_t x); |
| ggml_v1_fp16_t ggml_v1_fp32_to_fp16(float x); |
|
|
| struct ggml_v1_object; /* incomplete type: this header only uses it through pointers (ggml_v1_print_object) */ |
| struct ggml_v1_context; /* incomplete type: returned by ggml_v1_init() and always passed by pointer */ |
|
|
| enum ggml_v1_type { /* element type tag, stored in the type field of struct ggml_v1_tensor */ |
| GGML_V1_TYPE_Q4_0, /* NOTE(review): Q4_* names suggest 4-bit quantized formats -- confirm against the implementation */ |
| GGML_V1_TYPE_Q4_1, |
| GGML_V1_TYPE_I8, /* NOTE(review): I8/I16/I32 presumably signed integers of that bit width -- confirm */ |
| GGML_V1_TYPE_I16, |
| GGML_V1_TYPE_I32, |
| GGML_V1_TYPE_F16, /* NOTE(review): presumably stored as ggml_v1_fp16_t -- confirm */ |
| GGML_V1_TYPE_F32, |
| GGML_V1_TYPE_COUNT, /* one past the last type tag, i.e. the number of tags above (7) */ |
| }; |
|
|
| /* Operation tag, stored in the op field of struct ggml_v1_tensor. Values are sequential starting at 0. */ |
| enum ggml_v1_op { |
| GGML_V1_OP_NONE = 0, /* explicitly zero; NOTE(review): presumably means "not the result of an operation" -- confirm */ |
|
|
| GGML_V1_OP_DUP, |
| GGML_V1_OP_ADD, |
| GGML_V1_OP_SUB, |
| GGML_V1_OP_MUL, |
| GGML_V1_OP_DIV, |
| GGML_V1_OP_SQR, |
| GGML_V1_OP_SQRT, |
| GGML_V1_OP_SUM, |
| GGML_V1_OP_MEAN, |
| GGML_V1_OP_REPEAT, |
| GGML_V1_OP_ABS, |
| GGML_V1_OP_SGN, |
| GGML_V1_OP_NEG, |
| GGML_V1_OP_STEP, |
| GGML_V1_OP_RELU, |
| GGML_V1_OP_GELU, |
| GGML_V1_OP_NORM, |
|
|
| GGML_V1_OP_MUL_MAT, |
|
|
| GGML_V1_OP_SCALE, |
| GGML_V1_OP_CPY, |
| GGML_V1_OP_RESHAPE, |
| GGML_V1_OP_VIEW, |
| GGML_V1_OP_PERMUTE, |
| GGML_V1_OP_TRANSPOSE, |
| GGML_V1_OP_GET_ROWS, |
| GGML_V1_OP_DIAG_MASK_INF, |
| GGML_V1_OP_SOFT_MAX, |
| GGML_V1_OP_ROPE, |
| GGML_V1_OP_CONV_1D_1S, |
| GGML_V1_OP_CONV_1D_2S, |
|
|
| GGML_V1_OP_FLASH_ATTN, |
| GGML_V1_OP_FLASH_FF, |
|
|
| GGML_V1_OP_COUNT, /* one past the last operation tag; not an operation itself */ |
| }; |
|
|
| /* Tensor descriptor: element type, dimension arrays, operation tag, links to other tensors, timing counters and a data pointer. */ |
| struct ggml_v1_tensor { |
| enum ggml_v1_type type; /* element type tag */ |
|
|
| int n_dims; /* NOTE(review): presumably how many entries of ne[] / nb[] are in use -- confirm */ |
| int ne[GGML_V1_MAX_DIMS]; /* NOTE(review): presumably the number of elements along each dimension -- confirm */ |
| size_t nb[GGML_V1_MAX_DIMS]; /* NOTE(review): presumably the stride in bytes along each dimension -- confirm */ |
| |
| |
| |
|
|
| |
| enum ggml_v1_op op; /* operation tag, see enum ggml_v1_op */ |
|
|
| bool is_param; /* NOTE(review): presumably set through ggml_v1_set_param() -- confirm */ |
|
|
| struct ggml_v1_tensor * grad; /* NOTE(review): presumably the gradient tensor, if one exists -- confirm */ |
| struct ggml_v1_tensor * src0; /* NOTE(review): presumably the first operand of op -- confirm */ |
| struct ggml_v1_tensor * src1; /* NOTE(review): presumably the second operand of op -- confirm */ |
| struct ggml_v1_tensor * opt[GGML_V1_MAX_OPT]; /* up to GGML_V1_MAX_OPT further tensor pointers; NOTE(review): presumably extra operands -- confirm */ |
|
|
| /* NOTE(review): presumably the number of parallel tasks used to compute this tensor -- confirm */ |
| int n_tasks; |
|
|
| /* Per-tensor timing counters; units below are taken from the field names only. */ |
| int perf_runs; |
| int64_t perf_cycles; |
| int64_t perf_time_us; |
|
|
| void * data; /* untyped pointer; NOTE(review): presumably the element storage, ownership not visible in this header */ |
| char padding[8]; /* 8 trailing bytes; NOTE(review): purpose (size/alignment) not visible here -- confirm */ |
| }; |
|
|
| /* Graph container: three fixed-size arrays of tensor pointers plus counters, a work tensor and timing fields. */ |
| struct ggml_v1_cgraph { |
| int n_nodes; /* NOTE(review): presumably the number of used entries in nodes[] / grads[] -- confirm */ |
| int n_leafs; /* NOTE(review): presumably the number of used entries in leafs[] -- confirm */ |
| int n_threads; /* NOTE(review): presumably the thread count used by ggml_v1_graph_compute() -- confirm */ |
|
|
| size_t work_size; /* NOTE(review): presumably the size in bytes of the work buffer -- confirm */ |
| struct ggml_v1_tensor * work; /* NOTE(review): presumably a temporary buffer tensor for computation -- confirm */ |
|
|
| struct ggml_v1_tensor * nodes[GGML_V1_MAX_NODES]; /* each array holds at most GGML_V1_MAX_NODES pointers */ |
| struct ggml_v1_tensor * grads[GGML_V1_MAX_NODES]; |
| struct ggml_v1_tensor * leafs[GGML_V1_MAX_NODES]; |
|
|
| /* Per-graph timing counters; units are taken from the field names only. */ |
| int perf_runs; |
| int64_t perf_cycles; |
| int64_t perf_time_us; |
| }; |
|
|
| /* Scratch buffer descriptor, passed by value to ggml_v1_set_scratch(). */ |
| struct ggml_v1_scratch { |
| size_t offs; /* NOTE(review): presumably the current offset into data -- confirm */ |
| size_t size; /* NOTE(review): presumably the total size of data in bytes -- confirm */ |
| void * data; /* NOTE(review): presumably caller-provided memory -- confirm ownership */ |
| }; |
|
|
| struct ggml_v1_init_params { /* argument of ggml_v1_init(), passed by value */ |
| /* NOTE(review): presumably describes the memory pool backing the context -- confirm */ |
| size_t mem_size; /* NOTE(review): presumably the pool size in bytes -- confirm */ |
| void * mem_buffer; /* NOTE(review): behavior when NULL is not visible in this header -- confirm */ |
| }; |
|
|
| void ggml_v1_time_init(void); /* NOTE(review): presumably must be called once before the other time functions -- confirm */ |
| int64_t ggml_v1_time_ms(void); /* returns a 64-bit time value; milliseconds per the name */ |
| int64_t ggml_v1_time_us(void); /* returns a 64-bit time value; microseconds per the name */ |
| int64_t ggml_v1_cycles(void); /* returns a 64-bit cycle counter value; NOTE(review): clock source not visible here */ |
| int64_t ggml_v1_cycles_per_ms(void); /* returns cycles per millisecond, per the name */ |
|
|
| void ggml_v1_print_object (const struct ggml_v1_object * obj); /* takes a read-only object pointer; NOTE(review): output destination not visible here */ |
| void ggml_v1_print_objects(const struct ggml_v1_context * ctx); /* takes a read-only context pointer; NOTE(review): presumably prints every object in ctx -- confirm */ |
|
|
| int ggml_v1_nelements(const struct ggml_v1_tensor * tensor); /* element count as int; NOTE(review): presumably the product of ne[] -- confirm */ |
| size_t ggml_v1_nbytes (const struct ggml_v1_tensor * tensor); /* NOTE(review): presumably the size of the tensor data in bytes -- confirm */ |
|
|
| int ggml_v1_blck_size (enum ggml_v1_type type); /* NOTE(review): presumably elements per block for the given type -- confirm */ |
| size_t ggml_v1_type_size (enum ggml_v1_type type); /* size for a type as size_t; NOTE(review): per element or per block is not visible here */ |
| float ggml_v1_type_sizef(enum ggml_v1_type type); /* float-valued counterpart; NOTE(review): presumably allows fractional bytes per element -- confirm */ |
|
|
| size_t ggml_v1_element_size(const struct ggml_v1_tensor * tensor); /* NOTE(review): presumably ggml_v1_type_size of tensor->type -- confirm */ |
|
|
| struct ggml_v1_context * ggml_v1_init(struct ggml_v1_init_params params); /* returns a context pointer; NOTE(review): failure value (NULL?) not visible here -- confirm */ |
| void ggml_v1_free(struct ggml_v1_context * ctx); /* NOTE(review): presumably releases a context obtained from ggml_v1_init() -- confirm */ |
|
|
| size_t ggml_v1_used_mem(const struct ggml_v1_context * ctx); /* read-only query; NOTE(review): presumably bytes used in the context -- confirm */ |
|
|
| size_t ggml_v1_set_scratch(struct ggml_v1_context * ctx, struct ggml_v1_scratch scratch); /* scratch is passed by value; NOTE(review): meaning of the returned size_t not visible here */ |
|
|
| struct ggml_v1_tensor * ggml_v1_new_tensor( /* general form: n_dims sizes read through ne */ |
| struct ggml_v1_context * ctx, |
| enum ggml_v1_type type, |
| int n_dims, |
| const int *ne); /* read-only; NOTE(review): presumably must hold at least n_dims entries -- confirm */ |
|
|
| struct ggml_v1_tensor * ggml_v1_new_tensor_1d( /* the _1d.._4d forms take the sizes as separate ints */ |
| struct ggml_v1_context * ctx, |
| enum ggml_v1_type type, |
| int ne0); |
|
|
| struct ggml_v1_tensor * ggml_v1_new_tensor_2d( |
| struct ggml_v1_context * ctx, |
| enum ggml_v1_type type, |
| int ne0, |
| int ne1); |
|
|
| struct ggml_v1_tensor * ggml_v1_new_tensor_3d( |
| struct ggml_v1_context * ctx, |
| enum ggml_v1_type type, |
| int ne0, |
| int ne1, |
| int ne2); |
|
|
| struct ggml_v1_tensor * ggml_v1_new_tensor_4d( /* four sizes, matching GGML_V1_MAX_DIMS */ |
| struct ggml_v1_context * ctx, |
| enum ggml_v1_type type, |
| int ne0, |
| int ne1, |
| int ne2, |
| int ne3); |
|
|
| struct ggml_v1_tensor * ggml_v1_new_i32(struct ggml_v1_context * ctx, int32_t value); /* NOTE(review): presumably a one-element I32 tensor holding value -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_new_f32(struct ggml_v1_context * ctx, float value); /* NOTE(review): presumably a one-element F32 tensor holding value -- confirm */ |
|
|
| struct ggml_v1_tensor * ggml_v1_dup_tensor (struct ggml_v1_context * ctx, const struct ggml_v1_tensor * src); /* src is read-only; NOTE(review): presumably a new tensor with the same type and shape -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_view_tensor(struct ggml_v1_context * ctx, const struct ggml_v1_tensor * src); /* src is read-only; NOTE(review): presumably shares the data of src -- confirm */ |
|
|
| struct ggml_v1_tensor * ggml_v1_set_zero(struct ggml_v1_tensor * tensor); /* the set_* functions return a tensor pointer; NOTE(review): presumably the argument itself -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_set_i32 (struct ggml_v1_tensor * tensor, int32_t value); /* NOTE(review): presumably fills every element with value -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_set_f32 (struct ggml_v1_tensor * tensor, float value); /* NOTE(review): presumably fills every element with value -- confirm */ |
|
|
| int32_t ggml_v1_get_i32_1d(const struct ggml_v1_tensor * tensor, int i); /* reads one element by flat index i; NOTE(review): bounds checking not visible here */ |
| void ggml_v1_set_i32_1d(const struct ggml_v1_tensor * tensor, int i, int32_t value); /* NOTE(review): a setter taking a const tensor; only the descriptor is const, data is a plain void* -- confirm intent */ |
|
|
| float ggml_v1_get_f32_1d(const struct ggml_v1_tensor * tensor, int i); /* float counterpart of ggml_v1_get_i32_1d */ |
| void ggml_v1_set_f32_1d(const struct ggml_v1_tensor * tensor, int i, float value); /* float counterpart of ggml_v1_set_i32_1d; same const remark applies */ |
|
|
| void * ggml_v1_get_data (const struct ggml_v1_tensor * tensor); /* NOTE(review): presumably returns tensor->data -- confirm */ |
| float * ggml_v1_get_data_f32(const struct ggml_v1_tensor * tensor); /* same as a float pointer; NOTE(review): presumably only valid for F32 tensors -- confirm */ |
|
|
| |
| /* Tensor operations: each takes the context plus operand tensors and returns a tensor pointer. NOTE(review): presumably this group supports backpropagation -- confirm */ |
| |
|
|
| struct ggml_v1_tensor * ggml_v1_dup( /* unary; corresponds to GGML_V1_OP_DUP by name */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| struct ggml_v1_tensor * ggml_v1_add( /* binary on a and b; NOTE(review): shape requirements not visible here */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| struct ggml_v1_tensor * ggml_v1_sub( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| struct ggml_v1_tensor * ggml_v1_mul( /* NOTE(review): presumably element-wise; matrix product is ggml_v1_mul_mat -- confirm */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| struct ggml_v1_tensor * ggml_v1_div( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| struct ggml_v1_tensor * ggml_v1_sqr( /* unary */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| struct ggml_v1_tensor * ggml_v1_sqrt( /* unary */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| /* NOTE(review): presumably reduces a to a single scalar tensor -- confirm */ |
| |
| struct ggml_v1_tensor * ggml_v1_sum( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| /* NOTE(review): presumably the mean along rows -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_mean( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| /* NOTE(review): presumably repeats a to fit the shape of b; whether a itself */ |
| /* may be returned when the shapes already match is not visible here -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_repeat( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| struct ggml_v1_tensor * ggml_v1_abs( /* abs, sgn, neg, step, relu, gelu and norm are all unary: (ctx, a) -> tensor */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| struct ggml_v1_tensor * ggml_v1_sgn( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| struct ggml_v1_tensor * ggml_v1_neg( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| struct ggml_v1_tensor * ggml_v1_step( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| struct ggml_v1_tensor * ggml_v1_relu( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| /* NOTE(review): the exact GELU formulation (exact vs. tanh approximation) is not visible here -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_gelu( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| /* NOTE(review): presumably normalizes along rows; no epsilon parameter is exposed, */ |
| /* so any epsilon must be fixed inside the implementation -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_norm( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| /* Matrix product of a and b (per the name). */ |
| /* NOTE(review): operand layout / which operand is transposed is not visible here -- confirm */ |
| |
| struct ggml_v1_tensor * ggml_v1_mul_mat( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| |
| /* Further tensor operations. NOTE(review): presumably this group has no backpropagation support -- confirm */ |
| |
|
|
| /* The scale factor is passed as tensor b. NOTE(review): presumably in-place on a -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_scale( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| /* NOTE(review): presumably copies a into b and returns a view of b -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_cpy( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| /* NOTE(review): presumably returns a view of a with the shape of b -- confirm */ |
| |
| struct ggml_v1_tensor * ggml_v1_reshape( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| /* Like ggml_v1_reshape but the new shape is given as two ints. */ |
| /* NOTE(review): presumably returns a view of a and requires matching element counts -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_reshape_2d( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| int ne0, |
| int ne1); |
|
|
| /* Three-int variant of ggml_v1_reshape_2d. */ |
| |
| struct ggml_v1_tensor * ggml_v1_reshape_3d( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| int ne0, |
| int ne1, |
| int ne2); |
|
|
| /* NOTE(review): offset is presumably in bytes (it is a size_t, like nb1 below) -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_view_1d( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| int ne0, |
| size_t offset); |
|
|
| struct ggml_v1_tensor * ggml_v1_view_2d( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| int ne0, |
| int ne1, |
| size_t nb1, /* NOTE(review): presumably the row stride in bytes -- confirm */ |
| size_t offset); |
|
|
| struct ggml_v1_tensor * ggml_v1_permute( /* four axis arguments, one per GGML_V1_MAX_DIMS dimension */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| int axis0, |
| int axis1, |
| int axis2, |
| int axis3); |
|
|
| /* NOTE(review): presumably equivalent to ggml_v1_permute(ctx, a, 1, 0, 2, 3) -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_transpose( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| struct ggml_v1_tensor * ggml_v1_get_rows( /* NOTE(review): presumably selects rows of a using indices in b -- confirm */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| /* NOTE(review): presumably sets elements above the diagonal to -INF, in-place on a; */ |
| /* n_past presumably shifts the diagonal -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_diag_mask_inf( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| int n_past); |
|
|
| /* NOTE(review): presumably in-place on a -- confirm */ |
| struct ggml_v1_tensor * ggml_v1_soft_max( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a); |
|
|
| /* NOTE(review): presumably rotary position embedding, in-place on a -- confirm */ |
| /* NOTE(review): meaning of the mode values is not visible in this header -- confirm */ |
| |
| |
| struct ggml_v1_tensor * ggml_v1_rope( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| int n_past, |
| int n_dims, |
| int mode); |
|
|
| /* 1-D convolutions. No stride, padding or dilation parameters are exposed; */ |
| /* NOTE(review): the _1s / _2s suffixes presumably encode a fixed stride of 1 / 2 -- confirm */ |
| |
| |
| struct ggml_v1_tensor * ggml_v1_conv_1d_1s( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| struct ggml_v1_tensor * ggml_v1_conv_1d_2s( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b); |
|
|
| struct ggml_v1_tensor * ggml_v1_flash_attn( /* NOTE(review): presumably a fused attention over q, k, v -- confirm */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * q, |
| struct ggml_v1_tensor * k, |
| struct ggml_v1_tensor * v, |
| bool masked); /* NOTE(review): presumably enables causal masking -- confirm */ |
|
|
| struct ggml_v1_tensor * ggml_v1_flash_ff( /* NOTE(review): presumably a fused feed-forward block; roles of b0/b1/c0/c1 not visible here -- confirm */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * a, |
| struct ggml_v1_tensor * b0, |
| struct ggml_v1_tensor * b1, |
| struct ggml_v1_tensor * c0, |
| struct ggml_v1_tensor * c1); |
|
|
| |
| /* Graph construction and evaluation. NOTE(review): presumably the automatic-differentiation entry points -- confirm */ |
| |
|
|
| void ggml_v1_set_param( /* NOTE(review): presumably sets tensor->is_param -- confirm */ |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_tensor * tensor); |
|
|
| void ggml_v1_build_forward_expand(struct ggml_v1_cgraph * cgraph, struct ggml_v1_tensor * tensor); /* modifies an existing graph through the pointer */ |
|
|
| struct ggml_v1_cgraph ggml_v1_build_forward (struct ggml_v1_tensor * tensor); /* returns the graph by value; the struct embeds three GGML_V1_MAX_NODES-sized pointer arrays */ |
| struct ggml_v1_cgraph ggml_v1_build_backward(struct ggml_v1_context * ctx, struct ggml_v1_cgraph * gf, bool keep); /* also by value; NOTE(review): meaning of keep not visible here -- confirm */ |
|
|
| void ggml_v1_graph_compute(struct ggml_v1_context * ctx, struct ggml_v1_cgraph * cgraph); /* NOTE(review): presumably evaluates every node of cgraph -- confirm */ |
| void ggml_v1_graph_reset (struct ggml_v1_cgraph * cgraph); /* NOTE(review): what is reset (gradients?) is not visible here -- confirm */ |
|
|
| /* Takes a read-only graph. NOTE(review): presumably prints node and timing information -- confirm */ |
| void ggml_v1_graph_print(const struct ggml_v1_cgraph * cgraph); |
|
|
| /* Takes two read-only graphs and a file name. NOTE(review): presumably writes Graphviz dot output to filename -- confirm */ |
| void ggml_v1_graph_dump_dot(const struct ggml_v1_cgraph * gb, const struct ggml_v1_cgraph * gf, const char * filename); |
|
|
| |
| /* Optimization API. */ |
| |
|
|
| /* Optimizer selector: stored in ggml_v1_opt_params.type and passed to ggml_v1_opt_default_params(). */ |
| enum ggml_v1_opt_type { |
| GGML_V1_OPT_ADAM, /* = 0; settings live in ggml_v1_opt_params.adam */ |
| GGML_V1_OPT_LBFGS, /* = 1; settings live in ggml_v1_opt_params.lbfgs */ |
| }; |
|
|
| /* Line search selector, stored in ggml_v1_opt_params.lbfgs.linesearch. */ |
| enum ggml_v1_linesearch { |
| GGML_V1_LINESEARCH_DEFAULT = 1, /* same value as GGML_V1_LINESEARCH_BACKTRACKING_WOLFE below */ |
|
|
| GGML_V1_LINESEARCH_BACKTRACKING_ARMIJO = 0, |
| GGML_V1_LINESEARCH_BACKTRACKING_WOLFE = 1, |
| GGML_V1_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2, |
| }; |
|
|
| /* Return value of ggml_v1_opt(): optimizer codes are >= 0, line search codes are negative. */ |
| enum ggml_v1_opt_result { |
| GGML_V1_OPT_OK = 0, |
| GGML_V1_OPT_DID_NOT_CONVERGE, /* = 1 */ |
| GGML_V1_OPT_NO_CONTEXT, /* = 2 */ |
| GGML_V1_OPT_INVALID_WOLFE, /* = 3 */ |
| GGML_V1_OPT_FAIL, /* = 4 */ |
|
|
| GGML_V1_LINESEARCH_FAIL = -128, /* numbering restarts here; the following codes count up from -128 */ |
| GGML_V1_LINESEARCH_MINIMUM_STEP, /* = -127 */ |
| GGML_V1_LINESEARCH_MAXIMUM_STEP, /* = -126 */ |
| GGML_V1_LINESEARCH_MAXIMUM_ITERATIONS, /* = -125 */ |
| GGML_V1_LINESEARCH_INVALID_PARAMETERS, /* = -124 */ |
| }; |
|
|
| /* Optimizer settings, passed by value to ggml_v1_opt(). */ |
| /* ggml_v1_opt_default_params() returns a struct of this type for a given optimizer; */ |
| /* the default values themselves are not visible in this header. */ |
| |
| struct ggml_v1_opt_params { |
| enum ggml_v1_opt_type type; /* NOTE(review): presumably selects which of adam / lbfgs below is used -- confirm */ |
|
|
| int n_threads; |
|
|
| /* NOTE(review): past and delta presumably control a delta-based convergence test: */ |
| /* past == 0 presumably disables it; past > 0 presumably compares the current */ |
| /* objective with the value from past iterations ago, relative to delta -- confirm */ |
| |
| |
| |
| int past; |
| float delta; |
|
|
| /* NOTE(review): presumably the maximum number of iterations without improvement: */ |
| /* 0 presumably disables the check -- confirm */ |
| |
| |
| |
| |
| int max_no_improvement; |
|
|
| bool print_forward_graph; /* NOTE(review): presumably makes ggml_v1_opt() print the forward graph -- confirm */ |
| bool print_backward_graph; /* NOTE(review): presumably makes ggml_v1_opt() print the backward graph -- confirm */ |
|
|
| /* Settings under the adam member. */ |
| struct { |
| int n_iter; /* NOTE(review): presumably the iteration limit -- confirm */ |
|
|
| float alpha; /* NOTE(review): presumably the learning rate -- confirm */ |
| float beta1; |
| float beta2; |
| float eps; |
| float eps_f; /* NOTE(review): presumably a tolerance on the objective value -- confirm */ |
| float eps_g; /* NOTE(review): presumably a tolerance on the gradient -- confirm */ |
| } adam; |
|
|
| /* Settings under the lbfgs member. */ |
| struct { |
| int m; /* NOTE(review): presumably the number of stored corrections -- confirm */ |
| int n_iter; |
| int max_linesearch; /* NOTE(review): presumably the line search iteration limit -- confirm */ |
|
|
| float eps; |
| float ftol; |
| float wolfe; |
| float min_step; |
| float max_step; |
|
|
| enum ggml_v1_linesearch linesearch; /* line search selector, see enum ggml_v1_linesearch */ |
| } lbfgs; |
| }; |
|
|
| struct ggml_v1_opt_params ggml_v1_opt_default_params(enum ggml_v1_opt_type type); /* returns a parameter struct by value for the given optimizer type */ |
|
|
| /* Runs the optimizer with params on tensor f and returns an ggml_v1_opt_result code. NOTE(review): presumably minimizes the function defined by f -- confirm */ |
| enum ggml_v1_opt_result ggml_v1_opt( |
| struct ggml_v1_context * ctx, |
| struct ggml_v1_opt_params params, |
| struct ggml_v1_tensor * f); |
|
|
| |
| /* Feature queries: each takes no arguments and returns int. NOTE(review): presumably non-zero when the feature is available; compile-time vs. run-time detection is not visible here -- confirm */ |
| |
|
|
| int ggml_v1_cpu_has_avx(void); |
| int ggml_v1_cpu_has_avx2(void); |
| int ggml_v1_cpu_has_avx512(void); |
| int ggml_v1_cpu_has_fma(void); |
| int ggml_v1_cpu_has_neon(void); |
| int ggml_v1_cpu_has_arm_fma(void); |
| int ggml_v1_cpu_has_f16c(void); |
| int ggml_v1_cpu_has_fp16_va(void); |
| int ggml_v1_cpu_has_wasm_simd(void); |
| int ggml_v1_cpu_has_blas(void); |
| int ggml_v1_cpu_has_sse3(void); |
| int ggml_v1_cpu_has_vsx(void); |
|
|
| #ifdef __cplusplus |
| } |
| #endif |
|
|