diff --git a/.github/workflows/shared.yml b/.github/workflows/shared.yml index 56cbf34fbf..812efb238d 100644 --- a/.github/workflows/shared.yml +++ b/.github/workflows/shared.yml @@ -140,7 +140,7 @@ jobs: run: | case "${{ matrix.target }}" in "linux-aarch64") - sudo apt-get -y install autoconf-archive + sudo apt-get -y install autoconf-archive pkg-config bazel run //bazel/toolchain:aarch64-linux-musl-gcc ;; "linux-x86_64") diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000000..00bb18361f --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,6 @@ +############################################################################### +# Bazel now uses Bzlmod by default to manage external dependencies. +# Please consider migrating your external dependencies from WORKSPACE to MODULE.bazel. +# +# For more details, please check https://github.com/bazelbuild/bazel/issues/18958 +############################################################################### diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index e12aa50f5d..c977752462 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -318,6 +318,15 @@ versioned_http_archive( version = "2.14", ) +versioned_http_archive( + name = "softblas", + build_file = "//bazel/third_party/softblas:softblas.BUILD", + strip_prefix = "SoftBLAS-{version}", + # sha256 = "", + url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", + version = "cbffb33f19ea02f9ffbd184d445123c57929ec53", +) + versioned_http_archive( name = "softfloat", build_file = "//bazel/third_party/softfloat:softfloat.BUILD", diff --git a/bazel/third_party/softblas/BUILD.bazel b/bazel/third_party/softblas/BUILD.bazel new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bazel/third_party/softblas/softblas.BUILD b/bazel/third_party/softblas/softblas.BUILD new file mode 100644 index 0000000000..34c80c93e6 --- /dev/null +++ b/bazel/third_party/softblas/softblas.BUILD @@ -0,0 +1,117 @@ +# FILEPATH: 
/home/neal/lagoon/vere/bazel/third_party/softblas/softblas.BUILD + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +cc_library( + name = "softblas", + visibility = ["//visibility:public"], + deps = select({ + "@platforms//cpu:aarch64": [":softblas_aarch64"], + "@platforms//cpu:x86_64": [":softblas_x86_64"], + "//conditions:default": [], + }), +) + +cc_library( + name = "softblas_aarch64", + visibility = ["//visibility:public"], + hdrs = ["include/softblas.h"], + includes = ["include"], + srcs = [ + "include/softblas.h", + "src/softblas_state.c", + "src/blas/level1/sasum.c", + "src/blas/level1/dasum.c", + "src/blas/level1/hasum.c", + "src/blas/level1/qasum.c", + "src/blas/level1/saxpy.c", + "src/blas/level1/daxpy.c", + "src/blas/level1/haxpy.c", + "src/blas/level1/qaxpy.c", + "src/blas/level1/scopy.c", + "src/blas/level1/dcopy.c", + "src/blas/level1/hcopy.c", + "src/blas/level1/qcopy.c", + "src/blas/level1/sdot.c", + "src/blas/level1/ddot.c", + "src/blas/level1/hdot.c", + "src/blas/level1/qdot.c", + "src/blas/level1/snrm2.c", + "src/blas/level1/dnrm2.c", + "src/blas/level1/hnrm2.c", + "src/blas/level1/qnrm2.c", + "src/blas/level1/sscal.c", + "src/blas/level1/dscal.c", + "src/blas/level1/hscal.c", + "src/blas/level1/qscal.c", + "src/blas/level1/sswap.c", + "src/blas/level1/dswap.c", + "src/blas/level1/hswap.c", + "src/blas/level1/qswap.c", + "src/blas/level1/isamax.c", + "src/blas/level1/idamax.c", + "src/blas/level1/ihamax.c", + "src/blas/level1/iqamax.c", + "src/blas/level2/sgemv.c", + "src/blas/level2/dgemv.c", + "src/blas/level2/hgemv.c", + "src/blas/level2/qgemv.c", + "src/blas/level3/sgemm.c", + "src/blas/level3/dgemm.c", + "src/blas/level3/hgemm.c", + "src/blas/level3/qgemm.c" + ], + deps = ["@softfloat"], +) + +cc_library( + name = "softblas_x86_64", + visibility = ["//visibility:public"], + hdrs = ["include/softblas.h"], + includes = ["include"], + srcs = [ + "include/softblas.h", + "src/softblas_state.c", + "src/blas/level1/sasum.c", 
+ "src/blas/level1/dasum.c", + "src/blas/level1/hasum.c", + "src/blas/level1/qasum.c", + "src/blas/level1/saxpy.c", + "src/blas/level1/daxpy.c", + "src/blas/level1/haxpy.c", + "src/blas/level1/qaxpy.c", + "src/blas/level1/scopy.c", + "src/blas/level1/dcopy.c", + "src/blas/level1/hcopy.c", + "src/blas/level1/qcopy.c", + "src/blas/level1/sdot.c", + "src/blas/level1/ddot.c", + "src/blas/level1/hdot.c", + "src/blas/level1/qdot.c", + "src/blas/level1/snrm2.c", + "src/blas/level1/dnrm2.c", + "src/blas/level1/hnrm2.c", + "src/blas/level1/qnrm2.c", + "src/blas/level1/sscal.c", + "src/blas/level1/dscal.c", + "src/blas/level1/hscal.c", + "src/blas/level1/qscal.c", + "src/blas/level1/sswap.c", + "src/blas/level1/dswap.c", + "src/blas/level1/hswap.c", + "src/blas/level1/qswap.c", + "src/blas/level1/isamax.c", + "src/blas/level1/idamax.c", + "src/blas/level1/ihamax.c", + "src/blas/level1/iqamax.c", + "src/blas/level2/sgemv.c", + "src/blas/level2/dgemv.c", + "src/blas/level2/hgemv.c", + "src/blas/level2/qgemv.c", + "src/blas/level3/sgemm.c", + "src/blas/level3/dgemm.c", + "src/blas/level3/hgemm.c", + "src/blas/level3/qgemm.c" + ], + deps = ["@softfloat"], +) diff --git a/pkg/c3/motes.h b/pkg/c3/motes.h index 975554e94b..8ed8c81389 100644 --- a/pkg/c3/motes.h +++ b/pkg/c3/motes.h @@ -258,6 +258,7 @@ # define c3__corp c3_s4('c','o','r','p') # define c3__corp c3_s4('c','o','r','p') # define c3__cow c3_s3('c','o','w') +# define c3__cplx c3_s4('c','p','l','x') # define c3__cpu c3_s3('c','p','u') # define c3__crad c3_s4('c','r','a','d') # define c3__cram c3_s4('c','r','a','m') @@ -430,6 +431,7 @@ # define c3__fit c3_s3('f','i','t') # define c3__fits c3_s4('f','i','t','s') # define c3__fix c3_s3('f','i','x') +# define c3__fixp c3_s4('f','i','x','p') # define c3__fl c3_s2('f','l') # define c3__flac c3_s4('f','l','a','c') # define c3__flag c3_s4('f','l','a','g') @@ -602,6 +604,7 @@ # define c3__info c3_s4('i','n','f','o') # define c3__init c3_s4('i','n','i','t') # define c3__ins 
c3_s3('i','n','s') +# define c3__int2 c3_s4('i','n','t','2') # define c3__into c3_s4('i','n','t','o') # define c3__intr c3_s4('i','n','t','r') # define c3__inuk c3_s4('i','n','u','k') @@ -610,6 +613,7 @@ # define c3__is c3_s2('i','s') # define c3__item c3_s4('i','t','e','m') # define c3__ix c3_s2('i','x') +# define c3__i754 c3_s4('i','7','5','4') # define c3__j c3_s1('j') # define c3__jack c3_s4('j','a','c','k') # define c3__jam c3_s3('j','a','m') @@ -971,6 +975,7 @@ # define c3__rasp c3_s4('r','a','s','p') # define c3__raw c3_s3('r','a','w') # define c3__read c3_s4('r','e','a','d') +# define c3__real c3_s4('r','e','a','l') # define c3__reck c3_s4('r','e','c','k') # define c3__reef c3_s4('r','e','e','f') # define c3__resd c3_s4('r','e','s','d') @@ -1232,11 +1237,13 @@ # define c3__ubin c3_s4('u','b','i','n') # define c3__ubit c3_s4('u','b','i','t') # define c3__ud c3_s2('u','d') +# define c3__uint c3_s4('u','i','n','t') # define c3__ulib c3_s4('u','l','i','b') # define c3__un c3_s2('u','n') # define c3__uniq c3_s4('u','n','i','q') # define c3__unix c3_s4('u','n','i','x') # define c3__unt c3_s3('u','n','t') +# define c3__unum c3_s4('u','n','u','m') # define c3__up c3_s2('u','p') # define c3__url c3_s3('u','r','l') # define c3__urth c3_s4('u','r','t','h') diff --git a/pkg/noun/BUILD.bazel b/pkg/noun/BUILD.bazel index 1be5825990..76959d569d 100644 --- a/pkg/noun/BUILD.bazel +++ b/pkg/noun/BUILD.bazel @@ -39,9 +39,11 @@ vere_library( "@openssl", "@pdjson", "@sigsegv", + "@softblas", "@softfloat", "@urcrypt", "@whereami", + "@zlib", ] + select({ "@platforms//os:macos": ["//pkg/noun/platform/darwin"], "@platforms//os:linux": [ diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index 8ca5085397..829356a96a 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -2005,9 +2005,8 @@ u3a_maid(FILE* fil_u, c3_c* cap_c, c3_w wor_w) /* _ca_print_memory(): un-captioned u3a_print_memory(). 
*/ static void -_ca_print_memory(FILE* fil_u, c3_w wor_w) +_ca_print_memory(FILE* fil_u, c3_w byt_w) { - c3_w byt_w = (wor_w * 4); c3_w gib_w = (byt_w / 1000000000); c3_w mib_w = (byt_w % 1000000000) / 1000000; c3_w kib_w = (byt_w % 1000000) / 1000; @@ -2028,43 +2027,55 @@ _ca_print_memory(FILE* fil_u, c3_w wor_w) } } +/* u3a_quac_free: recursively free a quac tree; a NULL child array (leaf) is skipped. +*/ +void +u3a_quac_free(u3m_quac* qua_u) +{ + c3_w i_w = 0; + while ( (NULL != qua_u->qua_u) && (NULL != qua_u->qua_u[i_w]) ) { + u3a_quac_free(qua_u->qua_u[i_w]); + i_w++; + } + c3_free(qua_u->nam_c); + c3_free(qua_u->qua_u); + c3_free(qua_u); +} + /* u3a_prof(): mark/measure/print memory profile. RETAIN. */ -c3_w -u3a_prof(FILE* fil_u, c3_w den_w, u3_noun mas) +u3m_quac* +u3a_prof(FILE* fil_u, u3_noun mas) { - c3_w tot_w = 0; + u3m_quac* pro_u = c3_calloc(sizeof(*pro_u)); u3_noun h_mas, t_mas; if ( c3n == u3r_cell(mas, &h_mas, &t_mas) ) { - fprintf(fil_u, "%.*smistyped mass\r\n", den_w, ""); - return tot_w; + fprintf(fil_u, "mistyped mass\r\n"); + c3_free(pro_u); + return NULL; } - else if ( _(u3du(h_mas)) ) { - fprintf(fil_u, "%.*smistyped mass head\r\n", den_w, ""); + else if ( c3y == u3du(h_mas) ) { + fprintf(fil_u, "mistyped mass head\r\n"); { c3_c* lab_c = u3m_pretty(h_mas); fprintf(fil_u, "h_mas: %s", lab_c); c3_free(lab_c); } - return tot_w; + c3_free(pro_u); + return NULL; } else { - { - c3_c* lab_c = u3m_pretty(h_mas); - fprintf(fil_u, "%*s%s: ", den_w, "", lab_c); - c3_free(lab_c); - } u3_noun it_mas, tt_mas; if ( c3n == u3r_cell(t_mas, &it_mas, &tt_mas) ) { - fprintf(fil_u, "%*smistyped mass tail\r\n", den_w, ""); - return tot_w; + fprintf(fil_u, "mistyped mass tail\r\n"); + c3_free(pro_u); + return NULL; } else if ( c3y == it_mas ) { - tot_w += u3a_mark_noun(tt_mas); - _ca_print_memory(fil_u, tot_w); + c3_w siz_w = u3a_mark_noun(tt_mas); #if 1 /* The basic issue here is that tt_mas is included in .sac @@ -2075,7 +2086,7 @@ u3a_prof(FILE* fil_u, c3_w den_w, u3_noun mas) * * see u3a_mark_ptr(). 
*/ - if ( _(u3a_is_dog(tt_mas)) ) { + if ( c3y == u3a_is_dog(tt_mas) ) { u3a_box* box_u = u3a_botox(u3a_to_ptr(tt_mas)); #ifdef U3_MEMORY_DEBUG if ( 1 == box_u->eus_w ) { @@ -2094,45 +2105,131 @@ u3a_prof(FILE* fil_u, c3_w den_w, u3_noun mas) #endif } #endif + pro_u->nam_c = u3r_string(h_mas); + pro_u->siz_w = siz_w*4; + pro_u->qua_u = NULL; + return pro_u; - return tot_w; } else if ( c3n == it_mas ) { - fprintf(fil_u, "\r\n"); - - while ( _(u3du(tt_mas)) ) { - tot_w += u3a_prof(fil_u, den_w+2, u3h(tt_mas)); + pro_u->qua_u = c3_malloc(sizeof(pro_u->qua_u)); + c3_w i_w = 0; + c3_t bad_t = 0; + while ( c3y == u3du(tt_mas) ) { + u3m_quac* new_u = u3a_prof(fil_u, u3h(tt_mas)); + if ( NULL == new_u ) { + bad_t = 1; + } else { + pro_u->qua_u = c3_realloc(pro_u->qua_u, (i_w + 2) * sizeof(pro_u->qua_u)); + pro_u->siz_w += new_u->siz_w; + pro_u->qua_u[i_w] = new_u; + i_w++; /* advance only on success: keeps the child array dense and the NULL terminator in bounds */ + } tt_mas = u3t(tt_mas); } + pro_u->qua_u[i_w] = NULL; - fprintf(fil_u, "%*s--", den_w, ""); - _ca_print_memory(fil_u, tot_w); - - return tot_w; - + if ( bad_t ) { + i_w = 0; + while ( pro_u->qua_u[i_w] != NULL ) { + u3a_quac_free(pro_u->qua_u[i_w]); + i_w++; + } + c3_free(pro_u->qua_u); + c3_free(pro_u); + return NULL; + } else { + pro_u->nam_c = u3r_string(h_mas); + return pro_u; + } } else { - fprintf(fil_u, "%*smistyped (strange) mass tail\r\n", den_w, ""); - return tot_w; + fprintf(fil_u, "mistyped (strange) mass tail\r\n"); + c3_free(pro_u); + return NULL; + } + } +} + + +/* u3a_print_quac: print a memory report. 
+*/ + +void +u3a_print_quac(FILE* fil_u, c3_w den_w, u3m_quac* mas_u) +{ + u3_assert( 0 != fil_u ); + + if ( mas_u->siz_w ) { + fprintf(fil_u, "%*s%s: ", den_w, "", mas_u->nam_c); + + if ( mas_u->qua_u == NULL ) { + _ca_print_memory(fil_u, mas_u->siz_w); + } else { + fprintf(fil_u, "\r\n"); + c3_w i_w = 0; + while ( mas_u->qua_u[i_w] != NULL ) { + u3a_print_quac(fil_u, den_w+2, mas_u->qua_u[i_w]); + i_w++; + } + fprintf(fil_u, "%*s--", den_w, ""); + _ca_print_memory(fil_u, mas_u->siz_w); } } } /* u3a_mark_road(): mark ad-hoc persistent road structures. */ -c3_w -u3a_mark_road(FILE* fil_u) -{ - c3_w tot_w = 0; - tot_w += u3a_maid(fil_u, " namespace", u3a_mark_noun(u3R->ski.gul)); - tot_w += u3a_maid(fil_u, " trace stack", u3a_mark_noun(u3R->bug.tax)); - tot_w += u3a_maid(fil_u, " trace buffer", u3a_mark_noun(u3R->bug.mer)); - tot_w += u3a_maid(fil_u, " profile batteries", u3a_mark_noun(u3R->pro.don)); - tot_w += u3a_maid(fil_u, " profile doss", u3a_mark_noun(u3R->pro.day)); - tot_w += u3a_maid(fil_u, " new profile trace", u3a_mark_noun(u3R->pro.trace)); - tot_w += u3a_maid(fil_u, " transient memoization cache", u3h_mark(u3R->cax.har_p)); - tot_w += u3a_maid(fil_u, " persistent memoization cache", u3h_mark(u3R->cax.per_p)); - return u3a_maid(fil_u, "total road stuff", tot_w); +u3m_quac* +u3a_mark_road() +{ + u3m_quac** qua_u = c3_malloc(sizeof(*qua_u) * 9); + + qua_u[0] = c3_calloc(sizeof(*qua_u[0])); + qua_u[0]->nam_c = strdup("namespace"); + qua_u[0]->siz_w = u3a_mark_noun(u3R->ski.gul) * 4; + + qua_u[1] = c3_calloc(sizeof(*qua_u[1])); + qua_u[1]->nam_c = strdup("trace stack"); + qua_u[1]->siz_w = u3a_mark_noun(u3R->bug.tax) * 4; + + qua_u[2] = c3_calloc(sizeof(*qua_u[2])); + qua_u[2]->nam_c = strdup("trace buffer"); + qua_u[2]->siz_w = u3a_mark_noun(u3R->bug.mer) * 4; + + qua_u[3] = c3_calloc(sizeof(*qua_u[3])); + qua_u[3]->nam_c = strdup("profile batteries"); + qua_u[3]->siz_w = u3a_mark_noun(u3R->pro.don) * 4; + + qua_u[4] = c3_calloc(sizeof(*qua_u[4])); + 
qua_u[4]->nam_c = strdup("profile doss"); + qua_u[4]->siz_w = u3a_mark_noun(u3R->pro.day) * 4; + + qua_u[5] = c3_calloc(sizeof(*qua_u[5])); + qua_u[5]->nam_c = strdup("new profile trace"); + qua_u[5]->siz_w = u3a_mark_noun(u3R->pro.trace) * 4; + + qua_u[6] = c3_calloc(sizeof(*qua_u[6])); + qua_u[6]->nam_c = strdup("transient memoization cache"); + qua_u[6]->siz_w = u3h_mark(u3R->cax.har_p) * 4; + + qua_u[7] = c3_calloc(sizeof(*qua_u[7])); + qua_u[7]->nam_c = strdup("persistent memoization cache"); + qua_u[7]->siz_w = u3h_mark(u3R->cax.per_p) * 4; + + qua_u[8] = NULL; + + c3_w sum_w = 0; + for (c3_w i_w = 0; i_w < 8; i_w++) { + sum_w += qua_u[i_w]->siz_w; + } + + u3m_quac* tot_u = c3_malloc(sizeof(*tot_u)); + tot_u->nam_c = strdup("total road stuff"); + tot_u->siz_w = sum_w; + tot_u->qua_u = qua_u; + + return tot_u; } /* u3a_reclaim(): clear ad-hoc persistent caches to reclaim memory. diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index b851b64c98..d0954f9656 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -606,8 +606,8 @@ /* u3a_mark_road(): mark ad-hoc persistent road structures. */ - c3_w - u3a_mark_road(FILE* fil_u); + u3m_quac* + u3a_mark_road(); /* u3a_reclaim(): clear ad-hoc persistent caches to reclaim memory. */ @@ -702,21 +702,35 @@ void u3a_print_time(c3_c* str_c, c3_c* cap_c, c3_d mic_d); + /* u3a_print_quac: print a quac memory report. + */ + void + u3a_print_quac(FILE* fil_u, c3_w den_w, u3m_quac* mas_u); + /* u3a_print_memory(): print memory amount. */ void u3a_print_memory(FILE* fil_u, c3_c* cap_c, c3_w wor_w); - /* u3a_prof(): mark/measure/print memory profile. RETAIN. */ - c3_w - u3a_prof(FILE* fil_u, c3_w den_w, u3_noun mas); + u3m_quac* + u3a_prof(FILE* fil_u, u3_noun mas); /* u3a_maid(): maybe print memory. */ c3_w u3a_maid(FILE* fil_u, c3_c* cap_c, c3_w wor_w); + /* u3a_quac_free(): free quac memory. + */ + void + u3a_quac_free(u3m_quac* qua_u); + + /* u3a_uncap_print_memory(): un-captioned print memory amount. 
+ */ + void + u3a_uncap_print_memory(FILE* fil_u, c3_w byt_w); + /* u3a_deadbeef(): write 0xdeadbeef from hat to cap. */ void diff --git a/pkg/noun/jets.c b/pkg/noun/jets.c index 45e7e8144a..c0b17142ef 100644 --- a/pkg/noun/jets.c +++ b/pkg/noun/jets.c @@ -2305,27 +2305,61 @@ _cj_mark_hank(u3_noun kev, void* dat) /* u3j_mark(): mark jet state for gc. */ -c3_w -u3j_mark(FILE* fil_u) +u3m_quac* +u3j_mark() { - c3_w tot_w = 0; + u3m_quac** qua_u = c3_malloc(sizeof(*qua_u) * 7); - tot_w += u3a_maid(fil_u, " warm jet state", u3h_mark(u3R->jed.war_p)); - tot_w += u3a_maid(fil_u, " cold jet state", u3h_mark(u3R->jed.cod_p)); - tot_w += u3a_maid(fil_u, " hank cache", u3h_mark(u3R->jed.han_p)); - tot_w += u3a_maid(fil_u, " battery hash cache", u3h_mark(u3R->jed.bas_p)); + qua_u[0] = c3_calloc(sizeof(*qua_u[0])); + qua_u[0]->nam_c = strdup("warm jet state"); + qua_u[0]->siz_w = u3h_mark(u3R->jed.war_p) * 4; - { - c3_w han_w = 0; - u3h_walk_with(u3R->jed.han_p, _cj_mark_hank, &han_w); - tot_w += u3a_maid(fil_u, " call site cache", han_w); + qua_u[1] = c3_calloc(sizeof(*qua_u[1])); + qua_u[1]->nam_c = strdup("cold jet state"); + qua_u[1]->siz_w = u3h_mark(u3R->jed.cod_p) * 4; + + qua_u[2] = c3_calloc(sizeof(*qua_u[2])); + qua_u[2]->nam_c = strdup("hank cache"); + qua_u[2]->siz_w = u3h_mark(u3R->jed.han_p) * 4; + + qua_u[3] = c3_calloc(sizeof(*qua_u[3])); + qua_u[3]->nam_c = strdup("battery hash cache"); + qua_u[3]->siz_w = u3h_mark(u3R->jed.bas_p) * 4; + + qua_u[4] = c3_calloc(sizeof(*qua_u[4])); + qua_u[4]->nam_c = strdup("call site cache"); + u3h_walk_with(u3R->jed.han_p, _cj_mark_hank, &qua_u[4]->siz_w); + qua_u[4]->siz_w *= 4; + + c3_w sum_w = 0; + for ( c3_w i_w = 0; i_w < 5; i_w++ ) { + sum_w += qua_u[i_w]->siz_w; } + u3m_quac* tot_u = c3_calloc(sizeof(*tot_u)); + tot_u->nam_c = strdup("total jet stuff"); + if ( u3R == &(u3H->rod_u) ) { - tot_w += u3a_maid(fil_u, " hot jet state", u3h_mark(u3R->jed.hot_p)); - } + qua_u[5] = c3_calloc(sizeof(*qua_u[5])); + qua_u[5]->nam_c 
= strdup("hot jet state"); + qua_u[5]->siz_w = u3h_mark(u3R->jed.hot_p) * 4; + + sum_w += qua_u[5]->siz_w; + + qua_u[6] = NULL; + + tot_u->siz_w = sum_w; + tot_u->qua_u = qua_u; - return u3a_maid(fil_u, "total jet stuff", tot_w); + return tot_u; + } else { + qua_u[5] = NULL; + + tot_u->siz_w = sum_w; + tot_u->qua_u = qua_u; + + return tot_u; + } } /* u3j_free_hank(): free an entry from the hank cache. diff --git a/pkg/noun/jets.h b/pkg/noun/jets.h index 1440f7558a..81301d1199 100644 --- a/pkg/noun/jets.h +++ b/pkg/noun/jets.h @@ -296,8 +296,8 @@ /* u3j_mark(): mark jet state for gc. */ - c3_w - u3j_mark(FILE* fil_u); + u3m_quac* + u3j_mark(); /* u3j_free(): free jet state. */ diff --git a/pkg/noun/jets/e/crc32.c b/pkg/noun/jets/e/crc32.c new file mode 100644 index 0000000000..1afc473a30 --- /dev/null +++ b/pkg/noun/jets/e/crc32.c @@ -0,0 +1,59 @@ +/// @file + +#include +#include +#include "zlib.h" + +#include "jets/w.h" + +#include "noun.h" + +u3_noun +u3qe_crc32(u3_noun input_octs) +{ + u3_atom head = u3h(input_octs); + u3_atom tail = u3t(input_octs); + c3_w tel_w = u3r_met(3, tail); + c3_w hed_w; + if ( c3n == u3r_safe_word(head, &hed_w) ) { + return u3m_bail(c3__fail); + } + c3_y* input; + + if (c3y == u3a_is_cat(tail)) { + input = &tail; + } + else { + u3a_atom* vat_u = u3a_to_ptr(tail); + input = (c3_y*)vat_u->buf_w; + } + + if ( tel_w > hed_w ) { + return u3m_error("subtract-underflow"); + } + + c3_w led_w = hed_w - tel_w; + c3_w crc_w = 0; + + crc_w = crc32(crc_w, input, tel_w); + + while ( led_w > 0 ) { + c3_y byt_y = 0; + crc_w = crc32(crc_w, &byt_y, 1); + led_w--; + } + + return u3i_word(crc_w); +} + +u3_noun +u3we_crc32(u3_noun cor) +{ + u3_noun a = u3r_at(u3x_sam, cor); + + if ( (u3du(a) == c3y) && (u3ud(u3h(a)) == c3y) && (u3ud(u3t(a)) == c3y) ) { + return u3qe_crc32(a); + } else { + return u3m_bail(c3__exit); + } +} diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c new file mode 100644 index 0000000000..13c0f2a138 --- /dev/null +++ 
b/pkg/noun/jets/i/lagoon.c @@ -0,0 +1,3314 @@ +/// @file + +#include "jets/q.h" +#include "jets/w.h" + +#include "noun.h" +#include "softfloat.h" +#include "softblas.h" + +#include // for pow() +#include + +#define f16_ceil(a) f16_roundToInt( a, softfloat_round_max, false ) +#define f32_ceil(a) f32_roundToInt( a, softfloat_round_max, false ) +#define f64_ceil(a) f64_roundToInt( a, softfloat_round_max, false ) +#define f128M_ceil(a, b) f128M_roundToInt( a, softfloat_round_max, false, b ) + + union half { + float16_t h; + c3_w c; + }; + + union sing { + float32_t s; + c3_w c; + }; + + union doub { + float64_t d; + c3_d c; + }; + + union quad { + float128_t q; + c3_d c[2]; + }; + + // $?(%n %u %d %z %a) + static inline void + _set_rounding(c3_w a) + { + // We could use SoftBLAS set_rounding() to set the SoftFloat + // mode as well, but it's more explicit to do it here since + // we may use SoftFloat in any given Lagoon jet and we want + // you, dear developer, to see it set here. + switch ( a ) + { + default: + u3m_bail(c3__fail); + break; + // %n - near + case c3__n: + softfloat_roundingMode = softfloat_round_near_even; + softblas_roundingMode = 'n'; + break; + // %z - zero + case c3__z: + softfloat_roundingMode = softfloat_round_minMag; + softblas_roundingMode = 'z'; + break; + // %u - up + case c3__u: + softfloat_roundingMode = softfloat_round_max; + softblas_roundingMode = 'u'; + break; + // %d - down + case c3__d: + softfloat_roundingMode = softfloat_round_min; + softblas_roundingMode = 'd'; + break; + // %a - away + case c3__a: + softfloat_roundingMode = softfloat_round_near_maxMag; + softblas_roundingMode = 'a'; + break; + } + } + +/* length of shape = x * y * z * w * ... +*/ + static inline c3_d _get_length(u3_noun shape) + { + c3_d len = 1; + while (u3_nul != shape) { + len = len * u3x_atom(u3h(shape)); + shape = u3t(shape); + } + return len; + } + +/* get dims from shape as array [x y z w ...] 
+*/ + static inline c3_d* _get_dims(u3_noun shape) + { + u3_atom len = u3qb_lent(shape); + c3_d len_d = u3r_chub(0, len); + c3_d* dims = (c3_d*)u3a_malloc(len_d*sizeof(c3_d)); + for (c3_d i = 0; i < len_d; i++) { + dims[i] = u3r_chub(0, u3x_atom(u3h(shape))); + shape = u3t(shape); + } + u3z(len); + return dims; + } + +/* check consistency of array shape and bloq size + |= =ray + ^- ? + .= (roll shape.meta.ray ^mul) + (dec (met bloq.meta.ray data.ray)) +*/ + static inline c3_o _check(u3_noun ray) + { + // Calculate expected size. + u3_atom shp = u3h(u3h(ray)); // (reported) shape of ray, +4 + u3_atom blq = u3h(u3t(u3h(ray))); // block size of ray, +10 + u3_atom sin = _get_length(shp); // calculated length of ray + + // Calculate actual size. + u3_atom len = u3r_met(blq, u3t(ray)); // length of ray + u3_atom dex = u3qa_dec(len); // decrement length b/c of pinned 1 + + return __(sin == dex); + } + +/* add - axpy = 1*x+y +*/ + u3_noun + u3qi_la_add_i754(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq + ) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size. 
+ switch (u3x_atom(bloq)) { + case 4: + haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + break; + + case 5: + saxpy(len_x, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + break; + + case 6: + daxpy(len_x, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + break; + + case 7: + qaxpy(len_x, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* sub - axpy = -1*y+x +*/ + u3_noun + u3qi_la_sub_i754(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq + ) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size. 
+ switch (u3x_atom(bloq)) { + case 4: + haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + break; + + case 5: + saxpy(len_x, (float32_t){SB_REAL32_NEGONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + break; + + case 6: + daxpy(len_x, (float64_t){SB_REAL64_NEGONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + break; + + case 7: + qaxpy(len_x, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + + +/* mul - x.*y + elementwise multiplication +*/ + u3_noun + u3qi_la_mul_i754(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size. 
+ switch (u3x_atom(bloq)) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + ((float16_t*)y_bytes)[i] = f16_mul(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + ((float32_t*)y_bytes)[i] = f32_mul(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]); + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + ((float64_t*)y_bytes)[i] = f64_mul(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]); + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + f128M_mul(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); + } + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* div - x/y + elementwise division +*/ + u3_noun + u3qi_la_div_i754(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size. 
+ switch (u3x_atom(bloq)) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + ((float16_t*)y_bytes)[i] = f16_div(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + ((float32_t*)y_bytes)[i] = f32_div(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]); + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + ((float64_t*)y_bytes)[i] = f64_div(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]); + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + f128M_div(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); + } + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* mod - x % y = x - r*floor(x/r) + remainder after division +*/ + u3_noun + u3qi_la_mod_i754(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size. 
+ switch (u3x_atom(bloq)) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + float16_t x_val16 = ((float16_t*)x_bytes)[i]; + float16_t y_val16 = ((float16_t*)y_bytes)[i]; + // Perform division x/n + float16_t div_result16 = f16_div(x_val16, y_val16); + // Compute floor of the division result + c3_ds floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); + float16_t floor_float16 = i64_to_f16(floor_result16); + // Multiply n by floor(x/n) + float16_t mult_result16 = f16_mul(y_val16, floor_float16); + // Compute remainder: x - n * floor(x/n) + ((float16_t*)y_bytes)[i] = f16_sub(x_val16, mult_result16); + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + float32_t x_val32 = ((float32_t*)x_bytes)[i]; + float32_t y_val32 = ((float32_t*)y_bytes)[i]; + // Perform division x/n + float32_t div_result32 = f32_div(x_val32, y_val32); + // Compute floor of the division result + c3_ds floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); + float32_t floor_float32 = i64_to_f32(floor_result32); + // Multiply n by floor(x/n) + float32_t mult_result32 = f32_mul(y_val32, floor_float32); + // Compute remainder: x - n * floor(x/n) + ((float32_t*)y_bytes)[i] = f32_sub(x_val32, mult_result32); + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + float64_t x_val64 = ((float64_t*)x_bytes)[i]; + float64_t y_val64 = ((float64_t*)y_bytes)[i]; + // Perform division x/n + float64_t div_result64 = f64_div(x_val64, y_val64); + // Compute floor of the division result + c3_ds floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); + float64_t floor_float64 = i64_to_f64(floor_result64); + // Multiply n by floor(x/n) + float64_t mult_result64 = f64_mul(y_val64, floor_float64); + // Compute remainder: x - n * floor(x/n) + ((float64_t*)y_bytes)[i] = f64_sub(x_val64, mult_result64); + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + float128_t x_val128 = ((float128_t*)x_bytes)[i]; + float128_t y_val128 = 
((float128_t*)y_bytes)[i]; + // Perform division x/n + float128_t div_result128; + f128M_div((float128_t*)&x_val128, (float128_t*)&y_val128, (float128_t*)&div_result128); + // Compute floor of the division result + c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false); + float128_t floor_float128; + i64_to_f128M(floor_result128, &floor_float128); + // Multiply n by floor(x/n) + float128_t mult_result128; + f128M_mul(((float128_t*)&y_val128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128)); + // Compute remainder: x - n * floor(x/n) + f128M_sub(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)y_bytes)[i])); + } + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* cumsum - x[0] + x[1] + ... x[n] +*/ + u3_noun + u3qi_la_cumsum_i754(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // y_bytes is the data array (w/ leading 0x1, skipped by for range) + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); + + u3_noun r_data; + + // Switch on the block size. 
+    switch (u3x_atom(bloq)) {
+      //  In every case slot [0] accumulates the total and slot [1] is set to
+      //  0x1; only its first byte is emitted, as the trailing pin byte.
+      case 4: {
+        float16_t sum16[2];
+        sum16[0] = (float16_t){SB_REAL16_ZERO};
+        for (c3_d i = len_x; i > 0; i--) {
+          sum16[0] = f16_add(sum16[0], ((float16_t*)x_bytes)[i-1]);
+        }
+        sum16[1].v = 0x1;
+        r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)sum16);
+        break;
+      }
+
+      case 5: {
+        float32_t sum32[2];
+        sum32[0] = (float32_t){SB_REAL32_ZERO};
+        for (c3_d i = len_x; i > 0; i--) {
+          sum32[0] = f32_add(sum32[0], ((float32_t*)x_bytes)[i-1]);
+        }
+        sum32[1].v = 0x1;
+        r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)sum32);
+        break;
+      }
+
+      case 6: {
+        float64_t sum64[2];
+        sum64[0] = (float64_t){SB_REAL64_ZERO};
+        for (c3_d i = len_x; i > 0; i--) {
+          sum64[0] = f64_add(sum64[0], ((float64_t*)x_bytes)[i-1]);
+        }
+        sum64[1].v = 0x1;
+        r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)sum64);
+        break;
+      }
+
+      case 7: {
+        float128_t sum128[2];
+        sum128[0] = (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+        for (c3_d i = len_x; i > 0; i--) {
+          f128M_add(&(sum128[0]), &(((float128_t*)x_bytes)[i-1]), &(sum128[0]));
+        }
+        sum128[1] = (float128_t){0x1, 0x0};
+        r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)sum128);
+        break;
+      }
+    }
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* argmin - argmin(x)
+     index of the smallest element, counted from the end of the byte buffer
+*/
+  u3_noun
+  u3qi_la_argmin_i754(u3_noun x_data,
+                      u3_noun shape,
+                      u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1, which doesn't matter here)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // 64-bit index: it is assigned from c3_d arithmetic and handed to
+    // u3i_chub, so a 32-bit holder would silently truncate large indices.
+    c3_d min_idx = 0;
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      //  Strict less-than keeps the first minimum met in buffer order.  The
+      //  stored index is len_x - i - 1, i.e. counted from the buffer's end.
+      case 4: {
+        float16_t min_val16 = ((float16_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f16_lt(((float16_t*)x_bytes)[i], min_val16)) {
+            min_val16 = ((float16_t*)x_bytes)[i];
+            min_idx = (len_x - i - 1);
+          }
+        }
+        break;
+      }
+
+      case 5: {
+        float32_t min_val32 = ((float32_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f32_lt(((float32_t*)x_bytes)[i], min_val32)) {
+            min_val32 = ((float32_t*)x_bytes)[i];
+            min_idx = (len_x - i - 1);
+          }
+        }
+        break;
+      }
+
+      case 6: {
+        float64_t min_val64 = ((float64_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f64_lt(((float64_t*)x_bytes)[i], min_val64)) {
+            min_val64 = ((float64_t*)x_bytes)[i];
+            min_idx = (len_x - i - 1);
+          }
+        }
+        break;
+      }
+
+      case 7: {
+        float128_t min_val128 = ((float128_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f128M_lt(&(((float128_t*)x_bytes)[i]), &min_val128)) {
+            min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]);
+            min_idx = (len_x - i - 1);
+          }
+        }
+        break;
+      }
+    }
+
+    u3_noun r_data = u3i_chub(min_idx);
+
+    // Clean up and return.  The scratch buffer was previously never released.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* argmax - argmax(x)
+     index of the largest element, counted from the end of the byte buffer
+*/
+  u3_noun
+  u3qi_la_argmax_i754(u3_noun x_data,
+                      u3_noun shape,
+                      u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1, which doesn't matter here)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // 64-bit index: assigned from c3_d arithmetic and passed to u3i_chub.
+    c3_d max_idx = 0;
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      //  Strict greater-than keeps the first maximum met in buffer order.
+      //  The stored index is len_x - i - 1, i.e. counted from the end.
+      case 4: {
+        float16_t max_val16 = ((float16_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f16_gt(((float16_t*)x_bytes)[i], max_val16)) {
+            max_val16 = ((float16_t*)x_bytes)[i];
+            max_idx = (len_x - i - 1);
+          }
+        }
+        break;
+      }
+
+      case 5: {
+        float32_t max_val32 = ((float32_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f32_gt(((float32_t*)x_bytes)[i], max_val32)) {
+            max_val32 = ((float32_t*)x_bytes)[i];
+            max_idx = (len_x - i - 1);
+          }
+        }
+        break;
+      }
+
+      case 6: {
+        float64_t max_val64 = ((float64_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f64_gt(((float64_t*)x_bytes)[i], max_val64)) {
+            max_val64 = ((float64_t*)x_bytes)[i];
+            max_idx = (len_x - i - 1);
+          }
+        }
+        break;
+      }
+
+      case 7: {
+        float128_t max_val128 = ((float128_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f128M_gt(&(((float128_t*)x_bytes)[i]), &max_val128)) {
+            max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]);
+            max_idx = (len_x - i - 1);
+          }
+        }
+        break;
+      }
+    }
+
+    u3_noun r_data = u3i_chub(max_idx);
+
+    // Clean up and return.  The scratch buffer was previously never released.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* ravel - x -> ~[x[0], x[1], ... x[n]]
+     entire nd-array busted out as a linear list
+*/
+  u3_noun
+  u3qi_la_ravel_i754(u3_noun x_data,
+                     u3_noun shape,
+                     u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // r_data is the result noun of [data]; starts as the empty list
+    u3_noun r_data = u3_nul;
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      //  Each element is consed onto the front of the list, so the last
+      //  element of the byte buffer ends up at the head of the result.
+      case 4:
+        for (c3_d i = 0; i < len_x; i++) {
+          float16_t x_val16 = ((float16_t*)x_bytes)[i];
+          r_data = u3nc(u3i_word(x_val16.v), r_data);
+        }
+        break;
+
+      case 5:
+        for (c3_d i = 0; i < len_x; i++) {
+          float32_t x_val32 = ((float32_t*)x_bytes)[i];
+          r_data = u3nc(u3i_word(x_val32.v), r_data);
+        }
+        break;
+
+      case 6:
+        for (c3_d i = 0; i < len_x; i++) {
+          float64_t x_val64 = ((float64_t*)x_bytes)[i];
+          r_data = u3nc(u3i_chub(x_val64.v), r_data);
+        }
+        break;
+
+      case 7:
+        for (c3_d i = 0; i < len_x; i++) {
+          float128_t x_val128 = ((float128_t*)x_bytes)[i];
+          // a quad value is imported as two 64-bit chubs
+          r_data = u3nc(u3i_chubs(2, (c3_d*)&(x_val128.v)), r_data);
+        }
+        break;
+    }
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* min - min(x)
+     smallest element of the array, returned as one pinned value
+*/
+  u3_noun
+  u3qi_la_min_i754(u3_noun x_data,
+                   u3_noun shape,
+                   u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/ leading 0x1, skipped by for range)
+    c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+    u3_noun r_data;
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      //  Fold ?_min over the buffer, then emit [value, 0x1 pin byte].
+      case 4: {
+        float16_t min_val16 = ((float16_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          min_val16 = f16_min(min_val16, ((float16_t*)x_bytes)[i]);
+        }
+        float16_t r16[2];
+        r16[0] = min_val16;
+        r16[1].v = 0x1;
+        r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16);
+        break;
+      }
+
+      case 5: {
+        float32_t min_val32 = ((float32_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          min_val32 = f32_min(min_val32, ((float32_t*)x_bytes)[i]);
+        }
+        float32_t r32[2];
+        r32[0] = min_val32;
+        r32[1].v = 0x1;
+        r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32);
+        break;
+      }
+
+      case 6: {
+        float64_t min_val64 = ((float64_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          min_val64 = f64_min(min_val64, ((float64_t*)x_bytes)[i]);
+        }
+        float64_t r64[2];
+        r64[0] = min_val64;
+        r64[1].v = 0x1;
+        r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64);
+        break;
+      }
+
+      case 7: {
+        float128_t min_val128 = ((float128_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]);
+        }
+        float128_t r128[2];
+        r128[0] = min_val128;
+        r128[1] = (float128_t){0x1, 0x0};
+        r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128);
+        break;
+      }
+    }
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* max - max(x)
+     largest element of the array, returned as one pinned value
+*/
+  u3_noun
+  u3qi_la_max_i754(u3_noun x_data,
+                   u3_noun shape,
+                   u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/ leading 0x1, skipped by for range)
+    c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+    u3_noun r_data;
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      //  Fold ?_max over the buffer, then emit [value, 0x1 pin byte].
+      case 4: {
+        float16_t max_val16 = ((float16_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          max_val16 = f16_max(max_val16, ((float16_t*)x_bytes)[i]);
+        }
+        float16_t r16[2];
+        r16[0] = max_val16;
+        r16[1].v = 0x1;
+        r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16);
+        break;
+      }
+
+      case 5: {
+        float32_t max_val32 = ((float32_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          max_val32 = f32_max(max_val32, ((float32_t*)x_bytes)[i]);
+        }
+        float32_t r32[2];
+        r32[0] = max_val32;
+        r32[1].v = 0x1;
+        r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32);
+        break;
+      }
+
+      case 6: {
+        float64_t max_val64 = ((float64_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          max_val64 = f64_max(max_val64, ((float64_t*)x_bytes)[i]);
+        }
+        float64_t r64[2];
+        r64[0] = max_val64;
+        r64[1].v = 0x1;
+        r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64);
+        break;
+      }
+
+      case 7: {
+        float128_t max_val128 = ((float128_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]);
+        }
+        float128_t r128[2];
+        r128[0] = max_val128;
+        r128[1] = (float128_t){0x1, 0x0};
+        r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128);
+        break;
+      }
+    }
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* abs - |x|
+     elementwise absolute value
+*/
+  u3_noun
+  u3qi_la_abs_i754(u3_noun x_data,
+                   u3_noun shape,
+                   u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/ leading 0x1, skipped by for range);
+    // it is updated in place and re-emitted as the result
+    c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      case 4:
+        for (c3_d i = 0; i < len_x; i++) {
+          ((float16_t*)x_bytes)[i] = f16_abs(((float16_t*)x_bytes)[i]);
+        }
+        break;
+
+      case 5:
+        for (c3_d i = 0; i < len_x; i++) {
+          ((float32_t*)x_bytes)[i] = f32_abs(((float32_t*)x_bytes)[i]);
+        }
+        break;
+
+      case 6:
+        for (c3_d i = 0; i < len_x; i++) {
+          ((float64_t*)x_bytes)[i] = f64_abs(((float64_t*)x_bytes)[i]);
+        }
+        break;
+
+      case 7:
+        for (c3_d i = 0; i < len_x; i++) {
+          ((float128_t*)x_bytes)[i] = f128_abs(((float128_t*)x_bytes)[i]);
+        }
+        break;
+    }
+
+    // r_data is the result noun of [data]
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* gth - x > y
+     elementwise comparison; each result element is 1.0 or 0.0
+*/
+  u3_noun
+  u3qi_la_gth_i754(u3_noun x_data,
+                   u3_noun y_data,
+                   u3_noun shape,
+                   u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // y_bytes is the data array (w/ leading 0x1); results overwrite it in
+    // place, so the pin byte read from y_data is re-emitted unchanged
+    c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      case 4:
+        for (c3_d i = 0; i < len_x; i++) {
+          float16_t x_val16 = ((float16_t*)x_bytes)[i];
+          float16_t y_val16 = ((float16_t*)y_bytes)[i];
+          ((float16_t*)y_bytes)[i] = f16_gt(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO};
+        }
+        break;
+
+      case 5:
+        for (c3_d i = 0; i < len_x; i++) {
+          float32_t x_val32 = ((float32_t*)x_bytes)[i];
+          float32_t y_val32 = ((float32_t*)y_bytes)[i];
+          ((float32_t*)y_bytes)[i] = f32_gt(x_val32, y_val32) ?
(float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO};
+        }
+        break;
+
+      case 6:
+        for (c3_d i = 0; i < len_x; i++) {
+          float64_t x_val64 = ((float64_t*)x_bytes)[i];
+          float64_t y_val64 = ((float64_t*)y_bytes)[i];
+          ((float64_t*)y_bytes)[i] = f64_gt(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO};
+        }
+        break;
+
+      case 7:
+        for (c3_d i = 0; i < len_x; i++) {
+          float128_t x_val128 = ((float128_t*)x_bytes)[i];
+          float128_t y_val128 = ((float128_t*)y_bytes)[i];
+          ((float128_t*)y_bytes)[i] = f128M_gt(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+        }
+        break;
+    }
+
+    // r_data is the result noun of [data]
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+    u3a_free(y_bytes);
+
+    return r_data;
+  }
+
+/* gte - x >= y
+     elementwise comparison; each result element is 1.0 or 0.0
+*/
+  u3_noun
+  u3qi_la_gte_i754(u3_noun x_data,
+                   u3_noun y_data,
+                   u3_noun shape,
+                   u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // y_bytes is the data array (w/ leading 0x1); results overwrite it in
+    // place, so the pin byte read from y_data is re-emitted unchanged
+    c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      case 4:
+        for (c3_d i = 0; i < len_x; i++) {
+          float16_t x_val16 = ((float16_t*)x_bytes)[i];
+          float16_t y_val16 = ((float16_t*)y_bytes)[i];
+          ((float16_t*)y_bytes)[i] = f16_ge(x_val16, y_val16) ?
(float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO};
+        }
+        break;
+
+      case 5:
+        for (c3_d i = 0; i < len_x; i++) {
+          float32_t x_val32 = ((float32_t*)x_bytes)[i];
+          float32_t y_val32 = ((float32_t*)y_bytes)[i];
+          ((float32_t*)y_bytes)[i] = f32_ge(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO};
+        }
+        break;
+
+      case 6:
+        for (c3_d i = 0; i < len_x; i++) {
+          float64_t x_val64 = ((float64_t*)x_bytes)[i];
+          float64_t y_val64 = ((float64_t*)y_bytes)[i];
+          ((float64_t*)y_bytes)[i] = f64_ge(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO};
+        }
+        break;
+
+      case 7:
+        for (c3_d i = 0; i < len_x; i++) {
+          float128_t x_val128 = ((float128_t*)x_bytes)[i];
+          float128_t y_val128 = ((float128_t*)y_bytes)[i];
+          ((float128_t*)y_bytes)[i] = f128M_ge(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+        }
+        break;
+    }
+
+    // r_data is the result noun of [data]
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+    u3a_free(y_bytes);
+
+    return r_data;
+  }
+
+/* lth - x < y
+     elementwise comparison; each result element is 1.0 or 0.0
+*/
+  u3_noun
+  u3qi_la_lth_i754(u3_noun x_data,
+                   u3_noun y_data,
+                   u3_noun shape,
+                   u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // y_bytes is the data array (w/ leading 0x1); results overwrite it in
+    // place, so the pin byte read from y_data is re-emitted unchanged
+    c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      //  Each case views both buffers at the element width and replaces
+      //  y[idx] with 1.0 when x[idx] < y[idx], otherwise 0.0.
+      case 4: {
+        float16_t* lef16 = (float16_t*)x_bytes;
+        float16_t* rit16 = (float16_t*)y_bytes;
+        for (c3_d idx = 0; idx < len_x; idx++) {
+          if (f16_lt(lef16[idx], rit16[idx])) {
+            rit16[idx] = (float16_t){SB_REAL16_ONE};
+          }
+          else {
+            rit16[idx] = (float16_t){SB_REAL16_ZERO};
+          }
+        }
+        break;
+      }
+
+      case 5: {
+        float32_t* lef32 = (float32_t*)x_bytes;
+        float32_t* rit32 = (float32_t*)y_bytes;
+        for (c3_d idx = 0; idx < len_x; idx++) {
+          if (f32_lt(lef32[idx], rit32[idx])) {
+            rit32[idx] = (float32_t){SB_REAL32_ONE};
+          }
+          else {
+            rit32[idx] = (float32_t){SB_REAL32_ZERO};
+          }
+        }
+        break;
+      }
+
+      case 6: {
+        float64_t* lef64 = (float64_t*)x_bytes;
+        float64_t* rit64 = (float64_t*)y_bytes;
+        for (c3_d idx = 0; idx < len_x; idx++) {
+          if (f64_lt(lef64[idx], rit64[idx])) {
+            rit64[idx] = (float64_t){SB_REAL64_ONE};
+          }
+          else {
+            rit64[idx] = (float64_t){SB_REAL64_ZERO};
+          }
+        }
+        break;
+      }
+
+      case 7: {
+        float128_t* lef128 = (float128_t*)x_bytes;
+        float128_t* rit128 = (float128_t*)y_bytes;
+        for (c3_d idx = 0; idx < len_x; idx++) {
+          //  compare on private copies, as the quad API takes pointers
+          float128_t lef = lef128[idx];
+          float128_t rit = rit128[idx];
+          if (f128M_lt(&lef, &rit)) {
+            rit128[idx] = (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE};
+          }
+          else {
+            rit128[idx] = (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+          }
+        }
+        break;
+      }
+    }
+
+    // Re-import the overwritten y buffer (data plus pin byte) as the result.
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+    // Release both scratch buffers before returning.
+    u3a_free(x_bytes);
+    u3a_free(y_bytes);
+
+    return r_data;
+  }
+
+/* lte - x <= y
+     elementwise comparison; each result element is 1.0 or 0.0
+*/
+  u3_noun
+  u3qi_la_lte_i754(u3_noun x_data,
+                   u3_noun y_data,
+                   u3_noun shape,
+                   u3_noun bloq)
+  {
+    // Only bloq sizes 4 through 7 are handled.
+    if (!(bloq >= 4 && bloq <= 7)) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array. We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element).  An integer
+    // shift keeps the size exact instead of routing it through pow()/double.
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // y_bytes is the data array (w/ leading 0x1); results overwrite it in
+    // place, so the pin byte read from y_data is re-emitted unchanged
+    c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      case 4:
+        for (c3_d i = 0; i < len_x; i++) {
+          float16_t x_val16 = ((float16_t*)x_bytes)[i];
+          float16_t y_val16 = ((float16_t*)y_bytes)[i];
+          ((float16_t*)y_bytes)[i] = f16_le(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO};
+        }
+        break;
+
+      case 5:
+        for (c3_d i = 0; i < len_x; i++) {
+          float32_t x_val32 = ((float32_t*)x_bytes)[i];
+          float32_t y_val32 = ((float32_t*)y_bytes)[i];
+          ((float32_t*)y_bytes)[i] = f32_le(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO};
+        }
+        break;
+
+      case 6:
+        for (c3_d i = 0; i < len_x; i++) {
+          float64_t x_val64 = ((float64_t*)x_bytes)[i];
+          float64_t y_val64 = ((float64_t*)y_bytes)[i];
+          ((float64_t*)y_bytes)[i] = f64_le(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO};
+        }
+        break;
+
+      case 7:
+        for (c3_d i = 0; i < len_x; i++) {
+          float128_t x_val128 = ((float128_t*)x_bytes)[i];
+          float128_t y_val128 = ((float128_t*)y_bytes)[i];
+          ((float128_t*)y_bytes)[i] = f128M_le(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+        }
+        break;
+    }
+
+    // r_data is the result noun of [data]
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+    u3a_free(y_bytes);
+
+    return r_data;
+  }
+
+/* adds - axpy = 1*x+[n]
+     adds the scalar n to every element of x
+*/
+  u3_noun
+  u3qi_la_adds_i754(u3_noun x_data,
+                    u3_noun n,
+                    u3_noun shape,
+                    u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // y_bytes is the result array; it has one extra byte for the 0x1 pin,
+    // is filled with n below, and is then accumulated into by ?axpy
+    c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+
+    float16_t  n16;
+    float32_t  n32;
+    float64_t  n64;
+    float128_t n128;
+
+    // Switch on the block size.  We assume that n fits in the target block size; Hoon typecheck should prevent.
+    switch (u3x_atom(bloq)) {
+      case 4:
+        u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
+        // set y to [n]
+        for (c3_d i = 0; i < len_x; i++) {
+          ((float16_t*)y_bytes)[i] = n16;
+        }
+        // y = 1*x + y
+        haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1);
+        break;
+
+      case 5:
+        u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
+        // set y to [n]
+        for (c3_d i = 0; i < len_x; i++) {
+          ((float32_t*)y_bytes)[i] = n32;
+        }
+        saxpy(len_x, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1);
+        break;
+
+      case 6:
+        u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
+        // set y to [n]
+        for (c3_d i = 0; i < len_x; i++) {
+          ((float64_t*)y_bytes)[i] = n64;
+        }
+        daxpy(len_x, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1);
+        break;
+
+      case 7:
+        u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
+        // set y to [n]
+        for (c3_d i = 0; i < len_x; i++) {
+          ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]};
+        }
+        qaxpy(len_x, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1,
(float128_t*)y_bytes, 1);
+        break;
+    }
+
+    // r_data is the result noun of [data]
+    y_bytes[syz_x] = 0x1;  // pin head
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+    u3a_free(y_bytes);
+
+    return r_data;
+  }
+
+/* subs - axpy = -1*[n]+x
+     subtracts the scalar n from every element of x
+*/
+  u3_noun
+  u3qi_la_subs_i754(u3_noun x_data,
+                    u3_noun n,
+                    u3_noun shape,
+                    u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes holds the data and receives the result; the extra byte is
+    // reserved for the 0x1 pin, which is written after the computation
+    c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // y_bytes is scratch space filled with n (w/o leading 0x1)
+    c3_y* y_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+
+    float16_t  n16;
+    float32_t  n32;
+    float64_t  n64;
+    float128_t n128;
+
+    // Switch on the block size.  We assume that n fits in the target block size; Hoon typecheck should prevent.
+    switch (u3x_atom(bloq)) {
+      //  Every case: load n, broadcast it across the scratch buffer, then
+      //  let ?axpy compute x := (-1)*scratch + x in place.
+      case 4: {
+        float16_t* fil16 = (float16_t*)y_bytes;
+        u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
+        for (c3_d idx = 0; idx < len_x; idx++) {
+          fil16[idx] = n16;
+        }
+        haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, fil16, 1, (float16_t*)x_bytes, 1);
+        break;
+      }
+
+      case 5: {
+        float32_t* fil32 = (float32_t*)y_bytes;
+        u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
+        for (c3_d idx = 0; idx < len_x; idx++) {
+          fil32[idx] = n32;
+        }
+        saxpy(len_x, (float32_t){SB_REAL32_NEGONE}, fil32, 1, (float32_t*)x_bytes, 1);
+        break;
+      }
+
+      case 6: {
+        float64_t* fil64 = (float64_t*)y_bytes;
+        u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
+        for (c3_d idx = 0; idx < len_x; idx++) {
+          fil64[idx] = n64;
+        }
+        daxpy(len_x, (float64_t){SB_REAL64_NEGONE}, fil64, 1, (float64_t*)x_bytes, 1);
+        break;
+      }
+
+      case 7: {
+        float128_t* fil128 = (float128_t*)y_bytes;
+        u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
+        for (c3_d idx = 0; idx < len_x; idx++) {
+          fil128[idx] = (float128_t){n128.v[0], n128.v[1]};
+        }
+        qaxpy(len_x, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, fil128, 1, (float128_t*)x_bytes, 1);
+        break;
+      }
+    }
+
+    // Append the 0x1 pin byte and import the in-place result.
+    x_bytes[syz_x] = 0x1;
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+    // Release both buffers before returning.
+    u3a_free(x_bytes);
+    u3a_free(y_bytes);
+
+    return r_data;
+  }
+
+/* muls - ?scal n * x
+     elementwise multiplication
+*/
+  u3_noun
+  u3qi_la_muls_i754(u3_noun x_data,
+                    u3_noun n,
+                    u3_noun shape,
+                    u3_noun bloq)
+  {
+    // Only bloq sizes 4 through 7 are handled.
+    if (!(bloq >= 4 && bloq <= 7)) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array. We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element).  An integer
+    // shift keeps the size exact instead of routing it through pow()/double.
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes holds the data and is scaled in place; the extra trailing
+    // byte carries the 0x1 pin
+    c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+    x_bytes[syz_x] = 0x1;  // pin head
+
+    float16_t  n16;
+    float32_t  n32;
+    float64_t  n64;
+    float128_t n128;
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      case 4:
+        u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
+        hscal(len_x, n16, (float16_t*)x_bytes, 1);
+        break;
+
+      case 5:
+        u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
+        sscal(len_x, n32, (float32_t*)x_bytes, 1);
+        break;
+
+      case 6:
+        u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
+        dscal(len_x, n64, (float64_t*)x_bytes, 1);
+        break;
+
+      case 7:
+        u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
+        qscal(len_x, n128, (float128_t*)x_bytes, 1);
+        break;
+    }
+
+    // r_data is the result noun of [data]
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* divs - ?scal 1/n * x
+     elementwise division, performed as multiplication by the reciprocal
+*/
+  u3_noun
+  u3qi_la_divs_i754(u3_noun x_data,
+                    u3_noun n,
+                    u3_noun shape,
+                    u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes holds the data and is scaled in place; the extra trailing
+    // byte carries the 0x1 pin
+    c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+    x_bytes[syz_x] = 0x1;  // pin head
+
+    float16_t  in16;
+    float32_t  in32;
+    float64_t  in64;
+    float128_t in128;
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      case 4:
+        // XX note that in16 is doing double duty here:
+        //    it first holds n, then 1/n
+        u3r_bytes(0, 2, (c3_y*)&(in16.v), n);
+        in16 = f16_div((float16_t){SB_REAL16_ONE}, in16);
+        hscal(len_x, in16, (float16_t*)x_bytes, 1);
+        break;
+
+      case 5:
+        // XX note that in32 is doing double duty here
+        u3r_bytes(0, 4, (c3_y*)&(in32.v), n);
+        in32 = f32_div((float32_t){SB_REAL32_ONE}, in32);
+        sscal(len_x, in32, (float32_t*)x_bytes, 1);
+        break;
+
+      case 6:
+        // XX note that in64 is doing double duty here
+        u3r_bytes(0, 8, (c3_y*)&(in64.v), n);
+        in64 = f64_div((float64_t){SB_REAL64_ONE}, in64);
+        dscal(len_x, in64, (float64_t*)x_bytes, 1);
+        break;
+
+      case 7:
+        // XX note that in128 is doing double duty here
+        u3r_bytes(0, 16, (c3_y*)&(in128.v[0]), n);
+        f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &in128, &in128);
+        qscal(len_x, in128, (float128_t*)x_bytes, 1);
+        break;
+    }
+
+    // r_data is the result noun of [data]
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* mods - x % [n] = x - n*trunc(x/n)
+     remainder after scalar division; the quotient is rounded toward
+     zero (softfloat_round_minMag) before multiplying back
+*/
+  u3_noun
+  u3qi_la_mods_i754(u3_noun x_data,
+                    u3_noun n,
+                    u3_noun shape,
+                    u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/ leading 0x1, skipped by the loops)
+    // we reuse it for results for parsimony
+    c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+    // n?? is the divisor, in?? its reciprocal
+    float16_t  n16, in16;
+    float32_t  n32, in32;
+    float64_t  n64, in64;
+    float128_t n128, in128;
+
+    // Switch on the block size.
+    switch (u3x_atom(bloq)) {
+      case 4:
+        u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
+        in16 = f16_div((float16_t){SB_REAL16_ONE}, n16);
+
+        for (c3_d i = 0; i < len_x; i++) {
+          float16_t x_val16 = ((float16_t*)x_bytes)[i];
+          // Perform division x/n as x * (1/n)
+          float16_t div_result16 = f16_mul(in16, x_val16);
+          // Round the quotient toward zero (round_minMag truncates; this is
+          // not a true floor for negative quotients)
+          c3_ds floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false);
+          float16_t floor_float16 = i64_to_f16(floor_result16);
+          // Multiply n by the truncated quotient
+          float16_t mult_result16 = f16_mul(n16, floor_float16);
+          // Compute remainder: x - n * trunc(x/n)
+          ((float16_t*)x_bytes)[i] = f16_sub(x_val16, mult_result16);
+        }
+        break;
+
+      case 5:
+        u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
+        in32 = f32_div((float32_t){SB_REAL32_ONE}, n32);
+
+        for (c3_d i = 0; i < len_x; i++) {
+          float32_t x_val32 = ((float32_t*)x_bytes)[i];
+          // Perform division x/n as x * (1/n).  The operands are already
+          // float32_t; casting a struct to its own type is not valid ISO C,
+          // so they are passed directly.
+          float32_t div_result32 = f32_mul(in32, x_val32);
+          // Round the quotient toward zero
+          c3_ds floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false);
+          float32_t floor_float32 = i64_to_f32(floor_result32);
+          // Multiply n by the truncated quotient
+          float32_t mult_result32 = f32_mul(n32, floor_float32);
+          // Compute remainder: x - n * trunc(x/n)
+          ((float32_t*)x_bytes)[i] = f32_sub(x_val32, mult_result32);
+        }
+        break;
+
+      case 6:
+        u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
+        in64 = f64_div((float64_t){SB_REAL64_ONE}, n64);
+
+        for (c3_d i = 0; i < len_x; i++) {
+          float64_t x_val64 = ((float64_t*)x_bytes)[i];
+          // Perform division x/n as x * (1/n)
+          float64_t div_result64 = f64_mul(in64, x_val64);
+          // Round the quotient toward zero
+          c3_ds floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false);
+          float64_t floor_float64 = i64_to_f64(floor_result64);
+          // Multiply n by the truncated quotient
+          float64_t mult_result64 = f64_mul(n64, floor_float64);
+          // Compute remainder: x - n * trunc(x/n)
+          ((float64_t*)x_bytes)[i] = f64_sub(x_val64, mult_result64);
+        }
+
break;
+
+      case 7:
+        u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
+        // in128 = 1/n.  The dividend must be 1.0, which needs BOTH halves of
+        // the ONE constant; pairing L_ONE with U_ZERO does not form the quad
+        // 1.0 used by the other quad-precision jets in this file.
+        f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &n128, &in128);
+
+        for (c3_d i = 0; i < len_x; i++) {
+          float128_t x_val128 = ((float128_t*)x_bytes)[i];
+          // Perform division x/n as x * (1/n)
+          float128_t div_result128;
+          f128M_mul((float128_t*)&in128, (float128_t*)&x_val128, (float128_t*)&div_result128);
+          // Round the quotient toward zero (round_minMag truncates; this is
+          // not a true floor for negative quotients)
+          c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false);
+          float128_t floor_float128;
+          i64_to_f128M(floor_result128, &floor_float128);
+          // Multiply n by the truncated quotient
+          float128_t mult_result128;
+          f128M_mul(((float128_t*)&n128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128));
+          // Compute remainder: x - n * trunc(x/n)
+          f128M_sub(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)x_bytes)[i]));
+        }
+        break;
+    }
+
+    // r_data is the result noun of [data]
+    u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+    // Clean up and return.
+    u3a_free(x_bytes);
+
+    return r_data;
+  }
+
+/* dot - ?dot = x · y
+     inner product, returned as one pinned value
+*/
+  u3_noun
+  u3qi_la_dot_i754(u3_noun x_data,
+                   u3_noun y_data,
+                   u3_noun shape,
+                   u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array.  We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes (2^(bloq-3) bytes per element, exact shift)
+    c3_d syz_x = len_x << (bloq - 3);
+
+    // x_bytes is the data array (w/o leading 0x1)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // y_bytes is the data array (w/ leading 0x1, which lies past the
+    // len_x elements handed to ?dot)
+    c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+    u3_noun r_data;
+
+    // Switch on the block size.
+ switch (u3x_atom(bloq)) { + case 4: { + float16_t r16[2]; + r16[0] = hdot(len_x, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + r16[1].v = 0x1; + r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); + break;} + + case 5: { + float32_t r32[2]; + r32[0] = sdot(len_x, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + r32[1].v = 0x1; + r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); + break;} + + case 6: { + float64_t r64[2]; + r64[0] = ddot(len_x, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + r64[1].v = 0x1; + r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); + break;} + + case 7: { + float128_t r128[2]; + r128[0] = qdot(len_x, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + r128[1] = (float128_t){0x1, 0x0}; + r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); + break;} + } + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* diag - diag(x) +*/ + u3_noun + u3qi_la_diag(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Assert length of dims is 2. + if (u3qb_lent(shape) != 2) { + return u3m_bail(c3__exit); + } + // Unpack shape into an array of dimensions. + c3_d *dims = _get_dims(shape); + if (dims[0] != dims[1]) { + return u3m_bail(c3__exit); + } + + // Unpack the data as a byte array. We assume total length < 2**64. + c3_d len_x = _get_length(shape); + c3_d syz_x = len_x * pow(2, bloq - 3); + c3_d wyd = pow(2, bloq - 3); + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); + c3_d syz_y = wyd * dims[1]; + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_y+1)*sizeof(c3_y)); + + u3_noun r_data; + + // Grab the index at i*n_x+j in bytes; put it at j. + for (c3_d i = 0; i < dims[1]; i++) { + // Scan across whole field width. 
+ for (c3_y k = 0; k < wyd; k++) { + y_bytes[i*wyd+k] = x_bytes[(i*dims[0]+i)*wyd+k]; + } + } + y_bytes[syz_y] = 0x1; // pin head + + // Unpack the result back into a noun. + r_data = u3i_bytes((syz_y+1)*sizeof(c3_y), y_bytes); + + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(dims); + + return r_data; + } + +/* transpose - x' +*/ + u3_noun + u3qi_la_transpose(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Assert length of dims is 2. + if (u3qb_lent(shape) != 2) { + return u3m_bail(c3__exit); + } + // Unpack shape into an array of dimensions. + c3_d *dims = _get_dims(shape); + + // Unpack the data as a byte array. We assume total length < 2**64. + c3_d len_x = _get_length(shape); + c3_d syz_x = len_x * pow(2, bloq - 3); + c3_d wyd = pow(2, bloq - 3); + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + + u3_noun r_data; + + // Grab the index at i*n_x+j in bytes; put it at j. + for (c3_d i = 0; i < dims[1]; i++) { + for (c3_d j = 0; j < dims[0]; j++) { + // Scan across whole field width. + for (c3_y k = 0; k < wyd; k++) { + y_bytes[(j*dims[1]+i)*wyd+k] = x_bytes[(i*dims[0]+j)*wyd+k]; + } + } + } + y_bytes[syz_x] = 0x1; // pin head + + // Unpack the result back into a noun. + r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(dims); + + return r_data; + } + +/* linspace - [a a+(b-a)/n ... b] +*/ + u3_noun + u3qi_la_linspace_i754(u3_noun a, + u3_noun b, + u3_noun n, + u3_noun bloq) + { + // Fence on valid bloq size. 
+ if (bloq < 4 || bloq > 7) { + return u3_none; + } + + u3_noun r_data; + + switch (u3x_atom(bloq)) { + case 4: { + float16_t a16, b16; + u3r_bytes(0, 2, (c3_y*)&(a16.v), a); + u3r_bytes(0, 2, (c3_y*)&(b16.v), b); + float16_t span16 = f16_sub(b16, a16); + float16_t interval16 = f16_div(span16, i32_to_f16(n-1)); + c3_y* x_bytes16 = (c3_y*)u3a_malloc((n*2+1)*sizeof(c3_y)); + for (c3_d i = 1; i < n-1; i++) { + ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); + } + // Assign in reverse order so that n=1 case is correctly left-hand bound. + ((float16_t*)x_bytes16)[n-1] = b16; + ((float16_t*)x_bytes16)[0] = a16; + x_bytes16[n*2] = 0x1; // pin head + r_data = u3i_bytes((n*2+1)*sizeof(c3_y), x_bytes16); + u3a_free(x_bytes16); + break;} + + case 5: { + float32_t a32, b32; + u3r_bytes(0, 4, (c3_y*)&(a32.v), a); + u3r_bytes(0, 4, (c3_y*)&(b32.v), b); + float32_t span32 = f32_sub(b32, a32); + float32_t interval32 = f32_div(span32, i32_to_f32(n-1)); + c3_y* x_bytes32 = (c3_y*)u3a_malloc((n*4+1)*sizeof(c3_y)); + for (c3_d i = 1; i < n-1; i++) { + ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); + } + ((float32_t*)x_bytes32)[n-1] = b32; + ((float32_t*)x_bytes32)[0] = a32; + x_bytes32[n*4] = 0x1; // pin head + r_data = u3i_bytes((n*4+1)*sizeof(c3_y), x_bytes32); + u3a_free(x_bytes32); + break;} + + case 6: { + float64_t a64, b64; + u3r_bytes(0, 8, (c3_y*)&(a64.v), a); + u3r_bytes(0, 8, (c3_y*)&(b64.v), b); + float64_t span64 = f64_sub(b64, a64); + float64_t interval64 = f64_div(span64, i32_to_f64(n-1)); + c3_y* x_bytes64 = (c3_y*)u3a_malloc((n*8+1)*sizeof(c3_y)); + for (c3_d i = 1; i < n-1; i++) { + ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); + } + ((float64_t*)x_bytes64)[n-1] = b64; + ((float64_t*)x_bytes64)[0] = a64; + x_bytes64[n*8] = 0x1; // pin head + r_data = u3i_bytes((n*8+1)*sizeof(c3_y), x_bytes64); + u3a_free(x_bytes64); + break;} + + case 7: { + float128_t a128, b128; + 
u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); + u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); + float128_t span128; + f128M_sub(&b128, &a128, &span128); + float128_t interval128; + float128_t n128; + i32_to_f128M(n-1, &n128); + f128M_div(&span128, &n128, &interval128); + c3_y* x_bytes128 = (c3_y*)u3a_malloc((n*16+1)*sizeof(c3_y)); + float128_t i128; + for (c3_d i = 1; i < n-1; i++) { + i32_to_f128M(i, &i128); + f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]); + f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]); + } + ((float128_t*)x_bytes128)[n-1] = b128; + ((float128_t*)x_bytes128)[0] = a128; + x_bytes128[n*16] = 0x1; // pin head + r_data = u3i_bytes((n*16+1)*sizeof(c3_y), x_bytes128); + u3a_free(x_bytes128); + break;} + } + + return r_data; + } + +/* range - [a a+d ... b] +*/ + u3_noun + u3qi_la_range_i754(u3_noun a, + u3_noun b, + u3_noun d, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + u3_noun r_data; + + switch (u3x_atom(bloq)) { + case 4: { + float16_t a16, b16, interval16; + u3r_bytes(0, 2, (c3_y*)&(a16.v), a); + u3r_bytes(0, 2, (c3_y*)&(b16.v), b); + u3r_bytes(0, 2, (c3_y*)&(interval16.v), d); + c3_d n16 = f16_to_i64(f16_ceil(f16_div(f16_sub(b16, a16), interval16)), softfloat_round_minMag, false); + c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n16+1)*2)*sizeof(c3_y)); + ((float16_t*)x_bytes16)[0] = a16; + for (c3_d i = 1; i < n16; i++) { + ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); + } + ((float16_t*)x_bytes16)[n16].v = 0x1; // pin head + r_data = u3i_bytes(((n16+1)*2)*sizeof(c3_y), x_bytes16); + u3a_free(x_bytes16); + break;} + + case 5: { + float32_t a32, b32, interval32; + u3r_bytes(0, 4, (c3_y*)&(a32.v), a); + u3r_bytes(0, 4, (c3_y*)&(b32.v), b); + u3r_bytes(0, 4, (c3_y*)&(interval32.v), d); + c3_d n32 = f32_to_i64(f32_ceil(f32_div(f32_sub(b32, a32), interval32)), softfloat_round_minMag, false); + c3_y* x_bytes32 = 
(c3_y*)u3a_malloc(((n32+1)*4)*sizeof(c3_y)); + ((float32_t*)x_bytes32)[0] = a32; + for (c3_d i = 1; i < n32; i++) { + ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); + } + ((float32_t*)x_bytes32)[n32].v = 0x1; // pin head + r_data = u3i_bytes(((n32+1)*4)*sizeof(c3_y), x_bytes32); + u3a_free(x_bytes32); + break;} + + case 6: { + float64_t a64, b64, interval64; + u3r_bytes(0, 8, (c3_y*)&(a64.v), a); + u3r_bytes(0, 8, (c3_y*)&(b64.v), b); + u3r_bytes(0, 8, (c3_y*)&(interval64.v), d); + c3_d n64 = f64_to_i64(f64_ceil(f64_div(f64_sub(b64, a64), interval64)), softfloat_round_minMag, false); + c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n64+1)*8)*sizeof(c3_y)); + ((float64_t*)x_bytes64)[0] = a64; + for (c3_d i = 1; i < n64; i++) { + ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); + } + ((float64_t*)x_bytes64)[n64].v = 0x1; // pin head + r_data = u3i_bytes(((n64+1)*8)*sizeof(c3_y), x_bytes64); + u3a_free(x_bytes64); + break;} + + case 7: { + float128_t a128, b128, interval128; + u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); + u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); + u3r_bytes(0, 16, (c3_y*)&(interval128.v[0]), d); + float128_t tmp; + f128M_sub(&b128, &a128, &tmp); + f128M_div(&tmp, &interval128, &tmp); + f128M_ceil(&tmp, &tmp); + c3_d n128 = f128M_to_i64(&tmp, softfloat_round_minMag, false); + c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n128+1)*16)*sizeof(c3_y)); + float128_t i128; + ((float128_t*)x_bytes128)[0] = a128; + for (c3_d i = 1; i < n128; i++) { + i32_to_f128M(i, &i128); + f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]); + f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]); + } + ((float128_t*)x_bytes128)[n128].v[0] = 0x1; // pin head + ((float128_t*)x_bytes128)[n128].v[1] = 0x0; // pin head + r_data = u3i_bytes(((n128+1)*16)*sizeof(c3_y), x_bytes128); + u3a_free(x_bytes128); + break;} + } + + return r_data; + } + +/* trace - tr(x) +*/ + u3_noun + 
u3qi_la_trace_i754(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Take the main diagonal first; diag returns u3_none for an invalid bloq. + u3_noun d_data = u3qi_la_diag(x_data, shape, bloq); + if (d_data == u3_none) { return u3_none; } + // The dims array is heap-allocated (diag and transpose u3a_free it), + // so keep the pointer, read the length, and release it here as well. + c3_d* dims = _get_dims(shape); + c3_d len_x0 = dims[0]; + u3a_free(dims); + // NOTE(review): dot(d, d) reduces to the sum of SQUARED diagonal entries, + // not their plain sum -- confirm against the Hoon ++trace. + u3_noun d_shape = u3nt(u3i_chub(len_x0), 0x1, u3_nul); + u3_noun r_data = u3qi_la_dot_i754(d_data, d_data, d_shape, bloq); + // ?dot only reads its arguments, so drop the temporaries created here. + u3z(d_shape); + u3z(d_data); + return r_data; + } + +/* mmul +*/ + u3_noun + u3qi_la_mmul_i754(u3_noun x_data, + u3_noun y_data, + u3_noun x_shape, + u3_noun y_shape, + u3_noun bloq) + { + // Unpack the data as a byte array. We assume total length < 2**64. + c3_d M = u3x_atom(u3h(x_shape)); + c3_d Na= u3x_atom(u3h(u3t(x_shape))); + c3_d Nb= u3x_atom(u3h(y_shape)); + c3_d P = u3x_atom(u3h(u3t(y_shape))); + + if ((u3_nul != u3t(u3t(x_shape))) || + (u3_nul != u3t(u3t(y_shape))) || + (Na != Nb)) { + return u3m_bail(c3__exit); + } + c3_d N = Na; + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(x_shape); // M*N + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // M*N + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // len_x is length in base units + c3_d len_y = _get_length(y_shape); // N*P + + // syz_x is length in bytes + c3_d syz_y = len_y * pow(2, bloq-3); // N*P + + // y_bytes is the data array (w/o leading 0x1) + c3_y* y_bytes = (c3_y*)u3a_malloc(syz_y*sizeof(c3_y)); + u3r_bytes(0, syz_y, y_bytes, y_data); + + // len_r is length in base units + c3_d len_r = M*P; // M*P + + // syz_r is length in bytes + c3_d syz_r = len_r * pow(2, bloq-3); // M*P + + // r_bytes is the result array + c3_y* r_bytes = (c3_y*)u3a_malloc((syz_r+1)*sizeof(c3_y)); + r_bytes[syz_r] = 0x1; // pin head + // initialize with 0x0s + for (c3_d i = 0; i < syz_r; i++) { + r_bytes[i] = 0x0; + } + + // Switch on the block size.
+ switch (u3x_atom(bloq)) { + case 4: + hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, P, (float16_t){SB_REAL16_ZERO}, (float16_t*)r_bytes, P); + break; + + case 5: + sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, P, (float32_t){SB_REAL32_ZERO}, (float32_t*)r_bytes, P); + break; + + case 6: + dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, P, (float64_t){SB_REAL64_ZERO}, (float64_t*)r_bytes, P); + break; + + case 7: + qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, P, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)r_bytes, P); + break; + } + + // Unpack the result back into a noun. + u3_noun r_data = u3i_bytes(syz_r+1, r_bytes); + u3_noun M_ = u3i_chub(M); + u3_noun P_ = u3i_chub(P); + + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(r_bytes); + + return u3nc(u3nq(u3nt(M_, P_, u3_nul), u3k(bloq), c3__i754, u3_nul), r_data); + } + + u3_noun + u3wi_la_add(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(rnd) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_add_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } 
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_sub(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(rnd) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_sub_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_mul(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(rnd) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = 
u3qi_la_mul_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_div(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(rnd) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_div_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_mod(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(rnd) + ) + { + return 
u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_mod_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_cumsum(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_cumsum_i754(x_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3nc(0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_argmin(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_argmin_i754(x_data, x_shape, x_bloq); + // bare atom (@ index) + return r_data;} + + default: + return 
u3_none; + } + } + } + } + + u3_noun + u3wi_la_ravel(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_ravel_i754(x_data, x_shape, x_bloq); + // (list @) + return r_data;} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_argmax(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_argmax_i754(x_data, x_shape, x_bloq); + // bare atom (@ index) + return r_data;} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_min(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) 
|| + c3n == _check(u3nc(x_meta, x_data)) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_min_i754(x_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_max(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_max_i754(x_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_abs(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_abs_i754(x_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), 
u3k(x_kind), u3k(x_tail)), r_data);} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_gth(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_gth_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3k(x_meta), r_data);} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_gte(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_gte_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3k(x_meta), r_data);} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_lth(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, 
&x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_lth_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3k(x_meta), r_data);} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_lte(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: { + u3_noun r_data = u3qi_la_lte_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3k(x_meta), r_data);} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_adds(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = 
u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_adds_i754(x_data, n, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + + u3_noun + u3wi_la_subs(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_subs_i754(x_data, n, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + + u3_noun + u3wi_la_muls(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_muls_i754(x_data, n, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), 
u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + + u3_noun + u3wi_la_divs(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_divs_i754(x_data, n, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + + u3_noun + u3wi_la_mods(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_mods_i754(x_data, n, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + + u3_noun + u3wi_la_dot(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 
0) || + c3n == u3r_sing(x_meta, y_meta) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_dot_i754(x_data, y_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + // The dims array is heap-allocated (diag and transpose u3a_free it): + // read the leading dimension, then release the array. + c3_d* dims = _get_dims(x_shape); + c3_d len_x0 = dims[0]; + u3a_free(dims); + return u3nc(u3nq(u3nt(u3i_chub(len_x0), 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_transpose(u3_noun cor) + { + // Jet wrapper: unpack the ray at the sample, transpose its data, and + // rebuild the meta around the result. + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == _check(cor) + ) + { + return u3m_bail(c3__exit); + } else { + u3_noun r_data = u3qi_la_transpose(x_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + // The transpose of an [m n] ray is [n m]: emit the two dimensions swapped. + return u3nc(u3nq(u3nt(u3k(u3h(u3t(x_shape))), u3k(u3h(x_shape)), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + } + } + } + + u3_noun + u3wi_la_linspace(u3_noun cor) + { + u3_noun x_meta, a, b, n, rnd; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_12, &a, + u3x_sam_13, &b, + u3x_sam_7, &n, + 0)) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 +
x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(n) || + (n < 1) // crash on zero size + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_linspace_i754(a, b, n, x_bloq); + if (r_data == u3_none) { return u3_none; } + x_shape = u3nc(u3x_atom(n), u3_nul); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_range(u3_noun cor) + { + u3_noun x_meta, a, b, d, rnd; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_12, &a, + u3x_sam_13, &b, + u3x_sam_7, &d, + 0)) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_range_i754(a, b, d, x_bloq); + if (r_data == u3_none) { return u3_none; } + c3_d a_, b_, d_; + c3_ds n_; + switch (x_bloq) { + case 4: + u3r_bytes(0, 2, (c3_y*)&a_, a); + u3r_bytes(0, 2, (c3_y*)&b_, b); + u3r_bytes(0, 2, (c3_y*)&d_, d); + n_ = f16_to_i64(f16_ceil(f16_div(f16_sub((float16_t){b_}, (float16_t){a_}), (float16_t){d_})), softfloat_round_minMag, false) - 1; + break; + case 5: + u3r_bytes(0, 4, (c3_y*)&a_, a); + u3r_bytes(0, 4, (c3_y*)&b_, b); + u3r_bytes(0, 4, (c3_y*)&d_, d); + n_ = f32_to_i64(f32_ceil(f32_div(f32_sub((float32_t){b_}, (float32_t){a_}), (float32_t){d_})), softfloat_round_minMag, false) - 1; + break; + case 6: + u3r_bytes(0, 8, (c3_y*)&a_, a); + u3r_bytes(0, 8, (c3_y*)&b_, b); + u3r_bytes(0, 8, (c3_y*)&d_, d); + n_ = 
f64_to_i64(f64_ceil(f64_div(f64_sub((float64_t){b_}, (float64_t){a_}), (float64_t){d_})), softfloat_round_minMag, false) - 1; + break; + case 7: { + c3_d a__[2], b__[2], d__[2]; + u3r_bytes(0, 16, (c3_y*)&a__, a); + u3r_bytes(0, 16, (c3_y*)&b__, b); + u3r_bytes(0, 16, (c3_y*)&d__, d); + float128_t tmp; + f128M_sub((float128_t*)&b__, (float128_t*)&a__, &tmp); + f128M_div(&tmp, (float128_t*)&d__, &tmp); + f128M_ceil(&tmp, &tmp); + n_ = f128M_to_i64(&tmp, softfloat_round_minMag, false) - 1; + break;} + } + u3_noun n = u3i_chub(n_+1); + x_shape = u3nc(u3k(n), u3_nul); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_diag(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) || + c3n == _check(cor) + ) + { + return u3m_bail(c3__exit); + } else { + u3_noun r_data = u3qi_la_diag(x_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + c3_d len_x0 = _get_dims(x_shape)[0]; + return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); + } + } + } + + u3_noun + u3wi_la_trace(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail; + if ( c3n == u3r_mean(x_meta, + 2, &x_shape, + 6, &x_bloq, + 14, &x_kind, + 15, &x_tail, + 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { 
+ case c3__i754: { + u3_noun r_data = u3qi_la_trace_i754(x_data, x_shape, x_bloq); + if (r_data == u3_none) { return u3_none; } + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);} + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wi_la_mmul(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_tail, + y_shape, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_tail = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == _check(u3nc(x_meta, x_data)) || + c3n == _check(u3nc(y_meta, y_data)) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__i754: + _set_rounding(rnd); + u3_noun r_data = u3qi_la_mmul_i754(x_data, y_data, x_shape, y_shape, x_bloq); + // result is already [meta data] + return r_data; + + default: + return u3_none; + } + } + } + } diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index b4fe7cd73e..63b6bdd523 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -246,6 +246,35 @@ u3_noun u3qfp_nepo(u3_noun, u3_noun); u3_noun u3qfp_rake(u3_noun); + u3_noun u3qi_la_add_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_sub_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mul_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_div_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mod_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_adds_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_subs_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_muls_i754(u3_noun, u3_noun, u3_noun, 
u3_noun); + u3_noun u3qi_la_divs_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mods_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_dot_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_diag(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_transpose(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_cumsum_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_argmin_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_argmax_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_ravel_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_min_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_max_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_linspace_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_range_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_abs_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_gth_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_gte_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_lth_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_lte_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_trace_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mmul_i754(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); + # define u3qfu_van_fan 28 # define u3qfu_van_rib 58 # define u3qfu_van_vet 59 diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index bf06268b5b..9a6a8460c0 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -270,6 +270,12 @@ static c3_c* _140_hex_lune_ha[] = { 0 }; + +static u3j_harm _137_hex__crc32_a[] = {{".2", u3we_crc32}, {}}; + +static u3j_core _137_hex__crc_d[] = {{"crc32", 7, _137_hex__crc32_a, 0, no_hashes }, {}}; + + static u3j_harm _140_hex_coed__ed_puck_a[] = {{".2", u3wee_puck}, {}}; static c3_c* _140_hex_coed__ed_puck_ha[] = { "1bc694675842345c50b0e20a2193bb5bcbb42f163fc832431a3d1822a81e4c98", @@ -2120,8 +2126,77 @@ static u3j_core _139_hex_json_d[] = {} }; +/* /lib jets in non core +*/ +static u3j_harm 
_139_non__lagoon_add_a[] = {{".2", u3wi_la_add}, {}}; +static u3j_harm _139_non__lagoon_sub_a[] = {{".2", u3wi_la_sub}, {}}; +static u3j_harm _139_non__lagoon_mul_a[] = {{".2", u3wi_la_mul}, {}}; +static u3j_harm _139_non__lagoon_div_a[] = {{".2", u3wi_la_div}, {}}; +static u3j_harm _139_non__lagoon_mod_a[] = {{".2", u3wi_la_mod}, {}}; +static u3j_harm _139_non__lagoon_adds_a[] = {{".2", u3wi_la_adds}, {}}; +static u3j_harm _139_non__lagoon_subs_a[] = {{".2", u3wi_la_subs}, {}}; +static u3j_harm _139_non__lagoon_muls_a[] = {{".2", u3wi_la_muls}, {}}; +static u3j_harm _139_non__lagoon_divs_a[] = {{".2", u3wi_la_divs}, {}}; +static u3j_harm _139_non__lagoon_mods_a[] = {{".2", u3wi_la_mods}, {}}; +static u3j_harm _139_non__lagoon_dot_a[] = {{".2", u3wi_la_dot}, {}}; +static u3j_harm _139_non__lagoon_trans_a[] ={{".2", u3wi_la_transpose}, {}}; +static u3j_harm _139_non__lagoon_cumsum_a[]={{".2", u3wi_la_cumsum}, {}}; +static u3j_harm _139_non__lagoon_argmin_a[]={{".2", u3wi_la_argmin}, {}}; +static u3j_harm _139_non__lagoon_argmax_a[]={{".2", u3wi_la_argmax}, {}}; +static u3j_harm _139_non__lagoon_ravel_a[]={{".2", u3wi_la_ravel}, {}}; +static u3j_harm _139_non__lagoon_min_a[] = {{".2", u3wi_la_min}, {}}; +static u3j_harm _139_non__lagoon_max_a[] = {{".2", u3wi_la_max}, {}}; +static u3j_harm _139_non__lagoon_linspace_a[]={{".2", u3wi_la_linspace}, {}}; +static u3j_harm _139_non__lagoon_range_a[]= {{".2", u3wi_la_range}, {}}; +static u3j_harm _139_non__lagoon_abs_a[] = {{".2", u3wi_la_abs}, {}}; +static u3j_harm _139_non__lagoon_gth_a[] = {{".2", u3wi_la_gth}, {}}; +static u3j_harm _139_non__lagoon_gte_a[] = {{".2", u3wi_la_gte}, {}}; +static u3j_harm _139_non__lagoon_lth_a[] = {{".2", u3wi_la_lth}, {}}; +static u3j_harm _139_non__lagoon_lte_a[] = {{".2", u3wi_la_lte}, {}}; +static u3j_harm _139_non__lagoon_diag_a[] = {{".2", u3wi_la_diag}, {}}; +static u3j_harm _139_non__lagoon_trace_a[]= {{".2", u3wi_la_trace}, {}}; +static u3j_harm _139_non__lagoon_mmul_a[] = {{".2", 
u3wi_la_mmul}, {}}; +static u3j_core _139_non__la_core_d[] = + { { "add-rays", 7, _139_non__lagoon_add_a, 0, no_hashes }, + { "sub-rays", 7, _139_non__lagoon_sub_a, 0, no_hashes }, + { "mul-rays", 7, _139_non__lagoon_mul_a, 0, no_hashes }, + { "div-rays", 7, _139_non__lagoon_div_a, 0, no_hashes }, + { "mod-rays", 7, _139_non__lagoon_mod_a, 0, no_hashes }, + { "add-scal", 7, _139_non__lagoon_adds_a, 0, no_hashes }, + { "sub-scal", 7, _139_non__lagoon_subs_a, 0, no_hashes }, + { "mul-scal", 7, _139_non__lagoon_muls_a, 0, no_hashes }, + { "div-scal", 7, _139_non__lagoon_divs_a, 0, no_hashes }, + { "mod-scal", 7, _139_non__lagoon_mods_a, 0, no_hashes }, + { "dot", 7, _139_non__lagoon_dot_a, 0, no_hashes }, + { "transpose",7, _139_non__lagoon_trans_a, 0, no_hashes }, + { "cumsum", 7, _139_non__lagoon_cumsum_a, 0, no_hashes }, + { "argmin", 7, _139_non__lagoon_argmin_a, 0, no_hashes }, + { "argmax", 7, _139_non__lagoon_argmax_a, 0, no_hashes }, + { "ravel", 7, _139_non__lagoon_ravel_a, 0, no_hashes }, + { "min", 7, _139_non__lagoon_min_a, 0, no_hashes }, + { "max", 7, _139_non__lagoon_max_a, 0, no_hashes }, + { "linspace", 7, _139_non__lagoon_linspace_a, 0, no_hashes }, + { "range", 7, _139_non__lagoon_range_a, 0, no_hashes }, + { "abs", 7, _139_non__lagoon_abs_a, 0, no_hashes }, + { "gth", 7, _139_non__lagoon_gth_a, 0, no_hashes }, + { "gte", 7, _139_non__lagoon_gte_a, 0, no_hashes }, + { "lth", 7, _139_non__lagoon_lth_a, 0, no_hashes }, + { "lte", 7, _139_non__lagoon_lte_a, 0, no_hashes }, + { "diag", 7, _139_non__lagoon_diag_a, 0, no_hashes }, + { "trace", 7, _139_non__lagoon_trace_a,0, no_hashes }, + { "mmul", 7, _139_non__lagoon_mmul_a, 0, no_hashes }, + {} + }; + +static u3j_core _139_non_d[] = + { { "lagoon", 7, 0, _139_non__la_core_d, no_hashes }, + {} + }; + static u3j_core _139_hex_d[] = -{ { "lore", 63, _140_hex_lore_a, 0, no_hashes }, +{ { "non", 7, 0, _139_non_d, no_hashes }, + + { "lore", 63, _140_hex_lore_a, 0, no_hashes }, { "leer", 63, _140_hex_leer_a, 
0, no_hashes }, { "loss", 63, _140_hex_loss_a, 0, no_hashes }, { "lune", 127, _140_hex_lune_a, 0, no_hashes }, @@ -2138,6 +2213,7 @@ static u3j_core _139_hex_d[] = { "secp", 6, 0, _140_hex_secp_d, no_hashes }, { "mimes", 31, 0, _140_hex_mimes_d, no_hashes }, { "json", 31, 0, _139_hex_json_d, no_hashes }, + {} }; @@ -2346,6 +2422,8 @@ static u3j_core _138_hex_d[] = { "leer", 63, _140_hex_leer_a, 0, no_hashes }, { "loss", 63, _140_hex_loss_a, 0, no_hashes }, { "lune", 127, _140_hex_lune_a, 0, no_hashes }, +// XX move me to 137 when it exists + { "crc", 31, 0, _137_hex__crc_d, no_hashes }, { "coed", 63, 0, _140_hex_coed_d, no_hashes }, { "aes", 31, 0, _140_hex_aes_d, no_hashes }, @@ -2624,6 +2702,7 @@ static u3j_core _d[] = { { "k140", 0, 0, _k140_d, _k140_ha, 0, (u3j_core*) 140, 0 }, { "k139", 0, 0, _k139_d, no_hashes, 0, (u3j_core*) 139, 0 }, { "k138", 0, 0, _k138_d, no_hashes, 0, (u3j_core*) 138, 0 }, + { "k137", 0, 0, _k138_d, no_hashes, 0, (u3j_core*) 137, 0 }, { "a50", 0, 0, _a50_d, _k140_ha, 0, (u3j_core*) c3__a50, 0 }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 275740cd97..ec8daa32ed 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -292,6 +292,8 @@ u3_noun u3wes_gte(u3_noun); u3_noun u3wes_gth(u3_noun); + u3_noun u3we_crc32(u3_noun); + /** Tier 6. 
**/ u3_noun u3wf_bull(u3_noun); @@ -336,5 +338,35 @@ u3_noun u3wfu_repo(u3_noun); u3_noun u3wfu_rest(u3_noun); + u3_noun u3wi_la_add(u3_noun); + u3_noun u3wi_la_sub(u3_noun); + u3_noun u3wi_la_mul(u3_noun); + u3_noun u3wi_la_div(u3_noun); + u3_noun u3wi_la_mod(u3_noun); + u3_noun u3wi_la_adds(u3_noun); + u3_noun u3wi_la_subs(u3_noun); + u3_noun u3wi_la_muls(u3_noun); + u3_noun u3wi_la_divs(u3_noun); + u3_noun u3wi_la_mods(u3_noun); + u3_noun u3wi_la_dot(u3_noun); + u3_noun u3wi_la_diag(u3_noun); + u3_noun u3wi_la_transpose(u3_noun); + u3_noun u3wi_la_cumsum(u3_noun); + u3_noun u3wi_la_argmin(u3_noun); + u3_noun u3wi_la_argmax(u3_noun); + u3_noun u3wi_la_ravel(u3_noun); + u3_noun u3wi_la_min(u3_noun); + u3_noun u3wi_la_max(u3_noun); + u3_noun u3wi_la_linspace(u3_noun); + u3_noun u3wi_la_range(u3_noun); + u3_noun u3wi_la_abs(u3_noun); + u3_noun u3wi_la_gth(u3_noun); + u3_noun u3wi_la_gte(u3_noun); + u3_noun u3wi_la_lth(u3_noun); + u3_noun u3wi_la_lte(u3_noun); + + u3_noun u3wi_la_trace(u3_noun); + u3_noun u3wi_la_mmul(u3_noun); + #endif /* ifndef U3_JETS_W_H */ diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index cb6c901d27..c218b5311e 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -469,15 +469,17 @@ u3m_file(c3_c* pas_c) /* u3m_mark(): mark all nouns in the road. */ -c3_w -u3m_mark(FILE* fil_u) +u3m_quac** +u3m_mark(void) { - c3_w tot_w = 0; - tot_w += u3v_mark(fil_u); - tot_w += u3j_mark(fil_u); - tot_w += u3n_mark(fil_u); - tot_w += u3a_mark_road(fil_u); - return tot_w; + u3m_quac** qua_u = c3_malloc(sizeof(*qua_u) * 5); + qua_u[0] = u3v_mark(); + qua_u[1] = u3j_mark(); + qua_u[2] = u3n_mark(); + qua_u[3] = u3a_mark_road(); + qua_u[4] = NULL; + + return qua_u; } /* _pave_parts(): build internal tables. @@ -1536,7 +1538,7 @@ u3m_grab(u3_noun som, ...) 
// terminate with u3_none // u3h_free(u3R->cax.har_p); // u3R->cax.har_p = u3h_new(); - u3m_mark(0); + u3m_mark(); { va_list vap; u3_noun tur; diff --git a/pkg/noun/manage.h b/pkg/noun/manage.h index 14f9cc503d..974c5948f6 100644 --- a/pkg/noun/manage.h +++ b/pkg/noun/manage.h @@ -148,10 +148,19 @@ u3_noun u3m_soft_esc(u3_noun ref, u3_noun sam); + + /* u3m_quac: memory report. + */ + typedef struct _u3m_quac { + c3_c* nam_c; + c3_w siz_w; + struct _u3m_quac** qua_u; + } u3m_quac; + /* u3m_mark(): mark all nouns in the road. */ - c3_w - u3m_mark(FILE* fil_u); + u3m_quac** + u3m_mark(); /* u3m_grab(): garbage-collect the world, plus extra roots. */ diff --git a/pkg/noun/nock.c b/pkg/noun/nock.c index eb459d17f7..f2a6f62bb1 100644 --- a/pkg/noun/nock.c +++ b/pkg/noun/nock.c @@ -3047,16 +3047,30 @@ _n_bam(u3_noun kev, void* dat) /* u3n_mark(): mark the bytecode cache for gc. */ -c3_w -u3n_mark(FILE* fil_u) +u3m_quac* +u3n_mark() { - c3_w bam_w = 0, har_w = 0; + u3m_quac** qua_u = c3_malloc(sizeof(*qua_u) * 3); + + qua_u[0] = c3_calloc(sizeof(*qua_u[0])); + qua_u[0]->nam_c = strdup("bytecode programs"); + u3p(u3h_root) har_p = u3R->byc.har_p; - u3h_walk_with(har_p, _n_bam, &bam_w); + u3h_walk_with(har_p, _n_bam, &qua_u[0]->siz_w); + qua_u[0]->siz_w = qua_u[0]->siz_w * 4; + + qua_u[1] = c3_calloc(sizeof(*qua_u[1])); + qua_u[1]->nam_c = strdup("bytecode cache"); + qua_u[1]->siz_w = u3h_mark(har_p) * 4; + + qua_u[2] = NULL; + + u3m_quac* tot_u = c3_malloc(sizeof(*tot_u)); + tot_u->nam_c = strdup("total nock stuff"); + tot_u->siz_w = qua_u[0]->siz_w + qua_u[1]->siz_w; + tot_u->qua_u = qua_u; - bam_w = u3a_maid(fil_u, " bytecode programs", bam_w); - har_w = u3a_maid(fil_u, " bytecode cache", u3h_mark(har_p)); - return u3a_maid(fil_u, "total nock stuff", bam_w + har_w); + return tot_u; } /* u3n_reclaim(): clear ad-hoc persistent caches to reclaim memory. 
diff --git a/pkg/noun/nock.h b/pkg/noun/nock.h index 7baf7351d8..266438119d 100644 --- a/pkg/noun/nock.h +++ b/pkg/noun/nock.h @@ -123,8 +123,8 @@ /* u3n_mark(): mark bytecode cache. */ - c3_w - u3n_mark(FILE* fil_u); + u3m_quac* + u3n_mark(); /* u3n_reclaim(): clear ad-hoc persistent caches to reclaim memory. */ diff --git a/pkg/noun/vortex.c b/pkg/noun/vortex.c index 0bb9af8c29..287be49e8c 100644 --- a/pkg/noun/vortex.c +++ b/pkg/noun/vortex.c @@ -392,16 +392,33 @@ u3v_sway(u3_noun blu, c3_l tab_l, u3_noun tax) /* u3v_mark(): mark arvo kernel. */ -c3_w -u3v_mark(FILE* fil_u) +u3m_quac* +u3v_mark() { u3v_arvo* arv_u = &(u3H->arv_u); - c3_w tot_w = 0; - tot_w += u3a_maid(fil_u, " kernel", u3a_mark_noun(arv_u->roc)); - tot_w += u3a_maid(fil_u, " date", u3a_mark_noun(arv_u->now)); - tot_w += u3a_maid(fil_u, " wish cache", u3a_mark_noun(arv_u->yot)); - return u3a_maid(fil_u, "total arvo stuff", tot_w); + u3m_quac** qua_u = c3_malloc(sizeof(*qua_u) * 4); + + qua_u[0] = c3_calloc(sizeof(*qua_u[0])); + qua_u[0]->nam_c = strdup("kernel"); + qua_u[0]->siz_w = u3a_mark_noun(arv_u->roc) * 4; + + qua_u[1] = c3_calloc(sizeof(*qua_u[1])); + qua_u[1]->nam_c = strdup("date"); + qua_u[1]->siz_w = u3a_mark_noun(arv_u->now) * 4; + + qua_u[2] = c3_calloc(sizeof(*qua_u[2])); + qua_u[2]->nam_c = strdup("wish cache"); + qua_u[2]->siz_w = u3a_mark_noun(arv_u->yot) * 4; + + qua_u[3] = NULL; + + u3m_quac* tot_u = c3_malloc(sizeof(*tot_u)); + tot_u->nam_c = strdup("total arvo stuff"); + tot_u->siz_w = qua_u[0]->siz_w + qua_u[1]->siz_w + qua_u[2]->siz_w; + tot_u->qua_u = qua_u; + + return tot_u; } /* u3v_reclaim(): clear ad-hoc persistent caches to reclaim memory. @@ -434,4 +451,3 @@ u3v_rewrite_compact() arv_u->now = u3a_rewritten_noun(arv_u->now); arv_u->yot = u3a_rewritten_noun(arv_u->yot); } - diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index 2d202e831c..e2377fb9be 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -130,8 +130,8 @@ /* u3v_mark(): mark arvo kernel. 
*/ - c3_w - u3v_mark(FILE* fil_u); + u3m_quac* + u3v_mark(); /* u3v_reclaim(): clear ad-hoc persistent caches to reclaim memory. */ diff --git a/pkg/vere/dawn.c b/pkg/vere/dawn.c index ba474d10d4..37758e553e 100644 --- a/pkg/vere/dawn.c +++ b/pkg/vere/dawn.c @@ -41,24 +41,6 @@ _dawn_buf_to_oct(uv_buf_t buf_u) return u3nc(len, u3i_bytes(buf_u.len, (const c3_y*)buf_u.base)); } - -/* _dawn_curl_alloc(): allocate a response buffer for curl -*/ -static size_t -_dawn_curl_alloc(void* dat_v, size_t uni_t, size_t mem_t, void* buf_v) -{ - uv_buf_t* buf_u = buf_v; - - size_t siz_t = uni_t * mem_t; - buf_u->base = c3_realloc(buf_u->base, 1 + siz_t + buf_u->len); - - memcpy(buf_u->base + buf_u->len, dat_v, siz_t); - buf_u->len += siz_t; - buf_u->base[buf_u->len] = 0; - - return siz_t; -} - /* _dawn_post_json(): POST JSON to url_c */ static uv_buf_t @@ -84,7 +66,7 @@ _dawn_post_json(c3_c* url_c, uv_buf_t lod_u) // u3K.ssl_curl_f(curl); curl_easy_setopt(curl, CURLOPT_URL, url_c); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, _dawn_curl_alloc); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, king_curl_alloc); curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&buf_u); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hed_u); @@ -131,7 +113,7 @@ _dawn_get_jam(c3_c* url_c) // u3K.ssl_curl_f(curl); curl_easy_setopt(curl, CURLOPT_URL, url_c); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, _dawn_curl_alloc); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, king_curl_alloc); curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&buf_u); result = curl_easy_perform(curl); diff --git a/pkg/vere/io/http.c b/pkg/vere/io/http.c index 63812a4792..094c813af6 100644 --- a/pkg/vere/io/http.c +++ b/pkg/vere/io/http.c @@ -44,7 +44,8 @@ typedef struct _u3_h2o_serv { typedef struct _u3_preq { struct _u3_hreq* req_u; // originating request (nullable) struct _u3_httd* htd_u; // device backpointer - u3_noun pax; // partial scry path + u3_noun pax; // partial scry path + c3_o las_o; // was scry at now } u3_preq; /* 
u3_hcon: incoming http connection. @@ -638,7 +639,235 @@ _http_seq_new(u3_hcon* hon_u, h2o_req_t* rec_u) return req_u; } -/* _http_req_dispatch(): dispatch http request to %eyre +static void +_http_cache_respond(u3_hreq* req_u, u3_noun nun); + +static void +_http_scry_respond(u3_hreq* req_u, u3_noun nun); + +typedef struct _byte_range { + c3_z beg_z; + c3_z end_z; +} byte_range; + +/* _chunk_align(): align range to a nearby chunk +*/ +static void +_chunk_align(byte_range* rng_u) +{ + c3_z siz_z = 4194304; // 4MiB + + if ( SIZE_MAX != rng_u->beg_z ) { + if ( rng_u->beg_z > rng_u->end_z ) { + rng_u->beg_z = SIZE_MAX; + rng_u->end_z = SIZE_MAX; + } + else { + // XX an out-of-bounds request could be aligned to in-bounds + // resulting in a 200 or 206 response instead of 416. + // browsers should have the total length from content-range, + // and send reasonable range requests. + // + rng_u->beg_z = (rng_u->beg_z / siz_z) * siz_z; + rng_u->end_z = (rng_u->beg_z + siz_z) - 1; + } + } + else if ( SIZE_MAX != rng_u->end_z ) { + // round up to multiple of siz_z + rng_u->end_z = siz_z * ((rng_u->end_z / siz_z) + 1); + } +} + +/* _parse_range(): get a range from '-' delimited text +*/ +static byte_range +_parse_range(c3_c* txt_c, c3_w len_w) +{ + c3_c* hep_c = memchr(txt_c, '-', len_w); + byte_range rng_u; + rng_u.beg_z = SIZE_MAX; + rng_u.end_z = SIZE_MAX; + + if ( hep_c ) { + rng_u.beg_z = h2o_strtosize(txt_c, hep_c - txt_c); + rng_u.end_z = h2o_strtosize(hep_c + 1, len_w - ((hep_c + 1) - txt_c)); + // strange -> [SIZE_MAX SIZE_MAX] + if ( ((SIZE_MAX == rng_u.beg_z) && (hep_c != txt_c)) + || ((SIZE_MAX == rng_u.end_z) && (len_w - ((hep_c + 1) - txt_c) > 0)) + || ((SIZE_MAX != rng_u.beg_z) && (rng_u.beg_z > rng_u.end_z)) ) + { + rng_u.beg_z = SIZE_MAX; + rng_u.end_z = SIZE_MAX; + } + } + return rng_u; +} + +/* _get_range(): get a _byte_range from headers +*/ +static c3_o +_get_range(h2o_headers_t req_headers, byte_range* rng_u) +{ + rng_u->beg_z = SIZE_MAX; + rng_u->end_z = 
SIZE_MAX; + + c3_w inx_w = h2o_find_header(&req_headers, H2O_TOKEN_RANGE, -1); + if ( UINT32_MAX == inx_w) { + return c3n; + } + + if ( (req_headers.entries[inx_w].value.len >= 6) + && (0 == memcmp("bytes=", req_headers.entries[inx_w].value.base, 6)) ) + { + byte_range tmp_u = _parse_range(req_headers.entries[inx_w].value.base + 6, + req_headers.entries[inx_w].value.len - 6); + rng_u->beg_z = tmp_u.beg_z; + rng_u->end_z = tmp_u.end_z; + } + + return c3y; +} + +/* _http_scry_cb(): respond and maybe cache scry result +*/ +static void +_http_scry_cb(void* vod_p, u3_noun nun) +{ + u3_preq* peq_u = vod_p; + u3_httd* htd_u = peq_u->htd_u; + u3_hreq* req_u = peq_u->req_u; + u3_hfig* fig_u = &req_u->hon_u->htp_u->htd_u->fig_u; + c3_o auth = _http_req_is_auth(fig_u, req_u->rec_u); + + if ( req_u ) { + u3_assert(u3_rsat_peek == req_u->sat_e); + req_u->peq_u = 0; + _http_scry_respond(req_u, u3k(nun)); + } + + // cache only if peek was not at now, and nun isn't u3_nul + if ( (c3n == peq_u->las_o) + && (u3_nul != nun) ) + { + u3_noun key = u3nc(auth, u3k(peq_u->pax)); + u3h_put(htd_u->nax_p, key, nun); + u3z(key); + } + else { + u3z(nun); + } + + u3z(peq_u->pax); + c3_free(peq_u); +} + +/* _beam: ship desk case spur +*/ +typedef struct _beam { + u3_weak who; + u3_weak des; + u3_weak cas; + u3_weak pur; +} beam; + +/* _free_beam(): free a beam +*/ +static void +_free_beam(beam* bem) +{ + u3z(bem->who); + u3z(bem->des); + u3z(bem->cas); + u3z(bem->pur); +} + +/* _get_beam(): get a _beam from url +*/ +static beam +_get_beam(u3_hreq* req_u, c3_c* txt_c, c3_w len_w) +{ + beam bem; + + // get beak + // + for ( c3_w i_w = 0; i_w < 3; ++i_w ) { + u3_noun* wer; + if ( 0 == i_w ) { + wer = &bem.who; + } + else if ( 1 == i_w ) { + wer = &bem.des; + } + else { + wer = &bem.cas; + } + + // find '//' + if ( (len_w >= 2) + && ('/' == txt_c[0]) + && ('/' == txt_c[1]) ) + { + *wer = u3_nul; + txt_c++; + len_w--; + } + // skip '/' + else if ( (len_w > 0) && ('/' == txt_c[0]) ) { + txt_c++; + 
len_w--; + } + + // '=' + if ( (len_w > 0) && ('=' == txt_c[0]) ) { + if ( 0 == i_w ) { + u3_http* htp_u = req_u->hon_u->htp_u; + u3_httd* htd_u = htp_u->htd_u; + *wer = u3dc("scot", 'p', u3i_chubs(2, htd_u->car_u.pir_u->who_d)); + } + else if ( 1 == i_w ) { + *wer = c3__base; + } + else { + req_u->peq_u->las_o = c3y; + } + txt_c++; + len_w--; + } + // slice cord + else { + c3_c* nex_c; + c3_c* tis_c = memchr(txt_c, '=', len_w); + c3_c* fas_c = memchr(txt_c, '/', len_w); + + if ( tis_c && fas_c ) { + nex_c = c3_min(tis_c, fas_c); + } + else { + nex_c = ( tis_c ) ? tis_c : fas_c; + } + + if ( !nex_c ) { + *wer = u3_none; + return bem; + } + else { + c3_w dif_w = (c3_p)(nex_c - txt_c); + *wer = u3i_bytes(dif_w, (const c3_y*)txt_c); + txt_c = nex_c; + len_w = len_w - dif_w; + } + } + } + + // get spur + u3_noun tmp = u3dc("rush", u3i_bytes(len_w, (const c3_y*)txt_c), u3v_wish("stap")); + bem.pur = ( u3_nul == tmp ) ? u3_none : u3k(u3t(tmp)); + u3z(tmp); + + return bem; +} + +/* _http_req_dispatch(): dispatch http request */ static void _http_req_dispatch(u3_hreq* req_u, u3_noun req) @@ -649,28 +878,137 @@ _http_req_dispatch(u3_hreq* req_u, u3_noun req) { u3_http* htp_u = req_u->hon_u->htp_u; u3_httd* htd_u = htp_u->htd_u; - u3_noun wir = _http_req_to_duct(req_u); - u3_noun cad; + c3_c* bas_c = req_u->rec_u->input.path.base; + c3_w len_w = req_u->rec_u->input.path.len; + + // check if base url starts with '/_~_/' + if ( (len_w < 6) + || (0 != memcmp("/_~_/", bas_c, 5)) ) { + // no: inject to arvo + u3_noun wir = _http_req_to_duct(req_u); + u3_noun cad; u3_noun adr = u3nc(c3__ipv4, u3i_words(1, &req_u->hon_u->ipf_w)); // XX loopback automatically secure too? - // + // u3_noun dat = u3nt(htp_u->sec, adr, req); cad = ( c3y == req_u->hon_u->htp_u->lop ) ? 
u3nc(u3i_string("request-local"), dat) : u3nc(u3i_string("request"), dat); + u3_auto_plan(&htd_u->car_u, u3_ovum_init(0, c3__e, wir, cad)); } + else { + // '/_~_/' found + bas_c = bas_c + 4; // retain '/' after /_~_ + len_w = len_w - 4; + + req_u->peq_u = c3_malloc(sizeof(*req_u->peq_u)); + req_u->peq_u->req_u = req_u; + req_u->peq_u->htd_u = htd_u; + req_u->peq_u->las_o = c3n; + req_u->sat_e = u3_rsat_peek; + req_u->peq_u->pax = u3_nul; + + u3_hfig* fig_u = &req_u->hon_u->htp_u->htd_u->fig_u; + h2o_req_t* rec_u = req_u->rec_u; + + // set gang to [~ ~] or ~ + u3_noun gang; + c3_o auth = _http_req_is_auth(fig_u, rec_u); + if ( auth == c3y ) { + gang = u3nc(u3_nul, u3_nul); + } + else { + gang = u3_nul; + } - u3_auto_plan(&htd_u->car_u, u3_ovum_init(0, c3__e, wir, cad)); + beam bem = _get_beam(req_u, bas_c, len_w); + if ( (u3_none == bem.who) + || (u3_none == bem.des) + || (u3_none == bem.cas) + || (u3_none == bem.pur) ) + { + c3_c* msg_c = "bad request"; + h2o_send_error_generic(req_u->rec_u, 400, msg_c, msg_c, 0); + u3z(gang); + u3z(req_u->peq_u->pax); + _free_beam(&bem); + return; + } + + h2o_headers_t req_headers = req_u->rec_u->headers; + byte_range rng_u; + c3_o rng_o = _get_range(req_headers, &rng_u); + + // prepare spur for eyre range scry + // + u3_noun spur; + if ( c3n == rng_o ) { + // full range: '/range/0//foo' + spur = u3nq(u3i_string("range"), c3_s1('0'), u3_blip, u3k(bem.pur)); + } + else { + _chunk_align(&rng_u); + + u3_atom beg = ( SIZE_MAX == rng_u.beg_z) ? + u3_blip : u3dc("scot", c3__ud, u3i_chub(rng_u.beg_z)); + u3_atom end = ( SIZE_MAX == rng_u.end_z) ? 
+ u3_blip : u3dc("scot", c3__ud, u3i_chub(rng_u.end_z)); + + spur = u3nq(u3i_string("range"), beg, end, u3k(bem.pur)); + } + + // peek or respond from cache + // + if ( c3y == req_u->peq_u->las_o ) { + u3_noun our = u3dc("scot", 'p', u3i_chubs(2, htd_u->car_u.pir_u->who_d)); + if ( our == bem.who ) { + u3_pier_peek_last(htd_u->car_u.pir_u, gang, c3__ex, + u3k(bem.des), spur, req_u->peq_u, _http_scry_cb); + } + else { + c3_c* msg_c = "bad request"; + h2o_send_error_generic(req_u->rec_u, 400, msg_c, msg_c, 0); + u3z(gang); + u3z(spur); + u3z(req_u->peq_u->pax); + } + u3z(our); + } + else { + u3_noun bam = u3nq(u3k(bem.who), u3k(bem.des), u3k(bem.cas), spur); + u3_noun key = u3nc(auth, u3k(bam)); + u3_weak nac = u3h_get(htd_u->nax_p, key); + u3z(key); + + if ( (u3_none == nac) + || ((u3_nul == gang) && (c3y == u3r_at(14, nac))) ) + { + // maybe cache, then serve subsequent range requests from cache + u3z(req_u->peq_u->pax); + req_u->peq_u->pax = u3k(bam); + u3_pier_peek(htd_u->car_u.pir_u, gang, u3nt(0, c3__ex, bam), + req_u->peq_u, _http_scry_cb); + u3z(nac); + } + else { + _http_scry_respond(req_u, nac); + u3z(bam); + u3z(gang); + } + } + _free_beam(&bem); + } } } /* _http_cache_respond(): respond with a simple-payload:http */ static void -_http_cache_respond(u3_hreq* req_u, u3_noun nun) { +_http_cache_respond(u3_hreq* req_u, u3_noun nun) +{ h2o_req_t* rec_u = req_u->rec_u; u3_httd* htd_u = req_u->hon_u->htp_u->htd_u; @@ -694,7 +1032,52 @@ _http_cache_respond(u3_hreq* req_u, u3_noun nun) { } else { u3_noun auth, response_header, data; - u3x_qual(u3k(u3t(u3t(nun))), &auth, 0, &response_header, &data); + u3x_qual(u3t(u3t(nun)), &auth, 0, &response_header, &data); + u3_noun status, headers; + u3x_cell(response_header, &status, &headers); + + // check auth + if ( (c3y == auth) + && (c3n == _http_req_is_auth(&htd_u->fig_u, rec_u)) ) + { + h2o_send_error_403(rec_u, "Unauthorized", "unauthorized", 0); + } + else { + req_u->sat_e = u3_rsat_plan; + _http_start_respond(req_u, 
u3k(status), u3k(headers), u3k(data), c3y); + } + } + u3z(nun); +} + +/* _http_scry_respond(): respond with a simple-payload:http +*/ +static void +_http_scry_respond(u3_hreq* req_u, u3_noun nun) +{ + h2o_req_t* rec_u = req_u->rec_u; + u3_httd* htd_u = req_u->hon_u->htp_u->htd_u; + + if ( u3_nul == nun ) { + u3_weak req = _http_rec_to_httq(rec_u); + if ( u3_none == req ) { + if ( (u3C.wag_w & u3o_verbose) ) { + u3l_log("strange %.*s request", (c3_i)rec_u->method.len, + rec_u->method.base); + } + c3_c* msg_c = "bad request"; + h2o_send_error_generic(rec_u, 400, msg_c, msg_c, 0); + } + else { + h2o_send_error_500(rec_u, "Internal Server Error", "scry failed", 0); + } + } + else if ( u3_none == u3r_at(7, nun) ) { + h2o_send_error_500(rec_u, "Internal Server Error", "scry failed", 0); + } + else { + u3_noun auth, response_header, data; + u3x_qual(u3t(u3t(nun)), &auth, 0, &response_header, &data); u3_noun status, headers; u3x_cell(response_header, &status, &headers); diff --git a/pkg/vere/io/term.c b/pkg/vere/io/term.c index ff3f11efd2..60fa6a68fa 100644 --- a/pkg/vere/io/term.c +++ b/pkg/vere/io/term.c @@ -1603,6 +1603,13 @@ _term_io_talk(u3_auto* car_u) u3_noun wir = u3nt(c3__term, '1', u3_nul); u3_noun cad; + // send born event + // + { + cad = u3nc(c3__born, u3_nul); + _term_ovum_plan(car_u, u3k(wir), cad); + } + // send terminal dimensions // { @@ -1639,6 +1646,17 @@ _reck_orchid(u3_noun fot, u3_noun txt, c3_l* tid_l) } } +/* _term_io_quiz(): handle quiz (query to serf). +*/ +static void +_term_io_quiz(void* vod_p, u3_noun res) +{ + u3_auto* car_u = (u3_auto*)vod_p; + u3_noun wir = u3nt(c3__term, '1', u3_nul); + u3_noun cad = u3k(res); + u3_auto_plan(car_u, u3_ovum_init(0, c3__d, wir, cad)); +} + /* _term_io_kick(): apply effects. 
*/ static c3_o @@ -1727,6 +1745,17 @@ _term_io_kick(u3_auto* car_u, u3_noun wir, u3_noun cad) ret_o = c3y; u3_pier_pack(car_u->pir_u); } break; + + case c3__quac: { + ret_o = c3y; + u3_writ* wit_u = u3_lord_writ_new(u3K.pir_u->god_u); + wit_u->typ_e = u3_writ_quiz; + wit_u->qui_u.ptr_v = car_u; + wit_u->qui_u.quiz_f = _term_io_quiz; + + u3_lord_writ_plan(u3K.pir_u->god_u, wit_u); + + } break; } } } diff --git a/pkg/vere/king.c b/pkg/vere/king.c index bd619564b2..285ac18a49 100644 --- a/pkg/vere/king.c +++ b/pkg/vere/king.c @@ -270,11 +270,11 @@ _king_pier(u3_noun pier) u3z(pier); } -/* _king_curl_alloc(): allocate a response buffer for curl +/* king_curl_alloc(): allocate a response buffer for curl ** XX deduplicate with dawn.c */ -static size_t -_king_curl_alloc(void* dat_v, size_t uni_t, size_t mem_t, void* buf_v) +size_t +king_curl_alloc(void* dat_v, size_t uni_t, size_t mem_t, void* buf_v) { uv_buf_t* buf_u = buf_v; @@ -288,11 +288,11 @@ _king_curl_alloc(void* dat_v, size_t uni_t, size_t mem_t, void* buf_v) return siz_t; } -/* _king_curl_bytes(): HTTP GET url_c, produce response body bytes. +/* king_curl_bytes(): HTTP GET url_c, produce response body bytes. 
** XX deduplicate with dawn.c */ -static c3_i -_king_curl_bytes(c3_c* url_c, c3_w* len_w, c3_y** hun_y, c3_t veb_t) +c3_i +king_curl_bytes(c3_c* url_c, c3_w* len_w, c3_y** hun_y, c3_t veb_t) { c3_i ret_i = 0; CURL *cul_u; @@ -308,8 +308,9 @@ _king_curl_bytes(c3_c* url_c, c3_w* len_w, c3_y** hun_y, c3_t veb_t) u3K.ssl_curl_f(cul_u); curl_easy_setopt(cul_u, CURLOPT_URL, url_c); - curl_easy_setopt(cul_u, CURLOPT_WRITEFUNCTION, _king_curl_alloc); + curl_easy_setopt(cul_u, CURLOPT_WRITEFUNCTION, king_curl_alloc); curl_easy_setopt(cul_u, CURLOPT_WRITEDATA, (void*)&buf_u); + curl_easy_setopt(cul_u, CURLOPT_SERVER_RESPONSE_TIMEOUT, 30); while ( 5 > try_y ) { sleep(try_y++); @@ -328,6 +329,9 @@ _king_curl_bytes(c3_c* url_c, c3_w* len_w, c3_y** hun_y, c3_t veb_t) u3l_log("curl: error fetching %s: HTTP %ld", url_c, cod_i); } ret_i = -2; + if ( 400 <= cod_i && cod_i < 500 ) { + break; + } } else { *len_w = buf_u.len; @@ -351,7 +355,7 @@ _king_get_atom(c3_c* url_c) c3_y* hun_y; u3_noun pro; - if ( _king_curl_bytes(url_c, &len_w, &hun_y, 1) ) { + if ( king_curl_bytes(url_c, &len_w, &hun_y, 1) ) { u3_king_bail(); exit(1); } @@ -431,7 +435,7 @@ u3_king_next(c3_c* pac_c, c3_c** out_c) // skip printfs on failed requests (/next is usually not present) //REVIEW new retry logic means this case will take longer. make retries optional? 
// - if ( _king_curl_bytes(url_c, &len_w, &hun_y, 0) ) { + if ( king_curl_bytes(url_c, &len_w, &hun_y, 0) ) { c3_free(url_c); ret_i = asprintf(&url_c, "%s/%s/last", ver_hos_c, pac_c); @@ -440,7 +444,7 @@ u3_king_next(c3_c* pac_c, c3_c** out_c) // enable printfs on failed requests (/last must be present) // XX support channel redirections // - if ( _king_curl_bytes(url_c, &len_w, &hun_y, 1) ) + if ( king_curl_bytes(url_c, &len_w, &hun_y, 1) ) { c3_free(url_c); return -2; @@ -1653,7 +1657,6 @@ u3_king_bail(void) void u3_king_grab(void* vod_p) { - c3_w tot_w = 0; FILE* fil_u; u3_assert( u3R == &(u3H->rod_u) ); @@ -1689,11 +1692,32 @@ u3_king_grab(void* vod_p) } #endif - tot_w += u3m_mark(fil_u); - tot_w += u3_pier_mark(fil_u); + u3m_quac** all_u = c3_malloc(sizeof(*all_u)*6); + + u3m_quac** var_u = u3m_mark(); + all_u[0] = var_u[0]; + all_u[1] = var_u[1]; + all_u[2] = var_u[2]; + all_u[3] = var_u[3]; + c3_free(var_u); + + c3_w tot_w = all_u[0]->siz_w + all_u[1]->siz_w + + all_u[2]->siz_w + all_u[3]->siz_w; + + all_u[4] = c3_calloc(sizeof(*all_u[4])); + all_u[4]->nam_c = "total marked"; + all_u[4]->siz_w = tot_w; + + all_u[5] = c3_calloc(sizeof(*all_u[5])); + all_u[5]->nam_c = "sweep"; + all_u[5]->siz_w = u3a_sweep(); + + for ( c3_w i_w = 0; i_w < 6; i_w++ ) { + u3a_print_quac(fil_u, 0, all_u[i_w]); + u3a_quac_free(all_u[i_w]); + } - u3a_print_memory(fil_u, "total marked", tot_w); - u3a_print_memory(fil_u, "sweep", u3a_sweep()); + c3_free(all_u); #ifdef U3_MEMORY_LOG { diff --git a/pkg/vere/lord.c b/pkg/vere/lord.c index a63824ae9b..3553aa402f 100644 --- a/pkg/vere/lord.c +++ b/pkg/vere/lord.c @@ -23,6 +23,7 @@ [%peek mil=@ sam=*] :: gang (each path $%([%once @tas @tas path] [%beam @tas beam])) [%play eve=@ lit=(list ?((pair @da ovum) *))] [%work mil=@ job=(pair @da ovum)] + [%quiz $%([%quac ~])] == :: +plea: from serf to king :: @@ -31,6 +32,7 @@ [%ripe [pro=%1 hon=@ nok=@] eve=@ mug=@] [%slog pri=@ tank] [%flog cord] + [%quiz $%([%quac p=*])] $: %peek $% [%done 
dat=(unit (cask))] [%bail dud=goof] @@ -521,6 +523,16 @@ _lord_plea_play(u3_lord* god_u, u3_noun dat) u3z(dat); } +/* _lord_plea_quiz(): handle quiz (query to serf). + */ +static void +_lord_plea_quiz(u3_lord* god_u, u3_noun dat) +{ + u3_writ* wit_u = _lord_writ_need(god_u, u3_writ_quiz); + wit_u->qui_u.quiz_f(wit_u->qui_u.ptr_v, dat); + u3z(dat); +} + /* _lord_work_spin(): update spinner if more work is in progress. */ static void @@ -742,15 +754,19 @@ _lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) case c3__ripe: { _lord_plea_ripe(god_u, u3k(dat)); } break; + + case c3__quiz: { + _lord_plea_quiz(god_u, u3k(dat)); + } break; } u3z(jar); } -/* _lord_writ_new(): allocate a new writ. +/* u3_lord_writ_new(): allocate a new writ. */ -static u3_writ* -_lord_writ_new(u3_lord* god_u) +u3_writ* +u3_lord_writ_new(u3_lord* god_u) { u3_writ* wit_u = c3_calloc(sizeof(*wit_u)); return wit_u; @@ -812,6 +828,10 @@ _lord_writ_make(u3_lord* god_u, u3_writ* wit_u) // msg = u3nt(c3__live, c3__exit, 0); } break; + + case u3_writ_quiz: { + msg = u3nt(c3__quiz, c3__quac, u3_nul); + } break; } return msg; @@ -849,10 +869,10 @@ _lord_writ_send(u3_lord* god_u, u3_writ* wit_u) } } -/* _lord_writ_plan(): enqueue a writ and send. +/* u3_lord_writ_plan(): enqueue a writ and send. */ -static void -_lord_writ_plan(u3_lord* god_u, u3_writ* wit_u) +void +u3_lord_writ_plan(u3_lord* god_u, u3_writ* wit_u) { if ( !god_u->ent_u ) { u3_assert( !god_u->ext_u ); @@ -874,7 +894,7 @@ _lord_writ_plan(u3_lord* god_u, u3_writ* wit_u) void u3_lord_peek(u3_lord* god_u, u3_pico* pic_u) { - u3_writ* wit_u = _lord_writ_new(god_u); + u3_writ* wit_u = u3_lord_writ_new(god_u); wit_u->typ_e = u3_writ_peek; wit_u->pek_u = c3_calloc(sizeof(*wit_u->pek_u)); wit_u->pek_u->ptr_v = pic_u->ptr_v; @@ -905,7 +925,7 @@ u3_lord_peek(u3_lord* god_u, u3_pico* pic_u) // XX cache check, unless last // - _lord_writ_plan(god_u, wit_u); + u3_lord_writ_plan(god_u, wit_u); } /* u3_lord_play(): recompute batch. 
@@ -913,7 +933,7 @@ u3_lord_peek(u3_lord* god_u, u3_pico* pic_u) void u3_lord_play(u3_lord* god_u, u3_info fon_u) { - u3_writ* wit_u = _lord_writ_new(god_u); + u3_writ* wit_u = u3_lord_writ_new(god_u); wit_u->typ_e = u3_writ_play; wit_u->fon_u = fon_u; @@ -921,7 +941,7 @@ u3_lord_play(u3_lord* god_u, u3_info fon_u) // // u3_assert( !pay_u.ent_u->nex_u ); - _lord_writ_plan(god_u, wit_u); + u3_lord_writ_plan(god_u, wit_u); } /* u3_lord_work(): attempt work. @@ -929,7 +949,7 @@ u3_lord_play(u3_lord* god_u, u3_info fon_u) void u3_lord_work(u3_lord* god_u, u3_ovum* egg_u, u3_noun job) { - u3_writ* wit_u = _lord_writ_new(god_u); + u3_writ* wit_u = u3_lord_writ_new(god_u); wit_u->typ_e = u3_writ_work; wit_u->wok_u.egg_u = egg_u; wit_u->wok_u.job = job; @@ -943,7 +963,7 @@ u3_lord_work(u3_lord* god_u, u3_ovum* egg_u, u3_noun job) god_u->pin_o = c3y; } - _lord_writ_plan(god_u, wit_u); + u3_lord_writ_plan(god_u, wit_u); } /* u3_lord_save(): save a snapshot. @@ -955,9 +975,9 @@ u3_lord_save(u3_lord* god_u) return c3n; } else { - u3_writ* wit_u = _lord_writ_new(god_u); + u3_writ* wit_u = u3_lord_writ_new(god_u); wit_u->typ_e = u3_writ_save; - _lord_writ_plan(god_u, wit_u); + u3_lord_writ_plan(god_u, wit_u); return c3y; } } @@ -971,9 +991,9 @@ u3_lord_cram(u3_lord* god_u) return c3n; } else { - u3_writ* wit_u = _lord_writ_new(god_u); + u3_writ* wit_u = u3_lord_writ_new(god_u); wit_u->typ_e = u3_writ_cram; - _lord_writ_plan(god_u, wit_u); + u3_lord_writ_plan(god_u, wit_u); return c3y; } } @@ -983,9 +1003,9 @@ u3_lord_cram(u3_lord* god_u) void u3_lord_meld(u3_lord* god_u) { - u3_writ* wit_u = _lord_writ_new(god_u); + u3_writ* wit_u = u3_lord_writ_new(god_u); wit_u->typ_e = u3_writ_meld; - _lord_writ_plan(god_u, wit_u); + u3_lord_writ_plan(god_u, wit_u); } /* u3_lord_pack(): defragment persistent state. 
@@ -993,9 +1013,9 @@ u3_lord_meld(u3_lord* god_u) void u3_lord_pack(u3_lord* god_u) { - u3_writ* wit_u = _lord_writ_new(god_u); + u3_writ* wit_u = u3_lord_writ_new(god_u); wit_u->typ_e = u3_writ_pack; - _lord_writ_plan(god_u, wit_u); + u3_lord_writ_plan(god_u, wit_u); } /* u3_lord_exit(): shutdown gracefully. @@ -1003,9 +1023,9 @@ u3_lord_pack(u3_lord* god_u) void u3_lord_exit(u3_lord* god_u) { - u3_writ* wit_u = _lord_writ_new(god_u); + u3_writ* wit_u = u3_lord_writ_new(god_u); wit_u->typ_e = u3_writ_exit; - _lord_writ_plan(god_u, wit_u); + u3_lord_writ_plan(god_u, wit_u); // XX set timer, then halt } diff --git a/pkg/vere/main.c b/pkg/vere/main.c index 12c64d8ecd..3e1fdb8142 100644 --- a/pkg/vere/main.c +++ b/pkg/vere/main.c @@ -1770,7 +1770,7 @@ _cw_grab(c3_i argc, c3_c* argv[]) u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); u3C.wag_w |= u3o_hashless; - u3_serf_grab(); + u3z(u3_serf_grab(c3y)); u3m_stop(); } diff --git a/pkg/vere/pier.c b/pkg/vere/pier.c index 2894b9bf84..bd3190c8b1 100644 --- a/pkg/vere/pier.c +++ b/pkg/vere/pier.c @@ -6,6 +6,7 @@ #include "pace.h" #include "vere.h" #include "version.h" +#include "curl/curl.h" #define PIER_READ_BATCH 1000ULL #define PIER_PLAY_BATCH 500ULL @@ -529,6 +530,181 @@ _pier_on_scry_done(void* ptr_v, u3_noun nun) u3z(nun); } +static c3_c* +_resolve_czar(u3_work* wok_u, c3_c* who_c) +{ + u3_noun czar = u3dc("scot", 'p', wok_u->pir_u->who_d[0] & ((1 << 8) - 1)); + c3_c* czar_c = u3r_string(czar); + + c3_c url[256]; + c3_w len_w; + c3_y* hun_y; + + sprintf(url, "https://%s.urbit.org/~/sponsor/%s", czar_c+1, who_c); + + c3_i ret_i = king_curl_bytes(url, &len_w, &hun_y, 1); + if (!ret_i) { + c3_free(czar_c); + czar_c = (c3_c*)hun_y; + } + + u3z(czar); + return czar_c; +} + +static c3_o +_czar_boot_data(c3_c* czar_c, + c3_c* who_c, + c3_w* bone_w, + c3_w* czar_glx_w, + c3_w* czar_ryf_w, + c3_w* czar_lyf_w, + c3_w* czar_bon_w, + c3_w* czar_ack_w) +{ + c3_c url[256]; + c3_w len_w; + c3_y* hun_y = 0; + + if ( bone_w 
!= NULL ) { + sprintf(url, "https://%s.urbit.org/~/boot/%s/%d", + czar_c+1, who_c, *bone_w + 1); + } else { + sprintf(url, "https://%s.urbit.org/~/boot/%s", czar_c+1, who_c); + } + + c3_o ret_o = c3n; + c3_i ret_i = king_curl_bytes(url, &len_w, &hun_y, 1); + if ( !ret_i ) { + u3_noun jamd = u3i_bytes(len_w, hun_y); + u3_noun cued = u3qe_cue(jamd); + + u3_noun czar_glx, czar_ryf, czar_lyf, czar_bon, czar_ack; + + if ( (c3y == u3r_hext(cued, 0, &czar_glx, &czar_ryf, + &czar_lyf, &czar_bon, &czar_ack)) && + (c3y == u3r_safe_word(czar_glx, czar_glx_w)) && + (c3y == u3r_safe_word(czar_ryf, czar_ryf_w)) && + (c3y == u3r_safe_word(czar_lyf, czar_lyf_w)) ) { + if ( c3y == u3du(czar_bon) ) u3r_safe_word(u3t(czar_bon), czar_bon_w); + if ( c3y == u3du(czar_ack) ) u3r_safe_word(u3t(czar_ack), czar_ack_w); + ret_o = c3y; + } + + u3z(jamd); + u3z(cued); + c3_free(hun_y); + } + + return ret_o; +} + +static void +_boot_scry_cb(void* vod_p, u3_noun nun) +{ + u3_work* wok_u = (u3_work*)vod_p; + + u3_atom who = u3dc("scot", c3__p, u3i_chubs(2, wok_u->pir_u->who_d)); + c3_c* who_c = u3r_string(who); + + u3_noun rem, glx, ryf, bon, cur, nex; + c3_w glx_w, ryf_w, bon_w, cur_w, nex_w; + + c3_w czar_glx_w, czar_ryf_w, czar_lyf_w, czar_bon_w, czar_ack_w = 0xFFFFFFFF; + + if ( (c3y == u3r_qual(nun, 0, 0, 0, &rem)) && + (c3y == u3r_hext(rem, &glx, &ryf, 0, &bon, &cur, &nex)) ) { + /* + * Boot scry succeeded. Proceed to cross reference networking state against + * sponsoring galaxy. 
+ */ + glx_w = u3r_word(0, glx); ryf_w = u3r_word(0, ryf); + bon_w = u3r_word(0, bon); cur_w = u3r_word(0, cur); + nex_w = u3r_word(0, nex); + + u3_atom czar = u3dc("scot", c3__p, glx_w); + c3_c* czar_c = u3r_string(czar); + + if ( c3n == _czar_boot_data(czar_c, who_c, &bon_w, + &czar_glx_w, &czar_ryf_w, + &czar_lyf_w, &czar_bon_w, + &czar_ack_w) ) { + u3l_log("boot: peer-state unvailable on czar, cannot protect from double-boot"); + _pier_work(wok_u); + } else { + if ( czar_ryf_w == ryf_w ) { + c3_w ack_w = cur_w - 1; + if ( czar_ack_w == 0xFFFFFFFF ) { + // This codepath should never be hit + u3l_log("boot: message-sink-state unvailable on czar, cannot protect from double-boot"); + _pier_work(wok_u); + } else if ( (czar_ack_w == ack_w) || + ((nex_w > cur_w) && (czar_ack_w - 1 == ack_w)) ) { + _pier_work(wok_u); + } else { + u3l_log("boot: failed: double-boot detected, refusing to boot %s\r\n" + "this pier is an old copy, boot the latest pier or breach\r\n" + "read more: https://docs.urbit.org/glossary/double-boot", + who_c); + u3_king_bail(); + } + } else { + // Trying to boot old ship after breach + u3l_log("boot: failed: double-boot detected, refusing to boot %s\r\n" + "this ship has been breached since its initialization, " + "boot the latest pier or breach again\r\n" + "read more: https://docs.urbit.org/glossary/double-boot", + who_c); + u3_king_bail(); + } + } + + u3z(czar); + c3_free(czar_c); + } else if ( c3y == u3r_trel(nun, 0, 0, &rem) && rem == 0 ) { + /* + * Data not available for boot scry. Check against sponsoring galaxy. + * If peer state exists exit(1) unless ship has breached, + * otherwise continue boot. 
+ */ + c3_c* czar_c = _resolve_czar(wok_u, who_c); + + if ( c3n == _czar_boot_data(czar_c, who_c, 0, + &czar_glx_w, &czar_ryf_w, + &czar_lyf_w, 0, 0) ) { + c3_free(czar_c); + _pier_work(wok_u); + } else { + // Peer state found under czar + c3_free(czar_c); + u3_weak kf_ryf = wok_u->pir_u->ryf; + if ( kf_ryf == u3_none ) { + u3l_log("boot: keyfile rift unavailable, cannot protect from double-boot"); + _pier_work(wok_u); + } else if ( kf_ryf > czar_ryf_w ) { + // Ship has breached, continue boot + _pier_work(wok_u); + } else { + u3l_log("boot: failed: double-boot detected, refusing to boot %s\r\n" + "this ship has already been booted elsewere, " + "boot the existing pier or breach\r\n" + "read more: https://docs.urbit.org/glossary/double-boot", + who_c); + u3_king_bail(); + } + } + } else { + /* + * Boot scry endpoint doesn't exists. Most likely old arvo. + * Continue boot and hope for the best. + */ + u3l_log("boot: %%boot scry endpoint doesn't exist, cannot protect from double-boot"); + _pier_work(wok_u); + } + u3z(nun); u3z(who); + c3_free(who_c); +} + /* _pier_work_init(): begin processing new events */ static void @@ -615,7 +791,20 @@ _pier_work_init(u3_pier* pir_u) u3_auto_talk(wok_u->car_u); } - _pier_work(wok_u); + c3_d pi_d = wok_u->pir_u->who_d[0]; + c3_d pt_d = wok_u->pir_u->who_d[1]; + + if ( (pi_d < 256 && pt_d == 0) || (c3n == u3_Host.ops_u.net) ) { + // Skip double boot protection for galaxies and local mode ships + // + _pier_work(wok_u); + } else { + // Double boot protection + // + u3_noun pex = u3nc(u3i_string("boot"), u3_nul); + u3_pier_peek_last(pir_u, u3nc(u3_nul, u3_nul), c3__ax, u3_nul, pex, + pir_u->wok_u, _boot_scry_cb); + } } /* _pier_wyrd_good(): %wyrd version negotation succeeded. 
@@ -675,7 +864,7 @@ _pier_wyrd_fail(u3_pier* pir_u, u3_ovum* egg_u, u3_noun lud) // XX organizing version constants // #define VERE_NAME "vere" -#define VERE_ZUSE 411 +#define VERE_ZUSE 410 #define VERE_LULL 322 /* _pier_wyrd_aver(): check for %wend effect and version downgrade. RETAIN @@ -815,7 +1004,7 @@ _pier_wyrd_card(u3_pier* pir_u) u3_noun kel = u3nl(u3nc(c3__zuse, VERE_ZUSE), // XX from both king and serf? u3nc(c3__lull, VERE_LULL), // XX from both king and serf? u3nc(c3__arvo, 236), // XX from both king and serf? - u3nc(c3__hoon, 138), // god_u->hon_y + u3nc(c3__hoon, 137), // god_u->hon_y u3nc(c3__nock, 4), // god_u->noc_y u3_none); u3_noun wir = u3nc(c3__arvo, u3_nul); @@ -1609,7 +1798,7 @@ u3_pier_slog(u3_pier* pir_u) /* _pier_init(): create a pier, loading existing. */ static u3_pier* -_pier_init(c3_w wag_w, c3_c* pax_c) +_pier_init(c3_w wag_w, c3_c* pax_c, u3_weak ryf) { // create pier // @@ -1618,6 +1807,7 @@ _pier_init(c3_w wag_w, c3_c* pax_c) pir_u->pax_c = pax_c; pir_u->sat_e = u3_psat_init; pir_u->liv_o = c3n; + pir_u->ryf = ryf; // XX remove // @@ -1691,8 +1881,9 @@ u3_pier* u3_pier_stay(c3_w wag_w, u3_noun pax) { u3_pier* pir_u; + u3_weak rift = u3_none; - if ( !(pir_u = _pier_init(wag_w, u3r_string(pax))) ) { + if ( !(pir_u = _pier_init(wag_w, u3r_string(pax), rift)) ) { fprintf(stderr, "pier: stay: init fail\r\n"); u3_king_bail(); return 0; @@ -1848,7 +2039,7 @@ _pier_boot_make(u3_noun who, // include additional key configuration events if we have multiple keys // - if ( (u3_none != fed) && (c3y == u3du(u3h(fed))) ) { + if ( (u3_none != fed) && (c3y == u3du(u3h(fed))) && (u3h(u3h(fed))) == 1) { u3_noun wir = u3nt(c3__j, c3__seed, u3_nul); u3_noun tag = u3i_string("rekey"); u3_noun kyz = u3t(u3t(fed)); @@ -2027,8 +2218,13 @@ u3_pier_boot(c3_w wag_w, // config flags u3_noun mor) // extra boot sequence props { u3_pier* pir_u; + u3_weak rift = u3_none; + if (fed != u3_none && c3y == u3du(u3h(fed)) && u3h(u3h(fed)) == 2) { + rift = 
u3h(u3t(u3t(fed))); + u3k(rift); + } - if ( !(pir_u = _pier_init(wag_w, u3r_string(pax))) ) { + if ( !(pir_u = _pier_init(wag_w, u3r_string(pax), rift)) ) { fprintf(stderr, "pier: boot: init fail\r\n"); u3_king_bail(); return 0; diff --git a/pkg/vere/serf.c b/pkg/vere/serf.c index cec8bece55..c410212db9 100644 --- a/pkg/vere/serf.c +++ b/pkg/vere/serf.c @@ -26,12 +26,14 @@ :: next steps: [%peek mil=@ sam=*] :: gang (each path $%([%once @tas @tas path] [beam @tas beam])) [%play eve=@ lit=(list ?((pair @da ovum) *))] [%work mil=@ job=(pair @da ovum)] + [%quiz $%([%quac ~])] == :: +plea: from serf to king :: +$ plea $% [%live ~] [%ripe [pro=%1 hon=@ nok=@] eve=@ mug=@] + [%quiz $%([%quac p=*])] [%slog pri=@ tank] [%flog cord] $: %peek @@ -70,18 +72,70 @@ enum { _serf_fag_vega = 1 << 4 // kernel reset }; +/* _serf_quac: convert a quac to a noun. +*/ +u3_noun +_serf_quac(u3m_quac* mas_u) +{ + u3_noun list = u3_nul; + c3_w i_w = 0; + if ( mas_u->qua_u != NULL ) { + while ( mas_u->qua_u[i_w] != NULL ) { + list = u3nc(_serf_quac(mas_u->qua_u[i_w]), list); + i_w++; + } + } + list = u3kb_flop(list); + + u3_noun mas = u3nt(u3i_string(mas_u->nam_c), u3i_word(mas_u->siz_w), list); + + c3_free(mas_u->nam_c); + c3_free(mas_u->qua_u); + c3_free(mas_u); + + return mas; +} + +/* _serf_quacs: convert an array of quacs to a noun list. +*/ +u3_noun +_serf_quacs(u3m_quac** all_u) +{ + u3_noun list = u3_nul; + c3_w i_w = 0; + while ( all_u[i_w] != NULL ) { + list = u3nc(_serf_quac(all_u[i_w]), list); + i_w++; + } + c3_free(all_u); + return u3kb_flop(list); +} + +/* _serf_print_quacs: print an array of quacs. +*/ +void +_serf_print_quacs(FILE* fil_u, u3m_quac** all_u) +{ + fprintf(fil_u, "\r\n"); + c3_w i_w = 0; + while ( all_u[i_w] != NULL ) { + u3a_print_quac(fil_u, 0, all_u[i_w]); + i_w++; + } +} + /* _serf_grab(): garbage collect, checking for profiling. RETAIN. 
*/ -static void -_serf_grab(u3_noun sac) +static u3_noun +_serf_grab(u3_noun sac, c3_o pri_o) { if ( u3_nul == sac) { if ( u3C.wag_w & (u3o_debug_ram | u3o_check_corrupt) ) { u3m_grab(sac, u3_none); } + return u3_nul; } else { - c3_w tot_w = 0; FILE* fil_u; #ifdef U3_MEMORY_LOG @@ -113,36 +167,77 @@ _serf_grab(u3_noun sac) #endif u3_assert( u3R == &(u3H->rod_u) ); - fprintf(fil_u, "\r\n"); - tot_w += u3a_maid(fil_u, "total userspace", u3a_prof(fil_u, 0, sac)); - tot_w += u3m_mark(fil_u); - tot_w += u3a_maid(fil_u, "space profile", u3a_mark_noun(sac)); + u3m_quac* pro_u = u3a_prof(fil_u, sac); + + if ( NULL == pro_u ) { + fflush(fil_u); + u3z(sac); + return u3_nul; + } else { + u3m_quac** all_u = c3_malloc(sizeof(*all_u) * 11); + all_u[0] = pro_u; + + u3m_quac** var_u = u3m_mark(); + all_u[1] = var_u[0]; + all_u[2] = var_u[1]; + all_u[3] = var_u[2]; + all_u[4] = var_u[3]; + c3_free(var_u); + + c3_w tot_w = all_u[0]->siz_w + all_u[1]->siz_w + all_u[2]->siz_w + + all_u[3]->siz_w + all_u[4]->siz_w; - u3a_print_memory(fil_u, "total marked", tot_w); - u3a_print_memory(fil_u, "free lists", u3a_idle(u3R)); - u3a_print_memory(fil_u, "sweep", u3a_sweep()); + all_u[5] = c3_calloc(sizeof(*all_u[5])); + all_u[5]->nam_c = strdup("space profile"); + all_u[5]->siz_w = u3a_mark_noun(sac) * 4; - fflush(fil_u); + tot_w += all_u[5]->siz_w; + + all_u[6] = c3_calloc(sizeof(*all_u[6])); + all_u[6]->nam_c = strdup("total marked"); + all_u[6]->siz_w = tot_w; + + all_u[7] = c3_calloc(sizeof(*all_u[7])); + all_u[7]->nam_c = strdup("free lists"); + all_u[7]->siz_w = u3a_idle(u3R) * 4; + + all_u[8] = c3_calloc(sizeof(*all_u[8])); + all_u[8]->nam_c = strdup("sweep"); + all_u[8]->siz_w = u3a_sweep() * 4; + + all_u[9] = c3_calloc(sizeof(*all_u[9])); + all_u[9]->nam_c = strdup("loom"); + all_u[9]->siz_w = u3C.wor_i * 4; + + all_u[10] = NULL; + + if ( c3y == pri_o ) { + _serf_print_quacs(fil_u, all_u); + } + fflush(fil_u); #ifdef U3_MEMORY_LOG - { - fclose(fil_u); - } + { + fclose(fil_u); + } 
#endif - u3z(sac); + u3_noun mas = _serf_quacs( all_u); + u3z(sac); - u3l_log(""); + return mas; + } } } /* u3_serf_grab(): garbage collect. */ -void -u3_serf_grab(void) +u3_noun +u3_serf_grab(c3_o pri_o) { u3_noun sac = u3_nul; + u3_noun res = u3_nul; u3_assert( u3R == &(u3H->rod_u) ); @@ -173,19 +268,31 @@ u3_serf_grab(void) u3z(gon); } - fprintf(stderr, "serf: measuring memory:\r\n"); - if ( u3_nul != sac ) { - _serf_grab(sac); + res = _serf_grab(sac, pri_o); } else { - u3a_print_memory(stderr, "total marked", u3m_mark(stderr)); + fprintf(stderr, "sac is empty\r\n"); + u3m_quac** var_u = u3m_mark(); + + c3_w tot_w = 0; + c3_w i_w = 0; + while ( var_u[i_w] != NULL ) { + tot_w += var_u[i_w]->siz_w; + u3a_quac_free(var_u[i_w]); + i_w++; + } + c3_free(var_u); + + u3a_print_memory(stderr, "total marked", tot_w / 4); u3a_print_memory(stderr, "free lists", u3a_idle(u3R)); u3a_print_memory(stderr, "sweep", u3a_sweep()); fprintf(stderr, "\r\n"); } fflush(stderr); + + return res; } /* u3_serf_post(): update serf state post-writ. @@ -213,7 +320,7 @@ u3_serf_post(u3_serf* sef_u) // XX this runs on replay too, |mass s/b elsewhere // if ( sef_u->fag_w & _serf_fag_mute ) { - _serf_grab(sef_u->sac); + u3z(_serf_grab(sef_u->sac, c3y)); sef_u->sac = u3_nul; } @@ -906,7 +1013,7 @@ u3_serf_live(u3_serf* sef_u, u3_noun com, u3_noun* ret) } u3m_save(); - u3_serf_grab(); + u3_serf_grab(c3y); *ret = u3nc(c3__live, u3_nul); return c3y; @@ -1024,10 +1131,22 @@ u3_serf_writ(u3_serf* sef_u, u3_noun wit, u3_noun* pel) ret_o = c3y; } } break; + case c3__quiz: { + u3z(wit); + u3_noun res = u3_serf_grab(c3n); + if ( u3_none == res ) { + ret_o = c3n; + } else { + *pel = u3nt(c3__quiz, c3__quac, res); + ret_o = c3y; + } + } break; } } - u3z(wit); + if ( tag != c3__quiz ) { + u3z(wit); + } return ret_o; } diff --git a/pkg/vere/serf.h b/pkg/vere/serf.h index 7cd2ca47d0..0645434015 100644 --- a/pkg/vere/serf.h +++ b/pkg/vere/serf.h @@ -56,7 +56,8 @@ /* u3_serf_grab(): garbage collect. 
*/ - void - u3_serf_grab(void); + u3_noun + u3_serf_grab(c3_o pri_o); + #endif /* ifndef U3_VERE_SERF_H */ diff --git a/pkg/vere/vere.h b/pkg/vere/vere.h index dab95c5080..e669703d96 100644 --- a/pkg/vere/vere.h +++ b/pkg/vere/vere.h @@ -457,7 +457,8 @@ u3_writ_cram = 4, u3_writ_meld = 5, u3_writ_pack = 6, - u3_writ_exit = 7 + u3_writ_exit = 7, + u3_writ_quiz = 8 } u3_writ_type; /* u3_writ: ipc message from king to serf @@ -473,6 +474,10 @@ u3_peek* pek_u; // peek u3_info fon_u; // recompute c3_d eve_d; // save/pack at + struct { // serf query: + void* ptr_v; // driver + void (*quiz_f)(void*, u3_noun); // callback + } qui_u; // }; } u3_writ; @@ -663,6 +668,7 @@ u3_disk* log_u; // event log u3_lord* god_u; // computer u3_psat sat_e; // type-tagged + u3_weak ryf; // rift union { // u3_boot* bot_u; // bootstrap u3_play* pay_u; // recompute @@ -767,7 +773,16 @@ u3_atom u3_time_t_in_ts(time_t tim); #endif + /* u3_lord_writ_new(): allocate a new writ. + */ + u3_writ* + u3_lord_writ_new(u3_lord* god_u); + /* u3_lord_writ_plan(): enqueue a writ and send. + */ + void + u3_lord_writ_plan(u3_lord* god_u, u3_writ* wit_u); + /* u3_time_out_ts(): struct timespec from urbit time. */ void @@ -1566,6 +1581,16 @@ darwin_register_mach_exception_handler(); #endif + /* king_curl_alloc(): allocate a response buffer for curl + */ + size_t + king_curl_alloc(void* dat_v, size_t uni_t, size_t mem_t, void* buf_v); + + /* king_curl_bytes(): HTTP GET url_c, produce response body bytes. + */ + c3_i + king_curl_bytes(c3_c* url_c, c3_w* len_w, c3_y** hun_y, c3_t veb_t); + /* u3_write_fd(): retry interrupts, continue partial writes, assert errors. */ void