diff --git a/src/arb.h b/src/arb.h
index 2e7a23ab7d..103a67c361 100644
--- a/src/arb.h
+++ b/src/arb.h
@@ -29,6 +29,7 @@ extern "C" {
 #define arb_radref(x) (&(x)->rad)
 
 #define ARB_IS_LAGOM(x) (ARF_IS_LAGOM(arb_midref(x)) && MAG_IS_LAGOM(arb_radref(x)))
+#define ARB_IS_SPECIAL(x) (ARF_IS_SPECIAL(arb_midref(x)) || MAG_IS_SPECIAL(arb_radref(x)))
 
 #define ARB_RND ARF_RND_DOWN
 
diff --git a/src/arb/mul.c b/src/arb/mul.c
index e32538846c..b7e562d3a6 100644
--- a/src/arb/mul.c
+++ b/src/arb/mul.c
@@ -69,6 +69,80 @@ arb_mul_arf(arb_t z, const arb_t x, const arf_t y, slong prec)
     }
 }
 
+#if FLINT_HAVE_ARF_MUL_RND_SLOPPY
+FLINT_STATIC_NOINLINE
+void _arb_mul_special(arb_t z, const arb_t x, const arb_t y)
+{
+    /* FIXME: Treat special cases. */
+}
+
+void
+arb_mul(arb_t z, const arb_t x, const arb_t y, slong prec)
+{
+    if (FLINT_LIKELY(!ARB_IS_SPECIAL(x) && !ARB_IS_SPECIAL(y)))
+    {
+        /* Neither x nor y is zero, exact, NaN or infinity. */
+        mag_t zr, xm, ym;
+        int is_lagom;
+
+        is_lagom = (ARB_IS_LAGOM(x) && ARB_IS_LAGOM(y) && ARB_IS_LAGOM(z));
+        mag_init(zr);
+
+        if (is_lagom)
+        {
+            mag_fast_init_set_arf(xm, arb_midref(x));
+            mag_fast_init_set_arf(ym, arb_midref(y));
+
+            mag_fast_mul(zr, xm, arb_radref(y));
+            mag_fast_addmul(zr, ym, arb_radref(x));
+            mag_fast_addmul(zr, arb_radref(x), arb_radref(y));
+        }
+        else
+        {
+            mag_init_set_arf(xm, arb_midref(x));
+            mag_init_set_arf(ym, arb_midref(y));
+
+            mag_mul(zr, xm, arb_radref(y));
+            mag_addmul(zr, ym, arb_radref(x));
+            mag_addmul(zr, arb_radref(x), arb_radref(y));
+        }
+
+        /* NOTE: Should we check if precision is less than what x and y have
+           to offer? */
+        /* NOTE: This is inexact by nature (unless tails are zero). */
+        arf_mul_rnd_sloppy(arb_midref(z), arb_midref(x), arb_midref(y));
+
+        /* FIXME: arf_mul_rnd_sloppy uses rounded inputs in order to only
+         * evaluate an n-by-n high multiplication. This needs to be reflected
+         * in the mag. */
+
+        if (is_lagom)
+        {
+            arf_mag_fast_add_ulp(zr, zr, arb_midref(z), prec);
+            *arb_radref(z) = *zr;
+        }
+        else
+        {
+            arf_mag_add_ulp(arb_radref(z), zr, arb_midref(z), prec);
+            mag_clear(xm);
+            mag_clear(ym);
+            mag_clear(zr);
+        }
+    }
+    else if (arb_is_exact(x) || arb_is_exact(y))
+    {
+        if (arb_is_exact(x))
+            arb_mul_arf(z, y, arb_midref(x), prec);
+        else
+            arb_mul_arf(z, x, arb_midref(y), prec);
+    }
+    else
+    {
+        /* x and y cannot be exact, and some value has to be special. */
+        _arb_mul_special(z, x, y);
+    }
+}
+#else
 void
 arb_mul(arb_t z, const arb_t x, const arb_t y, slong prec)
 {
@@ -132,6 +206,7 @@ arb_mul(arb_t z, const arb_t x, const arb_t y, slong prec)
         mag_clear(zr);
     }
 }
+#endif
 
 void
 arb_mul_si(arb_t z, const arb_t x, slong y, slong prec)
diff --git a/src/arf.h b/src/arf.h
index cd88e366a8..7cd92ade67 100644
--- a/src/arf.h
+++ b/src/arf.h
@@ -776,6 +776,11 @@ int arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, slong prec);
         ? arf_mul_rnd_down(z, x, y, prec) \
         : arf_mul_rnd_any(z, x, y, prec, rnd))
 
+#if FLINT_HAVE_MPN_MULHIGH_NORMALISED
+# define FLINT_HAVE_ARF_MUL_RND_SLOPPY 1
+void arf_mul_rnd_sloppy(arf_ptr, arf_srcptr, arf_srcptr);
+#endif
+
 ARF_INLINE int
 arf_neg_mul(arf_t z, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd)
 {
diff --git a/src/arf/mul_rnd_sloppy.c b/src/arf/mul_rnd_sloppy.c
new file mode 100644
index 0000000000..2a8a439d27
--- /dev/null
+++ b/src/arf/mul_rnd_sloppy.c
@@ -0,0 +1,91 @@
+/*
+    Copyright (C) 2024 Albin Ahlbäck
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "mpn_extras.h"
+#include "arf.h"
+
+#if FLINT_HAVE_MPN_MULHIGH_NORMALISED
+/* NOTE: Assumes no special values */
+void
+arf_mul_rnd_sloppy(arf_ptr z, arf_srcptr x, arf_srcptr y)
+{
+    mp_size_t xn, yn, zn;
+    mp_srcptr xp, yp;
+    mp_ptr zp;
+    int sgnbit;
+    struct mp_limb_pair_t mulret;
+
+    xn = ARF_XSIZE(x);
+    yn = ARF_XSIZE(y);
+    zn = ARF_XSIZE(z);
+    sgnbit = (xn ^ yn) & 1;
+    xn >>= 1;
+    yn >>= 1;
+    zn >>= 1;
+
+    if (yn > xn)
+    {
+        FLINT_SWAP(arf_srcptr, x, y);
+        FLINT_SWAP(mp_size_t, xn, yn);
+    }
+
+    if (yn > 2)
+    {
+        xp = ARF_PTR_D(x);
+        yp = ARF_PTR_D(y);
+    }
+    else
+    {
+        yp = ARF_NOPTR_D(y);
+        if (xn <= 2)
+            xp = ARF_NOPTR_D(x);
+        else
+            xp = ARF_PTR_D(x);
+    }
+
+    xp += xn - yn; /* Make xp have the same length as yp */
+
+    if (zn < yn)
+    {
+        _arf_promote(z, yn);
+        zp = ARF_PTR_D(z);
+    }
+    else if (zn > 2 && yn <= 2)
+    {
+        _arf_demote(z);
+        zp = ARF_NOPTR_D(z);
+    }
+    else
+    {
+        /* z already has a suitable representation for yn limbs. */
+        zp = (yn <= 2) ? ARF_NOPTR_D(z) : ARF_PTR_D(z);
+    }
+
+    if (!FLINT_HAVE_MULHIGH_NORMALISED_FUNC(yn) && (zp == xp || zp == yp))
+    {
+        /* These cases do not allow aliasing */
+        mp_ptr tmp = flint_malloc(sizeof(mp_limb_t) * yn);
+        mulret = flint_mpn_mulhigh_normalised(tmp, xp, yp, yn);
+        flint_mpn_copyi(zp, tmp, yn);
+        flint_free(tmp);
+    }
+    else
+    {
+        /* Here we actually allow aliasing. */
+        mulret = flint_mpn_mulhigh_normalised(zp, xp, yp, yn);
+    }
+
+    ARF_XSIZE(z) = ARF_MAKE_XSIZE(yn, sgnbit);
+    _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), mulret.m2);
+}
+#else
+typedef int fileisempty;
+#endif
diff --git a/src/mag.h b/src/mag.h
index 0122941051..7fc041ae05 100644
--- a/src/mag.h
+++ b/src/mag.h
@@ -109,6 +109,7 @@ _fmpz_sub2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, slong c)
 /* Finite and with lagom big exponents. */
 #define MAG_IS_LAGOM(x) (MAG_EXP(x) >= MAG_MIN_LAGOM_EXP && \
                          MAG_EXP(x) <= MAG_MAX_LAGOM_EXP)
+#define MAG_IS_SPECIAL(x) (MAG_MAN(x) == 0)
 
 #define MAG_EXPREF(x) (&(x)->exp)
 #define MAG_EXP(x) ((x)->exp)
@@ -222,7 +223,7 @@ mag_one(mag_t x)
 MAG_INLINE int
 mag_is_special(const mag_t x)
 {
-    return MAG_MAN(x) == 0;
+    return MAG_IS_SPECIAL(x);
 }
 
 MAG_INLINE int
diff --git a/src/mpn_extras.h b/src/mpn_extras.h
index 0df2521597..5ca5f46c55 100644
--- a/src/mpn_extras.h
+++ b/src/mpn_extras.h
@@ -268,6 +268,9 @@ FLINT_DLL extern const flint_mpn_mulhigh_normalised_func_t flint_mpn_mulhigh_nor
 # define FLINT_HAVE_NATIVE_MPN_MULHIGH_BASECASE 1
 # define FLINT_HAVE_NATIVE_MPN_SQRHIGH_BASECASE 1
 
+# define FLINT_HAVE_MPN_MULHIGH 1
+# define FLINT_HAVE_MPN_MULHIGH_NORMALISED 1
+
 /* NOTE: This function only works for n >= 6 */
 mp_limb_t _flint_mpn_mulhigh_basecase(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
 
@@ -318,6 +321,8 @@ struct mp_limb_pair_t flint_mpn_mulhigh_normalised(mp_ptr rp, mp_srcptr xp, mp_s
 {
     struct mp_limb_pair_t ret;
 
+    /* FIXME: Currently _flint_mpn_mulhigh allows aliasing above a certain
+     * size. */
     FLINT_ASSERT(rp != xp && rp != yp);
 
     ret.m1 = _flint_mpn_mulhigh(rp, xp, yp, n);
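For reference, a minimal caller-side sketch (not part of the patch) of how the new arb_mul fast path gets exercised through the public arb interface. It assumes a FLINT build in which FLINT_HAVE_ARF_MUL_RND_SLOPPY ends up defined, and it only uses existing arb functions (arb_init, arb_set_ui, arb_div_ui, arb_sqrt_ui, arb_mul, arb_printn, arb_clear):

    #include "arb.h"

    int main(void)
    {
        arb_t x, y, z;

        arb_init(x);
        arb_init(y);
        arb_init(z);

        /* Two inexact, nonzero balls: x ~ 1/3 and y ~ sqrt(2) at 53 bits.
           Neither ball is special, so arb_mul takes the fast path above:
           the midpoints are multiplied with arf_mul_rnd_sloppy and the
           radius is bounded by |xm|*yr + |ym|*xr + xr*yr plus one ulp of
           the computed midpoint. */
        arb_set_ui(x, 1);
        arb_div_ui(x, x, 3, 53);
        arb_sqrt_ui(y, 2, 53);

        arb_mul(z, x, y, 53);
        arb_printn(z, 20, 0);
        flint_printf("\n");

        arb_clear(x);
        arb_clear(y);
        arb_clear(z);
        flint_cleanup();
        return 0;
    }

As the FIXME in the new arb_mul notes, the one-ulp bump currently only accounts for the rounding of the output; the truncation of the inputs performed by arf_mul_rnd_sloppy still has to be reflected in the radius.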