Add convertion function between binary float16 and float32 in softfloat

This commit is contained in:
Weiwei Li
2023-04-14 22:35:12 +08:00
parent 740e6353a1
commit afe3987685
8 changed files with 316 additions and 0 deletions

80
softfloat/bf16_to_f32.c Normal file
View File

@@ -0,0 +1,80 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
float32_t bf16_to_f32( bfloat16_t a )
{
union ui16_f16 uA;
uint_fast16_t uiA;
bool sign;
int_fast16_t exp;
uint_fast16_t frac;
struct commonNaN commonNaN;
uint_fast32_t uiZ;
union ui32_f32 uZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uA.f = a;
uiA = uA.ui;
sign = signBF16UI( uiA );
exp = expBF16UI( uiA );
frac = fracBF16UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp == 0xFF ) {
if ( frac ) {
softfloat_bf16UIToCommonNaN( uiA, &commonNaN );
uiZ = softfloat_commonNaNToF32UI( &commonNaN );
} else {
uiZ = packToF32UI( sign, 0xFF, 0 );
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiZ = packToF32UI( sign, exp, (uint_fast32_t) frac<<16 );
uiZ:
uZ.ui = uiZ;
return uZ.f;
}

92
softfloat/f32_to_bf16.c Normal file
View File

@@ -0,0 +1,92 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
bfloat16_t f32_to_bf16( float32_t a )
{
union ui32_f32 uA;
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t frac;
struct commonNaN commonNaN;
struct exp16_sig32 normExpSig;
uint_fast16_t uiZ, frac16;
union ui16_f16 uZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uA.f = a;
uiA = uA.ui;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
frac = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( exp == 0xFF ) {
if ( frac ) {
softfloat_f32UIToCommonNaN( uiA, &commonNaN );
uiZ = softfloat_commonNaNToBF16UI( &commonNaN );
} else {
uiZ = packToBF16UI( sign, 0xFF, 0 );
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( ! (exp | frac) ) {
uiZ = packToBF16UI( sign, 0, 0 );
goto uiZ;
} else if ( !exp ) {
normExpSig = softfloat_normSubnormalF32Sig( frac );
exp = normExpSig.exp;
frac = normExpSig.sig;
}
frac16 = frac>>9 | ((frac & 0x1FF) != 0);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
return softfloat_roundPackToBF16( sign, exp - 1, frac16 | 0x4000 );
uiZ:
uZ.ui = uiZ;
return uZ.f;
}

View File

@@ -89,6 +89,11 @@ int_fast64_t softfloat_roundMToI64( bool, uint32_t *, uint_fast8_t, bool );
#define fracF16UI( a ) ((a) & 0x03FF)
#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig))
#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF)
#define fracBF16UI( a ) ((a) & 0x07F)
#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig))
#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))
struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; };
@@ -103,6 +108,7 @@ float16_t
softfloat_mulAddF16(
uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t );
bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))

View File

@@ -0,0 +1,113 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"
bfloat16_t
softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
{
uint_fast8_t roundingMode;
bool roundNearEven;
uint_fast8_t roundIncrement, roundBits;
bool isTiny;
uint_fast16_t uiZ;
union ui16_f16 uZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundingMode = softfloat_roundingMode;
roundNearEven = (roundingMode == softfloat_round_near_even);
roundIncrement = 0x40;
if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? softfloat_round_min : softfloat_round_max))
? 0x7F
: 0;
}
roundBits = sig & 0x7F;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0xFD <= (unsigned int) exp ) {
if ( exp < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
isTiny =
(softfloat_detectTininess == softfloat_tininess_beforeRounding)
|| (exp < -1) || (sig + roundIncrement < 0x8000);
sig = softfloat_shiftRightJam32( sig, -exp );
exp = 0;
roundBits = sig & 0x7F;
if ( isTiny && roundBits ) {
softfloat_raiseFlags( softfloat_flag_underflow );
}
} else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
softfloat_raiseFlags(
softfloat_flag_overflow | softfloat_flag_inexact );
uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
goto uiZ;
}
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig + roundIncrement)>>7;
if ( roundBits ) {
softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
if ( roundingMode == softfloat_round_odd ) {
sig |= 1;
goto packReturn;
}
#endif
}
sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven);
if ( ! sig ) exp = 0;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
packReturn:
uiZ = packToBF16UI( sign, exp, sig );
uiZ:
uZ.ui = uiZ;
return uZ.f;
}

View File

@@ -154,6 +154,7 @@ uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool );
int_fast32_t f16_to_i32_r_minMag( float16_t, bool );
int_fast64_t f16_to_i64_r_minMag( float16_t, bool );
float32_t f16_to_f32( float16_t );
float32_t bf16_to_f32( bfloat16_t );
float64_t f16_to_f64( float16_t );
#ifdef SOFTFLOAT_FAST_INT64
extFloat80_t f16_to_extF80( float16_t );
@@ -196,6 +197,7 @@ uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
float16_t f32_to_f16( float32_t );
bfloat16_t f32_to_bf16( float32_t );
float64_t f32_to_f64( float32_t );
#ifdef SOFTFLOAT_FAST_INT64
extFloat80_t f32_to_extF80( float32_t );

View File

@@ -45,6 +45,7 @@ softfloat_c_srcs = \
f16_sqrt.c \
f16_sub.c \
f16_to_f128.c \
bf16_to_f32.c \
f16_to_f32.c \
f16_to_f64.c \
f16_to_i8.c \
@@ -76,6 +77,7 @@ softfloat_c_srcs = \
f32_sqrt.c \
f32_sub.c \
f32_to_f128.c \
f32_to_bf16.c \
f32_to_f16.c \
f32_to_f64.c \
f32_to_i16.c \
@@ -181,6 +183,7 @@ softfloat_c_srcs = \
s_roundMToUI64.c \
s_roundPackMToI64.c \
s_roundPackMToUI64.c \
s_roundPackToBF16.c \
s_roundPackToF128.c \
s_roundPackToF16.c \
s_roundPackToF32.c \

View File

@@ -48,6 +48,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
| (typically 'float' and 'double', and possibly 'long double').
*----------------------------------------------------------------------------*/
typedef struct { uint16_t v; } float16_t;
typedef float16_t bfloat16_t;
typedef struct { uint32_t v; } float32_t;
typedef struct { uint64_t v; } float64_t;
typedef struct { uint64_t v[2]; } float128_t;

View File

@@ -98,6 +98,11 @@ struct commonNaN { char _unused; };
*----------------------------------------------------------------------------*/
#define defaultNaNF16UI 0x7E00
/*----------------------------------------------------------------------------
| The bit pattern for a default generated binary 16-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNBF16UI 0x7FC0
/*----------------------------------------------------------------------------
| Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a
| 16-bit floating-point signaling NaN.
@@ -113,6 +118,20 @@ struct commonNaN { char _unused; };
*----------------------------------------------------------------------------*/
#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid )
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a binary 16-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x040) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid )
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a binary 16-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI)
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 16-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.