-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Copy pathieee754.c
83 lines (70 loc) · 1.81 KB
/
ieee754.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*
* Shamelessly lifted from Beej's Guide to Network Programming, found here:
*
* http://beej.us/guide/bgnet/output/html/singlepage/bgnet.html#serialization
*
* Below code was granted to the public domain.
*/
#include "ieee754.h"
uint64_t pack754(long double f, unsigned bits, unsigned expbits)
{
long double fnorm;
int shift;
long long sign, exp, significand;
unsigned significandbits = bits - expbits - 1; // -1 for sign bit
// get this special case out of the way
if (f == 0.0)
return 0;
// check sign and begin normalization
if (f < 0) {
sign = 1;
fnorm = -f;
} else {
sign = 0;
fnorm = f;
}
// get the normalized form of f and track the exponent
shift = 0;
while (fnorm >= 2.0) {
fnorm /= 2.0;
shift++;
}
while (fnorm < 1.0) {
fnorm *= 2.0;
shift--;
}
fnorm = fnorm - 1.0;
// calculate the binary form (non-float) of the significand data
significand = fnorm * ((1LL << significandbits) + 0.5f);
// get the biased exponent
exp = shift + ((1 << (expbits - 1)) - 1); // shift + bias
// return the final answer
return (sign << (bits - 1)) | (exp << (bits-expbits - 1)) | significand;
}
long double unpack754(uint64_t i, unsigned bits, unsigned expbits)
{
long double result;
long long shift;
unsigned bias;
unsigned significandbits = bits - expbits - 1; // -1 for sign bit
if (i == 0)
return 0.0;
// pull the significand
result = (i & ((1LL << significandbits) - 1)); // mask
result /= (1LL << significandbits); // convert back to float
result += 1.0f; // add the one back on
// deal with the exponent
bias = (1 << (expbits - 1)) - 1;
shift = ((i >> significandbits) & ((1LL << expbits) - 1)) - bias;
while (shift > 0) {
result *= 2.0;
shift--;
}
while (shift < 0) {
result /= 2.0;
shift++;
}
// sign it
result *= (i >> (bits - 1)) & 1 ? -1.0 : 1.0;
return result;
}