1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "shared.rsh"
18
// Half-precision operands/result slots used by the TEST_* macros: one scalar
// and one each of the 2-, 3- and 4-element vector types.
// NOTE(review): presumably declared volatile so the compiler cannot fold or
// discard the API calls made on them -- confirm.
static volatile half h1;
static volatile half2 h2;
static volatile half3 h3;
static volatile half4 h4;

// Integer counterparts (scalar and 2/3/4-element vectors) used by the TEST_*
// macros for int arguments, int results and int out-pointers.
static volatile int i1;
static volatile int2 i2;
static volatile int3 i3;
static volatile int4 i4;
28
// Calls fn(x) on each of h1..h4 and stores the result back into the same
// variable.  Wrapped in do { } while (0) so the expansion is one statement
// and stays intact under an unbraced if/else; the caller supplies the ';'.
#define TEST_HN_FUNC_HN(fn) \
do {                        \
    h1 = fn(h1);            \
    h2 = fn(h2);            \
    h3 = fn(h3);            \
    h4 = fn(h4);            \
} while (0)
34
// Calls fn(x) on each of h1..h4 and stores the result in the int variable of
// matching width (i1..i4).  Wrapped in do { } while (0) so the expansion is a
// single statement; the caller supplies the ';'.
#define TEST_IN_FUNC_HN(fn) \
do {                        \
    i1 = fn(h1);            \
    i2 = fn(h2);            \
    i3 = fn(h3);            \
    i4 = fn(h4);            \
} while (0)
40
// Calls fn(x, x) on each of h1..h4 (both arguments have the same width) and
// stores the result back into that variable.  Wrapped in do { } while (0) so
// the expansion is a single statement; the caller supplies the ';'.
#define TEST_HN_FUNC_HN_HN(fn)  \
do {                            \
    h1 = fn(h1, h1);            \
    h2 = fn(h2, h2);            \
    h3 = fn(h3, h3);            \
    h4 = fn(h4, h4);            \
} while (0)
46
// Calls fn(x, i) on each of h1..h4 paired with the int variable of the same
// width (i1..i4) and stores the result back into the half variable.  Wrapped
// in do { } while (0) so the expansion is a single statement; the caller
// supplies the ';'.
#define TEST_HN_FUNC_HN_IN(fn)  \
do {                            \
    h1 = fn(h1, i1);            \
    h2 = fn(h2, i2);            \
    h3 = fn(h3, i3);            \
    h4 = fn(h4, i4);            \
} while (0)
52
// Calls fn(x, &i) on each of h1..h4, passing a pointer to the int variable of
// the same width (i1..i4), and stores the result back into the half variable.
// The casts strip the volatile qualifier from the pointer.  Wrapped in
// do { } while (0) so the expansion is a single statement; the caller
// supplies the ';'.
#define TEST_HN_FUNC_HN_PIN(fn)     \
do {                                \
    h1 = fn(h1, (int *) &i1);       \
    h2 = fn(h2, (int2 *) &i2);      \
    h3 = fn(h3, (int3 *) &i3);      \
    h4 = fn(h4, (int4 *) &i4);      \
} while (0)
58
// Calls fn(x, i1) on each of h1..h4, always with the scalar int i1 as the
// second argument, and stores the result back into the half variable.
// Wrapped in do { } while (0) so the expansion is a single statement; the
// caller supplies the ';'.
#define TEST_HN_FUNC_HN_I(fn)  \
do {                           \
    h1 = fn(h1, i1);           \
    h2 = fn(h2, i1);           \
    h3 = fn(h3, i1);           \
    h4 = fn(h4, i1);           \
} while (0)
64
// Calls fn(x, h1) on each of h1..h4, always with the scalar h1 as the second
// argument, and stores the result back into the first argument's variable.
// Wrapped in do { } while (0) so the expansion is a single statement; the
// caller supplies the ';'.
#define TEST_HN_FUNC_HN_H(fn)  \
do {                           \
    h1 = fn(h1, h1);           \
    h2 = fn(h2, h1);           \
    h3 = fn(h3, h1);           \
    h4 = fn(h4, h1);           \
} while (0)
70
// Calls fn(h1, x) on each of h1..h4, always with the scalar h1 as the first
// argument, and stores the result into the second argument's variable.
// Wrapped in do { } while (0) so the expansion is a single statement; the
// caller supplies the ';'.  The last line deliberately has no trailing
// backslash so the following source line is not spliced into the macro.
#define TEST_HN_FUNC_H_HN(fn)  \
do {                           \
    h1 = fn(h1, h1);           \
    h2 = fn(h1, h2);           \
    h3 = fn(h1, h3);           \
    h4 = fn(h1, h4);           \
} while (0)
76
// Calls fn(x, &x) on each of h1..h4, passing a pointer to the same variable
// as the second argument, and stores the result back into it.  The casts
// strip the volatile qualifier from the pointer.  Wrapped in do { } while (0)
// so the expansion is a single statement; the caller supplies the ';'.  The
// last line deliberately has no trailing backslash so the following source
// line is not spliced into the macro.
#define TEST_HN_FUNC_HN_PHN(fn)     \
do {                                \
    h1 = fn(h1, (half *) &h1);      \
    h2 = fn(h2, (half2 *) &h2);     \
    h3 = fn(h3, (half3 *) &h3);     \
    h4 = fn(h4, (half4 *) &h4);     \
} while (0)
82
// Calls fn(x, x, x) on each of h1..h4 (all three arguments have the same
// width) and stores the result back into that variable.  Wrapped in
// do { } while (0) so the expansion is a single statement; the caller
// supplies the ';'.
#define TEST_HN_FUNC_HN_HN_HN(fn)   \
do {                                \
    h1 = fn(h1, h1, h1);            \
    h2 = fn(h2, h2, h2);            \
    h3 = fn(h3, h3, h3);            \
    h4 = fn(h4, h4, h4);            \
} while (0)
88
// Calls fn(x, x, h1) on each of h1..h4, always with the scalar h1 as the
// third argument, and stores the result back into the first argument's
// variable.  Wrapped in do { } while (0) so the expansion is a single
// statement; the caller supplies the ';'.
#define TEST_HN_FUNC_HN_HN_H(fn)   \
do {                               \
    h1 = fn(h1, h1, h1);           \
    h2 = fn(h2, h2, h1);           \
    h3 = fn(h3, h3, h1);           \
    h4 = fn(h4, h4, h1);           \
} while (0)
94
// Calls fn(x, x, &i) on each of h1..h4, passing a pointer to the int variable
// of the same width (i1..i4) as the third argument, and stores the result
// back into the half variable.  The casts strip the volatile qualifier from
// the pointer.  Wrapped in do { } while (0) so the expansion is a single
// statement; the caller supplies the ';'.
#define TEST_HN_FUNC_HN_HN_PIN(fn)     \
do {                                   \
    h1 = fn(h1, h1, (int *) &i1);      \
    h2 = fn(h2, h2, (int2 *) &i2);     \
    h3 = fn(h3, h3, (int3 *) &i3);     \
    h4 = fn(h4, h4, (int4 *) &i4);     \
} while (0)
100
// Calls fn(x) on each of h1..h4 and stores every result in the scalar h1, so
// h1 ends up holding fn(h4).  Wrapped in do { } while (0) so the expansion is
// a single statement; the caller supplies the ';'.
#define TEST_H_FUNC_HN(fn)  \
do {                        \
    h1 = fn(h1);            \
    h1 = fn(h2);            \
    h1 = fn(h3);            \
    h1 = fn(h4);            \
} while (0)
106
// Calls fn(x, x) on each of h1..h4 and stores every result in the scalar h1,
// so h1 ends up holding fn(h4, h4).  Wrapped in do { } while (0) so the
// expansion is a single statement; the caller supplies the ';'.
#define TEST_H_FUNC_HN_HN(fn) \
do {                          \
    h1 = fn(h1, h1);          \
    h1 = fn(h2, h2);          \
    h1 = fn(h3, h3);          \
    h1 = fn(h4, h4);          \
} while (0)
112
// API coverage test: invokes each listed math function through the TEST_*
// macros, i.e. once per operand width using the file-level h1..h4 / i1..i4
// variables.  No result is inspected and the function always returns true,
// so it only shows that every call compiles and executes.
static bool testAPI() {
    TEST_HN_FUNC_HN(acos);
    TEST_HN_FUNC_HN(acosh);
    TEST_HN_FUNC_HN(acospi);

    TEST_HN_FUNC_HN(asin);
    TEST_HN_FUNC_HN(asinh);
    TEST_HN_FUNC_HN(asinpi);

    TEST_HN_FUNC_HN(atan);
    TEST_HN_FUNC_HN_HN(atan2);
    TEST_HN_FUNC_HN_HN(atan2pi);
    TEST_HN_FUNC_HN(atanh);
    TEST_HN_FUNC_HN(atanpi);

    TEST_HN_FUNC_HN(cbrt);
    TEST_HN_FUNC_HN(ceil);
    TEST_HN_FUNC_HN_HN(copysign);

    TEST_HN_FUNC_HN(cos);
    TEST_HN_FUNC_HN(cosh);
    TEST_HN_FUNC_HN(cospi);

    TEST_HN_FUNC_HN(degrees);
    TEST_HN_FUNC_HN(erf);
    TEST_HN_FUNC_HN(erfc);
    TEST_HN_FUNC_HN(exp);
    TEST_HN_FUNC_HN(exp10);
    TEST_HN_FUNC_HN(exp2);
    TEST_HN_FUNC_HN(expm1);

    TEST_HN_FUNC_HN(fabs);
    TEST_HN_FUNC_HN_HN(fdim);
    TEST_HN_FUNC_HN(floor);
    TEST_HN_FUNC_HN_HN_HN(fma);

    // fmax/fmin are exercised with both a same-width and a scalar second
    // argument.
    TEST_HN_FUNC_HN_HN(fmax);
    TEST_HN_FUNC_HN_H(fmax);
    TEST_HN_FUNC_HN_HN(fmin);
    TEST_HN_FUNC_HN_H(fmin);
    TEST_HN_FUNC_HN_HN(fmod);

    // fract is exercised both with and without the pointer out-argument.
    TEST_HN_FUNC_HN(fract);
    TEST_HN_FUNC_HN_PHN(fract);
    TEST_HN_FUNC_HN_PIN(frexp);

    TEST_HN_FUNC_HN_HN(hypot);
    TEST_IN_FUNC_HN(ilogb);
    // ldexp is exercised with a same-width int exponent and a scalar int
    // exponent.
    TEST_HN_FUNC_HN_IN(ldexp);
    TEST_HN_FUNC_HN_I(ldexp);
    // lgamma is exercised both with and without the int pointer out-argument.
    TEST_HN_FUNC_HN(lgamma);
    TEST_HN_FUNC_HN_PIN(lgamma);

    TEST_HN_FUNC_HN(log);
    TEST_HN_FUNC_HN(log10);
    TEST_HN_FUNC_HN(log1p);
    TEST_HN_FUNC_HN(log2);
    TEST_HN_FUNC_HN(logb);

    TEST_HN_FUNC_HN_HN_HN(mad);
    TEST_HN_FUNC_HN_HN(max);
    TEST_HN_FUNC_HN_H(max);
    TEST_HN_FUNC_HN_HN(min);
    TEST_HN_FUNC_HN_H(min);
    TEST_HN_FUNC_HN_HN_HN(mix);
    TEST_HN_FUNC_HN_HN_H(mix);
    TEST_HN_FUNC_HN_PHN(modf);

    // nan_half() takes no argument, so it is called directly rather than
    // through a macro.
    h1 = nan_half();

    TEST_HN_FUNC_HN(native_acos);
    TEST_HN_FUNC_HN(native_acosh);
    TEST_HN_FUNC_HN(native_acospi);

    TEST_HN_FUNC_HN(native_asin);
    TEST_HN_FUNC_HN(native_asinh);
    TEST_HN_FUNC_HN(native_asinpi);

    TEST_HN_FUNC_HN(native_atan);
    TEST_HN_FUNC_HN_HN(native_atan2);
    TEST_HN_FUNC_HN_HN(native_atan2pi);
    TEST_HN_FUNC_HN(native_atanh);
    TEST_HN_FUNC_HN(native_atanpi);

    TEST_HN_FUNC_HN(native_cbrt);
    TEST_HN_FUNC_HN(native_cos);
    TEST_HN_FUNC_HN(native_cosh);
    TEST_HN_FUNC_HN(native_cospi);

    TEST_HN_FUNC_HN_HN(native_divide);
    TEST_HN_FUNC_HN(native_exp);
    TEST_HN_FUNC_HN(native_exp10);
    TEST_HN_FUNC_HN(native_exp2);
    TEST_HN_FUNC_HN(native_expm1);

    TEST_HN_FUNC_HN_HN(native_hypot);
    // NOTE(review): native_length is invoked again in the vector math section
    // below.
    TEST_H_FUNC_HN(native_length);
    TEST_HN_FUNC_HN(native_log);
    TEST_HN_FUNC_HN(native_log10);
    TEST_HN_FUNC_HN(native_log1p);
    TEST_HN_FUNC_HN(native_log2);

    TEST_HN_FUNC_HN_HN(native_powr);
    TEST_HN_FUNC_HN(native_recip);
    TEST_HN_FUNC_HN_IN(native_rootn);
    TEST_HN_FUNC_HN(native_rsqrt);

    TEST_HN_FUNC_HN(native_sin);
    TEST_HN_FUNC_HN_PHN(native_sincos);
    TEST_HN_FUNC_HN(native_sinh);
    TEST_HN_FUNC_HN(native_sinpi);

    TEST_HN_FUNC_HN(native_tan);
    TEST_HN_FUNC_HN(native_tanh);
    TEST_HN_FUNC_HN(native_tanpi);

    TEST_HN_FUNC_HN_HN(nextafter);
    TEST_HN_FUNC_HN_HN(pow);
    TEST_HN_FUNC_HN_IN(pown);
    TEST_HN_FUNC_HN_HN(powr);

    TEST_HN_FUNC_HN(radians);
    TEST_HN_FUNC_HN_HN(remainder);
    TEST_HN_FUNC_HN_HN_PIN(remquo);
    TEST_HN_FUNC_HN(rint);
    TEST_HN_FUNC_HN_IN(rootn);
    TEST_HN_FUNC_HN(round);
    TEST_HN_FUNC_HN(rsqrt);

    TEST_HN_FUNC_HN(sign);
    TEST_HN_FUNC_HN(sin);
    TEST_HN_FUNC_HN_PHN(sincos);
    TEST_HN_FUNC_HN(sinh);
    TEST_HN_FUNC_HN(sinpi);
    TEST_HN_FUNC_HN(sqrt);

    // step is exercised with same-width arguments and with a scalar in either
    // argument position.
    TEST_HN_FUNC_HN_HN(step);
    TEST_HN_FUNC_HN_H(step);
    TEST_HN_FUNC_H_HN(step);

    TEST_HN_FUNC_HN(tan);
    TEST_HN_FUNC_HN(tanh);
    TEST_HN_FUNC_HN(tanpi);

    TEST_HN_FUNC_HN(tgamma);
    TEST_HN_FUNC_HN(trunc);

    // Vector math functions
    // cross is called directly, on the 3- and 4-element operands only.
    h3 = cross(h3, h3);
    h4 = cross(h4, h4);

    TEST_H_FUNC_HN_HN(distance);
    TEST_H_FUNC_HN_HN(dot);
    TEST_H_FUNC_HN(length);
    TEST_H_FUNC_HN_HN(native_distance);
    TEST_H_FUNC_HN(native_length);
    TEST_HN_FUNC_HN(native_normalize);
    TEST_HN_FUNC_HN(normalize);
    return true;
}
273
274typedef union {
275  half hval;
276  short sval;
277} fp16_shape_type;
278
/*
 * Assigns to the half lvalue `h` the value whose raw 16-bit pattern is `s`,
 * by writing `s` to the integer member of an fp16_shape_type and reading
 * back the half member.  Expands to a single statement; the caller supplies
 * the ';'.
 */
#define SET_HALF_WORD(h, s)                        \
do {                                               \
  fp16_shape_type raw_bits = { .sval = (s) };      \
  (h) = raw_bits.hval;                             \
} while (0)
286
// Calls frexp() on (half) inp and checks, via _RS_ASSERT_EQU, that the
// returned fraction equals (half) ref and that the exponent written through
// the pointer equals refExp.
// NOTE(review): _RS_ASSERT_EQU comes from shared.rsh; presumably it records a
// mismatch in the caller's `failed` flag -- confirm.
// Arguments are parenthesized before the cast so expression arguments are
// converted as a whole, and there is no ';' after while (0) so the expansion
// is exactly one statement (safe under an unbraced if/else).
#define VALIDATE_FREXP_HALF(inp, ref, refExp)  \
do {                                           \
    int exp;                                   \
    half out = frexp(((half) (inp)), &exp);    \
    _RS_ASSERT_EQU(out, ((half) (ref)));       \
    _RS_ASSERT_EQU(exp, (refExp));             \
} while (0)
294
295static bool testFrexp() {
296    bool failed= false;
297
298    VALIDATE_FREXP_HALF(0, 0, 0);
299    VALIDATE_FREXP_HALF(-0, -0, 0);
300    VALIDATE_FREXP_HALF(1, 0.5, 1);
301    VALIDATE_FREXP_HALF(0.25, 0.5, -1);
302    VALIDATE_FREXP_HALF(1.5, 0.75, 1);
303    VALIDATE_FREXP_HALF(1.99, 0.995, 1);
304
305    return !failed;
306}
307
// Calls modf() on (half) inp and checks, via _RS_ASSERT_EQU, that the
// returned fractional part equals (half) ref and the stored integral part
// equals (half) refIntPart.
// Place sentinel values around the *intPart parameter to modf to ensure that
// the call writes to just the 2 bytes pointed-to by the parameter.
// Arguments are parenthesized before the cast so expression arguments are
// converted as a whole, and there is no ';' after while (0) so the expansion
// is exactly one statement (safe under an unbraced if/else).
#define VALIDATE_MODF_HALF(inp, ref, refIntPart)       \
do {                                                   \
    half intPart[3];                                   \
    intPart[0] = (half) 42.0f;                         \
    intPart[2] = (half) 3.14f;                         \
    half out = modf(((half) (inp)), &intPart[1]);      \
    _RS_ASSERT_EQU(out, ((half) (ref)));               \
    _RS_ASSERT_EQU(intPart[1], ((half) (refIntPart))); \
    _RS_ASSERT_EQU(intPart[0], (half) 42.0f);          \
    _RS_ASSERT_EQU(intPart[2], (half) 3.14f);          \
} while (0)
321
// Checks modf() on half inputs against known fractional/integral parts.
// Returns true when no VALIDATE_MODF_HALF check set `failed`.
static bool testModf() {
    bool failed = false;

    // Positive inputs.
    VALIDATE_MODF_HALF(0.5, 0.5, 0.0);
    VALIDATE_MODF_HALF(1.5, 0.5, 1.0);
    VALIDATE_MODF_HALF(100.5625, 0.5625, 100.0);

    // Negative inputs: both expected parts carry the sign of the input.
    VALIDATE_MODF_HALF(-0.5, -0.5, -0.0);
    VALIDATE_MODF_HALF(-1.5, -0.5, -1.0);
    VALIDATE_MODF_HALF(-100.5625, -0.5625, -100.0);

    return !failed;
}
335
336static bool testNextAfter() {
337    half zero, minSubNormal, maxSubNormal, minNormal, infinity;
338    half negativeZero, negativeInfinity;
339    half negativeMinSubNormal, negativeMaxSubNormal, negativeMinNormal;
340
341    // TODO Define these constants so the SET_HALF_WORD macro is unnecessary.
342    SET_HALF_WORD(zero, 0x0000);
343    SET_HALF_WORD(minSubNormal, 0x0001);
344    SET_HALF_WORD(maxSubNormal, 0x03ff);
345    SET_HALF_WORD(minNormal, 0x0400);
346    SET_HALF_WORD(infinity, 0x7c00);
347
348    SET_HALF_WORD(negativeZero, 0x7000);
349    SET_HALF_WORD(negativeMinSubNormal, 0x8001);
350    SET_HALF_WORD(negativeMaxSubNormal, 0x83ff);
351    SET_HALF_WORD(negativeMinNormal, 0x8400);
352    SET_HALF_WORD(negativeInfinity, 0xfc00);
353
354    // Number of normal fp16 values:
355    //   All-zero exponent is for zero and subnormals.  All-one exponent is for
356    //   Infinity and NaN.  Hence number of possible values for exponent = 30
357    //
358    //   No. of possible values for mantissa = 2 ^ 10 = 1024
359    //
360    //   Number of positive, non-zero and normal fp16 values = 30 * 1024 = 30720
361    //   Number of negative, non-zero and normal fp16 values = 30 * 1024 = 30720
362    //
363    //   The following tests call nextafter in a loop starting at infinity
364    //   towards the smallest normal and vice versa (for +ve and -ve) and verify
365    //   that the number of loop iterations is 30720.
366
367    const unsigned int numDistinctExpected = 30720;
368    const unsigned int maxSteps = 31000;
369
370    unsigned int numDistinct;
371    half h, toward;
372
373    for (h = minNormal, toward = infinity, numDistinct = 0;
374            numDistinct < maxSteps && h != toward; numDistinct ++) {
375        h = nextafter(h, toward);
376    }
377    if (numDistinct != numDistinctExpected)
378        return false;
379
380    for (h = infinity, toward = minNormal, numDistinct = 0;
381            numDistinct < maxSteps && h != toward; numDistinct ++) {
382        h = nextafter(h, toward);
383    }
384    if (numDistinct != numDistinctExpected)
385        return false;
386
387    for (h = negativeMinNormal, toward = negativeInfinity, numDistinct = 0;
388            numDistinct < maxSteps && h != toward; numDistinct ++) {
389        h = nextafter(h, toward);
390    }
391    if (numDistinct != numDistinctExpected)
392        return false;
393
394    for (h = negativeInfinity, toward = negativeMinNormal, numDistinct = 0;
395            numDistinct < maxSteps && h != toward; numDistinct ++) {
396        h = nextafter(h, toward);
397    }
398    if (numDistinct != numDistinctExpected)
399        return false;
400
401    // Test nextafter at the boundary of subnormal numbers.  Since RenderScript
402    // doesn't require implementations to handle FP16 subnormals correctly,
403    // allow nextafter to return a valid normal number that satisfies the
404    // constraints of nextafter.
405
406    // nextafter(0, infinity) = minnormal or minsubnormal
407    h = nextafter(zero, infinity);
408    if (h != minSubNormal && h != minNormal)
409        return false;
410    h = nextafter(zero, negativeInfinity);
411    if (h != negativeMinSubNormal && h != negativeMinNormal)
412        return false;
413
414    // nextafter(minNormal, negativeInfinity) = maxSubNormal or zero
415    h = nextafter(minNormal, negativeInfinity);
416    if (h != maxSubNormal && h != zero)
417        return false;
418    h = nextafter(negativeMinNormal, infinity);
419    if (h != negativeMaxSubNormal && h != negativeZero)
420        return false;
421
422    return true;
423}
424
425static bool testIlogb() {
426    bool failed = false;
427
428    // Test ilogb for 0, +/- infininty and NaN
429    half infinity, negativeInfinity;
430    SET_HALF_WORD(infinity, 0x7c00);
431    SET_HALF_WORD(negativeInfinity, 0xfc00);
432
433    _RS_ASSERT_EQU(ilogb((half) 0), 0x80000000);
434    _RS_ASSERT_EQU(ilogb((half) -0), 0x80000000);
435    _RS_ASSERT_EQU(ilogb(infinity), 0x7fffffff);
436    _RS_ASSERT_EQU(ilogb(negativeInfinity), 0x7fffffff);
437    _RS_ASSERT_EQU(ilogb(nan_half()), 0x7fffffff);
438
439    // ilogb(2^n) = n.  Test at the boundary on either side of 2^n.
440    // Don't test subnormal numbers as implementations are not expected to
441    // handle them.
442    _RS_ASSERT_EQU(ilogb((half) 0.24), -3);
443    _RS_ASSERT_EQU(ilogb((half) 0.26), -2);
444    _RS_ASSERT_EQU(ilogb((half) 0.49), -2);
445    _RS_ASSERT_EQU(ilogb((half) 0.51), -1);
446    _RS_ASSERT_EQU(ilogb((half) 0.99), -1);
447    _RS_ASSERT_EQU(ilogb((half) 1.01), 0);
448    _RS_ASSERT_EQU(ilogb((half) 1.99), 0);
449    _RS_ASSERT_EQU(ilogb((half) 2.01), 1);
450    _RS_ASSERT_EQU(ilogb((half) 1023), 9);
451    _RS_ASSERT_EQU(ilogb((half) 1025), 10);
452
453    // Result is same irrespective of sign.
454    _RS_ASSERT_EQU(ilogb((half) -0.24), -3);
455    _RS_ASSERT_EQU(ilogb((half) -0.26), -2);
456    _RS_ASSERT_EQU(ilogb((half) -0.49), -2);
457    _RS_ASSERT_EQU(ilogb((half) -0.51), -1);
458    _RS_ASSERT_EQU(ilogb((half) -0.99), -1);
459    _RS_ASSERT_EQU(ilogb((half) -1.01), 0);
460    _RS_ASSERT_EQU(ilogb((half) -1.99), 0);
461    _RS_ASSERT_EQU(ilogb((half) -2.01), 1);
462    _RS_ASSERT_EQU(ilogb((half) -1023), 9);
463    _RS_ASSERT_EQU(ilogb((half) -1025), 10);
464
465    return !failed;
466}
467
468void testFp16Math() {
469    bool success = true;
470
471    success &= testAPI();
472    success &= testFrexp();
473    success &= testModf();
474    success &= testNextAfter();
475    success &= testIlogb();
476
477    if (success) {
478        rsDebug("PASSED", 0);
479    } else {
480        rsDebug("FAILED", 0);
481    }
482
483    if (success) {
484        rsSendToClientBlocking(RS_MSG_TEST_PASSED);
485    } else {
486        rsSendToClientBlocking(RS_MSG_TEST_FAILED);
487    }
488}
489