Bug 1689998 - Update pixman sources to release 0.40.0. r=jrmuizel

Differential Revision: https://phabricator.services.mozilla.com/D103684
Jonathan Kew 2021-02-02 16:14:40 +00:00
Parent bbeee51ee1
Commit 837c97f4a4
65 changed files with 9338 additions and 13367 deletions

View file

@ -0,0 +1,77 @@
/* WARNING: This file is generated by make-blue-noise.c
* Please edit that file instead of this one.
*/
#ifndef BLUE_NOISE_64X64_H
#define BLUE_NOISE_64X64_H
#include <stdint.h>
static const uint16_t dither_blue_noise_64x64[4096] = {
3039, 1368, 3169, 103, 2211, 1248, 2981, 668, 2633, 37, 3963, 2903, 384, 2564, 3115, 1973, 3348, 830, 2505, 1293, 3054, 1060, 1505, 3268, 400, 1341, 593, 3802, 3384, 429, 4082, 1411, 2503, 3863, 126, 1292, 1887, 2855, 205, 2094, 2977, 1899, 3924, 356, 3088, 2500, 3942, 1409, 2293, 1734, 3732, 1291, 3227, 277, 2054, 786, 2871, 411, 2425, 1678, 3986, 455, 2879, 2288,
388, 1972, 3851, 778, 2768, 3697, 944, 2123, 1501, 3533, 937, 1713, 1381, 3888, 156, 1242, 516, 2888, 1607, 3676, 632, 2397, 3804, 2673, 1898, 3534, 2593, 1777, 1170, 2299, 3013, 1838, 523, 3053, 1647, 3601, 3197, 959, 1520, 3633, 893, 2437, 3367, 2187, 1258, 137, 1965, 401, 3546, 643, 3087, 2498, 733, 2786, 3371, 4053, 1266, 1977, 3663, 183, 2570, 2107, 1183, 3708,
907, 2473, 1151, 3363, 1527, 1902, 232, 3903, 3060, 496, 2486, 3206, 2165, 861, 2387, 3653, 2101, 3972, 132, 2162, 3437, 1827, 215, 895, 3114, 271, 969, 2932, 197, 1598, 878, 3696, 1140, 2120, 904, 2431, 302, 3846, 2675, 481, 3187, 66, 1440, 650, 3833, 2826, 3435, 901, 2936, 2111, 250, 1875, 3609, 1174, 1747, 162, 2346, 3420, 913, 3172, 1383, 752, 3298, 1735,
3540, 2938, 249, 2324, 526, 3099, 2561, 1324, 2347, 1861, 1200, 3702, 257, 3442, 1514, 2999, 992, 1766, 2735, 1163, 478, 2943, 1279, 3635, 2177, 1464, 3672, 2386, 3871, 3340, 2690, 64, 3489, 2811, 3999, 633, 1948, 1243, 2269, 1807, 1143, 2750, 3729, 1790, 2363, 1053, 1537, 2636, 4065, 1076, 1476, 3869, 450, 2200, 2676, 658, 2979, 1548, 544, 1913, 2838, 3911, 116, 2698,
517, 1295, 3997, 1739, 3665, 1083, 3509, 599, 3400, 118, 2956, 720, 2689, 1907, 567, 2523, 284, 3397, 711, 3219, 2450, 3985, 1665, 2549, 562, 3011, 1855, 729, 1355, 528, 1908, 2456, 1384, 337, 1540, 2654, 3138, 3513, 703, 4080, 3314, 2047, 855, 3037, 209, 3317, 577, 1828, 17, 2336, 3193, 2748, 962, 3441, 1450, 3246, 1075, 3878, 2615, 3497, 1033, 2310, 1442, 2183,
1654, 3254, 2061, 738, 2832, 148, 2030, 1670, 909, 3850, 2109, 1533, 4046, 1085, 3098, 3897, 1378, 2248, 3829, 1495, 1966, 23, 797, 3427, 1124, 4057, 95, 2787, 2190, 3074, 3950, 742, 3194, 1999, 3386, 1113, 16, 1657, 2804, 201, 1543, 383, 2559, 1325, 3604, 2068, 2493, 3771, 1284, 3460, 710, 1716, 2447, 80, 3811, 2032, 347, 2227, 15, 1689, 397, 3084, 662, 3798,
973, 43, 2608, 3143, 1459, 2423, 4066, 2770, 3191, 1283, 2630, 314, 3235, 2289, 72, 1822, 2840, 924, 350, 2653, 1057, 3715, 2235, 2775, 346, 2083, 1553, 3292, 1081, 274, 1686, 1188, 2327, 3743, 578, 2234, 3916, 2519, 1011, 3056, 2207, 3438, 3890, 537, 1617, 837, 3094, 373, 2795, 1980, 276, 3951, 1353, 3015, 844, 1724, 3651, 2923, 1316, 4092, 2504, 3627, 1936, 2854,
2461, 3929, 1193, 421, 3746, 820, 1180, 286, 2261, 532, 3625, 1812, 802, 1327, 3527, 670, 3730, 2025, 3124, 3565, 529, 2960, 1769, 1390, 3196, 2494, 3756, 796, 3618, 2602, 3463, 2847, 166, 953, 1745, 2900, 438, 2070, 1418, 3741, 639, 1205, 1891, 2882, 2282, 4012, 1182, 1696, 3630, 951, 2904, 2170, 3530, 375, 2320, 2742, 1132, 701, 3216, 2023, 847, 1230, 310, 3431,
770, 1961, 3531, 1702, 2181, 3370, 1877, 3072, 1571, 3389, 1071, 2415, 3782, 2803, 1610, 2454, 1211, 182, 1655, 2322, 1282, 3372, 287, 3935, 704, 1232, 415, 1910, 2286, 1399, 556, 1964, 4068, 2444, 3605, 1272, 3345, 816, 3526, 256, 2402, 2777, 955, 345, 3289, 111, 2727, 635, 2396, 1488, 3331, 600, 1032, 1575, 4026, 515, 3507, 2433, 1605, 460, 3364, 2783, 1810, 1397,
2334, 223, 2945, 688, 2533, 99, 2705, 624, 3944, 2073, 46, 2978, 508, 2132, 269, 3173, 3453, 2631, 4076, 694, 1892, 2586, 972, 2178, 3470, 1695, 2849, 3141, 77, 3884, 994, 3029, 1536, 673, 3083, 124, 2583, 1722, 2821, 1944, 4027, 1661, 3176, 3728, 1337, 1813, 3503, 2035, 3930, 157, 2537, 1865, 3096, 2646, 1941, 3252, 1449, 135, 2836, 3758, 2139, 84, 3678, 3106,
3862, 1545, 3307, 1320, 3955, 1031, 3664, 1306, 2460, 776, 1487, 3294, 1187, 3990, 1903, 1021, 549, 1484, 943, 3027, 97, 3853, 1499, 2880, 198, 2575, 3995, 1089, 1587, 2475, 3282, 339, 2657, 1158, 2105, 1493, 3943, 580, 3232, 1287, 846, 48, 2480, 2112, 771, 2534, 459, 3134, 850, 1298, 3790, 325, 3652, 1249, 193, 940, 2202, 3895, 1829, 911, 1366, 2577, 1069, 534,
2104, 1009, 2667, 392, 1983, 2917, 1645, 324, 3439, 2869, 3705, 1767, 2592, 756, 2916, 3683, 2276, 2850, 2053, 3594, 2403, 3181, 634, 3699, 1933, 906, 519, 2150, 3673, 764, 1770, 2220, 3795, 3336, 502, 3547, 2339, 1110, 301, 2210, 3354, 3643, 569, 1518, 2940, 3973, 1138, 1613, 2773, 2127, 2983, 1671, 769, 2161, 3800, 2730, 3127, 1179, 533, 3259, 2284, 4014, 1651, 2820,
3566, 653, 1839, 3455, 2399, 789, 3149, 2244, 1863, 1099, 474, 2307, 158, 3541, 1312, 1711, 0, 3902, 360, 1629, 1091, 395, 1781, 1191, 2374, 3353, 1419, 3225, 206, 2931, 3553, 1046, 54, 1646, 2470, 910, 1860, 3137, 3770, 2635, 1562, 2809, 1215, 3788, 222, 2199, 3335, 67, 3606, 524, 1001, 3309, 2410, 3473, 591, 1619, 291, 2502, 3629, 2891, 335, 741, 3378, 168,
2384, 3129, 4051, 22, 1444, 3613, 543, 3893, 186, 2665, 4062, 933, 3058, 2142, 449, 2711, 3224, 849, 1330, 3349, 2195, 2670, 3484, 2993, 32, 3774, 2722, 1859, 2548, 1268, 583, 2027, 3165, 2807, 4029, 227, 2897, 1434, 721, 1816, 195, 905, 2066, 3258, 1754, 970, 2674, 1880, 2338, 3915, 1485, 2660, 14, 1313, 2914, 2046, 4074, 791, 1917, 1301, 1725, 2687, 2019, 1443,
418, 1186, 1664, 2859, 1049, 2056, 2741, 1226, 1589, 3186, 2042, 1377, 3449, 1574, 3941, 1063, 1930, 2501, 3751, 2930, 671, 4031, 888, 2081, 1544, 684, 1117, 351, 4052, 1698, 2393, 3881, 1439, 785, 1277, 2013, 3488, 441, 2459, 3980, 3061, 3481, 2543, 419, 3020, 609, 3515, 1350, 799, 2878, 348, 2034, 3966, 1824, 950, 3281, 1394, 2239, 3452, 55, 3922, 3119, 892, 3785,
3023, 2140, 782, 2492, 3817, 241, 3355, 2424, 856, 3639, 612, 2556, 245, 2858, 705, 2316, 3562, 495, 1748, 128, 1912, 1454, 280, 2552, 3905, 3130, 2274, 3472, 834, 3055, 240, 2692, 471, 2272, 3301, 2632, 1080, 3693, 2136, 1029, 1364, 590, 1611, 4067, 1190, 2360, 3827, 261, 3180, 1768, 3471, 1103, 3003, 520, 3674, 151, 2571, 555, 3033, 982, 2353, 504, 1259, 2555,
149, 3889, 3380, 493, 3178, 1681, 663, 1924, 2990, 49, 1792, 3861, 1192, 1987, 3273, 297, 1457, 3043, 1177, 2292, 3249, 2829, 3682, 1154, 1758, 428, 2872, 1993, 1500, 3703, 1129, 3421, 1840, 3754, 163, 659, 1733, 3182, 38, 2875, 1957, 3614, 2237, 78, 1873, 2801, 1513, 2121, 1074, 2516, 667, 3710, 1429, 2430, 2088, 2830, 1072, 3557, 1531, 2733, 1955, 3286, 3590, 1826,
2778, 1068, 1932, 1452, 2279, 1185, 3564, 3952, 1391, 2726, 3313, 2331, 870, 3709, 1674, 2772, 4085, 808, 2596, 3848, 927, 538, 2335, 3334, 773, 3597, 1347, 109, 2663, 608, 2108, 2994, 936, 1524, 2922, 3968, 2422, 1467, 845, 3870, 321, 2704, 1073, 3308, 3680, 823, 430, 3375, 4030, 112, 2171, 2695, 267, 3374, 731, 1627, 3919, 1871, 352, 3839, 1370, 234, 794, 1532,
3245, 647, 3575, 74, 3045, 2766, 285, 2174, 498, 1059, 1551, 385, 3125, 2598, 143, 1128, 2095, 3395, 318, 1590, 3524, 1345, 1969, 242, 2759, 2092, 947, 3926, 3244, 2356, 1658, 6, 3593, 2554, 1172, 1995, 371, 2755, 3417, 2294, 1570, 3164, 748, 2517, 1401, 3111, 2420, 1662, 2910, 1276, 3276, 854, 1804, 4000, 1253, 2987, 229, 2344, 3184, 649, 2196, 2921, 4095, 2389,
1289, 2193, 2579, 4023, 757, 1858, 986, 3199, 2514, 3475, 4021, 2154, 651, 1432, 3468, 2404, 574, 1799, 3105, 2145, 86, 2614, 3218, 1565, 4088, 2481, 3079, 1815, 323, 1212, 3837, 759, 2159, 435, 3223, 784, 3659, 1114, 1888, 550, 1221, 3786, 1803, 499, 2117, 185, 3763, 942, 589, 2001, 3838, 1483, 3154, 2256, 468, 2544, 3403, 898, 1208, 2610, 3622, 967, 1929, 378,
3781, 220, 1656, 1115, 3347, 2428, 3822, 1577, 712, 1959, 110, 2765, 1762, 3854, 979, 2928, 3714, 1371, 746, 3969, 2884, 975, 3779, 641, 1142, 159, 1460, 702, 3485, 2866, 2495, 3330, 1305, 3937, 1635, 2229, 2962, 146, 4055, 3091, 2417, 100, 3508, 2933, 4006, 1167, 1920, 2760, 3552, 2545, 433, 2845, 142, 1056, 1886, 3616, 1435, 2099, 3803, 1749, 27, 1446, 3350, 2843,
884, 3310, 2948, 2103, 447, 1351, 187, 2895, 3655, 1256, 3036, 932, 3325, 2257, 451, 1915, 40, 2780, 2438, 1112, 1814, 423, 2290, 1905, 2898, 3419, 2306, 3760, 1938, 486, 1019, 1791, 3010, 2628, 203, 3408, 1269, 2507, 1606, 862, 2779, 2078, 952, 1529, 2638, 708, 3332, 1413, 2, 1726, 1156, 3500, 2392, 3791, 3076, 812, 107, 2861, 501, 3050, 3487, 2455, 594, 1731,
2685, 1498, 680, 3908, 2621, 3529, 1786, 2236, 342, 2569, 1526, 3722, 230, 1290, 3203, 3947, 1609, 3516, 467, 3267, 3685, 1461, 3140, 3569, 367, 1759, 928, 2754, 1332, 2219, 4034, 260, 655, 1984, 978, 3814, 617, 2086, 3525, 279, 3841, 1373, 3361, 319, 2251, 3066, 407, 2382, 3918, 3133, 2168, 762, 1523, 507, 2641, 1677, 4025, 2413, 1584, 793, 2049, 1109, 3962, 2218,
1194, 3692, 266, 1687, 981, 3103, 740, 3983, 1005, 3434, 570, 2383, 1942, 2718, 676, 2462, 1007, 2089, 1308, 2222, 233, 2568, 829, 1241, 2669, 3987, 514, 3303, 69, 3142, 1603, 3560, 2295, 3288, 1497, 2696, 1764, 2865, 1058, 3271, 1914, 477, 2529, 3927, 1736, 1273, 3752, 2029, 1012, 565, 2798, 4078, 1949, 3305, 1175, 2179, 380, 3366, 1195, 3849, 2637, 416, 2959, 125,
3396, 2467, 2036, 3234, 2340, 68, 2819, 1436, 2011, 3139, 1704, 4073, 860, 3582, 1468, 2969, 211, 3157, 4056, 866, 2935, 2000, 3923, 31, 2157, 1477, 2429, 1147, 3792, 2557, 774, 2802, 1153, 3747, 464, 3192, 42, 3904, 539, 1474, 2283, 803, 2876, 1061, 75, 3477, 747, 2893, 1538, 3626, 251, 1322, 2506, 189, 2791, 3667, 939, 2991, 1971, 175, 3195, 1416, 3648, 1857,
3052, 454, 851, 3789, 1271, 1906, 3694, 2484, 406, 2757, 26, 1189, 2909, 296, 2215, 3784, 1864, 637, 2715, 1673, 3445, 581, 1572, 3059, 3469, 761, 2984, 1737, 2058, 440, 1414, 1921, 121, 2527, 894, 2223, 1302, 2377, 3077, 2666, 3759, 3198, 1811, 3661, 2166, 2731, 1883, 359, 3285, 2458, 1805, 3459, 926, 3834, 675, 1893, 1496, 2612, 657, 3523, 1763, 2354, 564, 961,
1367, 3977, 1588, 2714, 322, 3446, 1088, 625, 3887, 1354, 3535, 2090, 3316, 1760, 1127, 483, 3491, 1421, 2301, 94, 1202, 3740, 2311, 1014, 1878, 3836, 180, 3412, 991, 2868, 3953, 3450, 3081, 1632, 4071, 1882, 3543, 726, 1719, 179, 1171, 364, 1420, 622, 3090, 1490, 946, 4007, 2212, 1102, 619, 2739, 2189, 1669, 2937, 3426, 39, 3940, 2191, 1264, 887, 4091, 2792, 2135,
4, 2883, 2281, 631, 3044, 1641, 2232, 3243, 1773, 2319, 827, 2591, 629, 3938, 2426, 3222, 2629, 1044, 3879, 3293, 1952, 2749, 275, 2590, 472, 1372, 2496, 660, 3669, 2264, 208, 915, 2167, 561, 2828, 307, 3265, 1104, 3964, 2155, 3425, 1951, 4077, 2391, 283, 3387, 2581, 115, 1415, 3069, 3896, 141, 3158, 1214, 442, 2405, 1349, 3085, 425, 2528, 3002, 312, 1602, 3588,
1137, 3323, 1963, 1002, 3578, 2521, 127, 925, 2970, 273, 3737, 1573, 167, 2863, 1509, 800, 147, 2059, 2942, 409, 921, 3151, 1451, 3909, 3333, 2844, 2096, 1512, 3136, 1210, 1798, 2709, 1331, 3586, 1034, 1521, 2441, 2926, 488, 2585, 775, 3031, 2693, 879, 3602, 1173, 2028, 3654, 2781, 841, 1975, 1507, 3646, 768, 3991, 2012, 996, 3544, 1666, 3810, 1990, 3360, 753, 2597,
3736, 304, 1473, 3828, 485, 1334, 4008, 2072, 3495, 1136, 2806, 2004, 3236, 1010, 2130, 3819, 1750, 3567, 644, 2515, 1794, 3636, 698, 2137, 1162, 832, 3761, 326, 2613, 513, 3302, 3820, 357, 3163, 2259, 3733, 101, 1922, 1386, 3587, 1640, 28, 1286, 2141, 1761, 2918, 693, 1639, 457, 3250, 2434, 365, 2599, 1729, 3284, 2643, 306, 2793, 689, 1090, 104, 1309, 2305, 1831,
2776, 859, 2446, 2915, 1778, 3337, 2677, 614, 1508, 2409, 469, 4033, 1321, 3563, 402, 3131, 2720, 1093, 1569, 4042, 1229, 2277, 216, 3046, 1817, 57, 3006, 1684, 4059, 2016, 795, 2440, 1652, 1960, 610, 2763, 920, 3864, 3110, 1026, 2326, 3762, 3233, 521, 3856, 173, 2457, 3939, 2138, 1262, 3572, 989, 3021, 2238, 119, 1445, 3832, 1809, 2297, 3467, 2700, 3684, 3102, 394,
4036, 2050, 3256, 89, 2198, 1079, 248, 1845, 3805, 3104, 880, 1779, 2688, 717, 2373, 1375, 262, 2249, 3071, 13, 2813, 3429, 1600, 3984, 2416, 3603, 1299, 2298, 998, 3492, 1393, 2951, 10, 4009, 1247, 3462, 1679, 2204, 414, 2736, 316, 1894, 2816, 1050, 3373, 1462, 3107, 817, 3464, 21, 1835, 4070, 568, 1178, 3718, 875, 3168, 466, 2974, 1458, 2084, 616, 1564, 1018,
1693, 546, 1244, 3899, 716, 3160, 3608, 2877, 1220, 334, 3443, 2270, 44, 3000, 1843, 3928, 3405, 766, 3686, 2040, 587, 993, 2647, 387, 930, 2753, 630, 3274, 150, 2808, 453, 3638, 1092, 2352, 3030, 239, 2562, 700, 3240, 1257, 4016, 730, 1515, 2203, 2551, 417, 1866, 1123, 2348, 2902, 1550, 2678, 2075, 3238, 1630, 2531, 2115, 1255, 4054, 840, 290, 3874, 2477, 3399,
2250, 3577, 2817, 1626, 2576, 1356, 2315, 792, 2087, 2618, 1612, 3855, 1263, 3637, 1036, 494, 1535, 2553, 1198, 1715, 3867, 3170, 1359, 1954, 3483, 1539, 2069, 3886, 1772, 2487, 1534, 2045, 3242, 806, 1578, 2018, 3948, 1423, 3596, 2076, 2466, 3424, 139, 3688, 871, 4049, 2852, 3342, 547, 3719, 327, 852, 3505, 207, 2794, 542, 3600, 45, 2411, 3324, 1788, 3012, 1235, 61,
2655, 917, 253, 1986, 3738, 313, 1706, 4072, 120, 3229, 957, 597, 2024, 3262, 2453, 2857, 2002, 3190, 210, 2784, 2206, 300, 2400, 3766, 553, 3152, 218, 1150, 2988, 883, 3753, 627, 2664, 3831, 437, 3385, 1008, 2957, 60, 1636, 891, 2899, 1776, 3062, 1315, 2026, 194, 1643, 2079, 1296, 3201, 2465, 1379, 1927, 3898, 1125, 1847, 2846, 1552, 1028, 2725, 2169, 787, 3202,
1441, 3982, 3032, 1052, 3251, 605, 2639, 3073, 1431, 3642, 2329, 2949, 341, 1634, 833, 129, 4020, 916, 3571, 669, 1506, 3411, 821, 2856, 1207, 2337, 2683, 3448, 340, 2214, 3128, 235, 1738, 1288, 2833, 2419, 606, 1884, 2668, 552, 3765, 1176, 399, 2302, 596, 3591, 2634, 767, 3845, 2767, 995, 3967, 491, 3057, 814, 2300, 3422, 691, 3797, 254, 3645, 509, 3478, 1836,
2119, 475, 2445, 1525, 2175, 3539, 914, 1926, 473, 1157, 1800, 3971, 2701, 3739, 2129, 3486, 1333, 1784, 2366, 2982, 1070, 4089, 1802, 73, 1642, 3958, 835, 1837, 1480, 4043, 1217, 2469, 3416, 2113, 88, 3668, 1240, 3255, 3920, 2355, 3167, 2003, 2645, 3936, 3228, 1592, 1144, 3474, 2394, 79, 1820, 2241, 1594, 3656, 2584, 153, 1448, 3034, 2005, 2511, 1692, 1335, 3913, 217,
2822, 3391, 745, 3813, 192, 1274, 2941, 3847, 2489, 3440, 744, 161, 1422, 1086, 572, 3004, 2617, 338, 3807, 2031, 236, 2472, 3065, 2098, 3358, 362, 2163, 3574, 497, 2788, 1970, 948, 3885, 685, 3100, 1712, 2228, 292, 1408, 1016, 164, 3537, 1417, 941, 34, 2172, 3001, 358, 1491, 3147, 699, 3356, 258, 1149, 2946, 1787, 3931, 382, 1146, 3291, 818, 2890, 2379, 1096,
3679, 1328, 1901, 3162, 2747, 1730, 2253, 5, 1556, 2818, 2093, 3166, 2522, 3410, 2287, 1701, 956, 3237, 620, 1596, 3300, 1307, 511, 3701, 1020, 2939, 1362, 2532, 3208, 749, 3641, 160, 1522, 2624, 1095, 4086, 826, 2841, 3583, 2173, 1727, 723, 2925, 1911, 2482, 3726, 863, 1962, 4028, 1111, 2835, 3773, 2449, 2022, 582, 3278, 923, 2619, 2152, 4039, 92, 1934, 3145, 677,
2530, 53, 2303, 1003, 458, 3989, 739, 3321, 1064, 369, 3556, 877, 1900, 426, 3876, 1, 3617, 2106, 1197, 2805, 3634, 857, 2706, 1504, 2418, 682, 3868, 20, 1139, 1688, 2333, 3311, 2907, 1945, 265, 2385, 3433, 1601, 636, 2620, 3095, 4044, 386, 3382, 1184, 527, 2814, 3414, 2342, 465, 1889, 1343, 874, 3479, 1502, 2233, 3689, 1385, 559, 2745, 1463, 3465, 376, 1718,
3217, 4045, 1580, 3612, 2525, 1228, 3018, 1958, 3725, 2358, 1361, 3996, 1581, 3063, 1224, 2737, 1475, 2442, 3946, 191, 1796, 2128, 3975, 134, 1916, 3318, 1597, 2071, 3749, 2672, 403, 1278, 602, 3745, 3220, 1374, 445, 2064, 3830, 243, 1252, 2390, 1563, 2724, 3875, 1818, 1346, 165, 1650, 3264, 2680, 117, 2998, 4081, 343, 2799, 9, 3122, 1743, 3724, 1040, 2231, 3842, 1209,
900, 398, 2851, 697, 1797, 3482, 293, 2679, 1649, 566, 2954, 91, 2697, 714, 2060, 3211, 781, 480, 3040, 1038, 2611, 666, 2989, 3458, 1201, 2796, 548, 2975, 839, 3121, 1850, 4001, 2208, 1631, 790, 2558, 2972, 1148, 3213, 1849, 3624, 971, 2102, 108, 772, 3101, 2589, 3777, 1042, 656, 3907, 2097, 1615, 2540, 805, 1935, 1231, 3494, 2451, 268, 2995, 750, 2682, 2020,
3024, 1392, 2124, 3279, 106, 2217, 1387, 822, 3214, 3825, 2160, 1000, 2395, 3691, 228, 4038, 1872, 3413, 1608, 2225, 3536, 303, 1653, 886, 2541, 224, 4037, 2252, 1428, 172, 3504, 958, 2848, 113, 3628, 1834, 3979, 19, 2317, 779, 2797, 518, 3174, 3549, 1482, 2266, 444, 2014, 3555, 2439, 1213, 3113, 535, 1135, 3204, 3858, 2309, 931, 623, 2009, 3359, 1566, 140, 3550,
1808, 3872, 2488, 1152, 3764, 2892, 3960, 2412, 353, 1223, 1825, 3444, 3116, 1717, 1082, 2313, 1280, 2661, 82, 3852, 1389, 3200, 2330, 3812, 2038, 3581, 1728, 1039, 3339, 2427, 586, 2580, 1238, 3328, 2280, 1047, 595, 2662, 1363, 3338, 1620, 3934, 2497, 1881, 1054, 3954, 3215, 864, 2887, 1801, 320, 3519, 2378, 3704, 1753, 424, 2958, 1660, 4005, 2601, 1116, 3912, 2381, 573,
2740, 200, 828, 1667, 432, 1931, 1035, 1616, 3598, 2640, 728, 264, 1437, 557, 3501, 2966, 372, 3734, 974, 1978, 758, 2719, 1145, 452, 1433, 725, 2681, 408, 3843, 1918, 1547, 3906, 1996, 503, 1456, 3019, 3493, 1700, 3742, 355, 2134, 176, 1311, 615, 2867, 315, 1680, 1314, 8, 3297, 1494, 783, 1950, 83, 2656, 1382, 3561, 138, 2834, 1404, 330, 1904, 3156, 1027,
1357, 3381, 3041, 3666, 2729, 734, 3415, 177, 3051, 2021, 4079, 2823, 3775, 2186, 2616, 869, 1668, 3148, 2367, 3315, 393, 4075, 1870, 2920, 3343, 2362, 3188, 1303, 2782, 825, 3171, 259, 2905, 3717, 2538, 184, 2074, 838, 2860, 2407, 1024, 3496, 3008, 3706, 1985, 2349, 3623, 2582, 4058, 2184, 2694, 3873, 2964, 990, 3346, 690, 2033, 1066, 2201, 3490, 2971, 718, 3700, 2188,
4061, 391, 1989, 2325, 1430, 3150, 2125, 2526, 592, 1403, 976, 2351, 1165, 1851, 114, 3921, 2063, 613, 1358, 2785, 1623, 2254, 25, 3542, 1045, 246, 1852, 3554, 87, 2243, 3615, 1169, 727, 1705, 968, 3957, 3185, 1251, 500, 4063, 1751, 2622, 842, 1519, 90, 3393, 819, 490, 1874, 999, 571, 1275, 2271, 1586, 4040, 2448, 3126, 3731, 436, 885, 1708, 2421, 24, 1599,
889, 2563, 1199, 645, 70, 4013, 1237, 3723, 1694, 3499, 3, 3266, 484, 2997, 3390, 1233, 2842, 3687, 152, 3480, 1084, 3698, 881, 2490, 1542, 3992, 2209, 692, 1690, 3022, 1470, 2625, 2114, 3512, 2359, 381, 2684, 1897, 3368, 1395, 3080, 289, 2065, 3981, 2758, 1141, 3097, 1472, 2870, 3352, 3707, 225, 3159, 505, 1895, 214, 1222, 1774, 2686, 3978, 3275, 1196, 3518, 2825,
3270, 1720, 3796, 3466, 2650, 1841, 298, 899, 2862, 2091, 2671, 1744, 3735, 801, 1560, 349, 2262, 903, 1833, 2524, 512, 3117, 1793, 2827, 476, 3038, 1216, 2550, 3826, 980, 431, 4048, 35, 2992, 1265, 1595, 765, 3675, 76, 2247, 696, 3456, 1254, 2452, 664, 1757, 2133, 3750, 145, 2332, 1554, 1981, 3580, 2712, 868, 3640, 2919, 638, 2275, 1427, 309, 2595, 2006, 492,
2226, 178, 2911, 836, 1528, 3028, 2240, 3327, 404, 3970, 707, 1294, 2464, 2131, 4032, 2600, 3319, 1406, 2913, 3974, 2156, 1425, 221, 3877, 2017, 811, 3662, 272, 3287, 1988, 2408, 3357, 1746, 598, 3239, 3823, 2182, 2934, 1078, 2604, 3840, 1697, 2906, 413, 3210, 3880, 331, 2644, 1260, 848, 3042, 2535, 1077, 1438, 3261, 2365, 1561, 3799, 85, 3082, 1876, 674, 3932, 1101,
3644, 1344, 1943, 2401, 390, 3835, 1048, 2572, 1541, 1133, 3075, 3584, 308, 2889, 1065, 1869, 601, 3783, 282, 1181, 736, 3312, 2368, 1126, 3383, 1675, 2734, 1426, 628, 2873, 1317, 843, 2717, 2048, 1004, 2536, 333, 1782, 3295, 1517, 219, 2153, 815, 3502, 1579, 2268, 987, 3409, 1780, 4018, 354, 665, 3914, 47, 1956, 456, 1006, 2010, 3406, 1130, 3621, 2894, 1549, 3092,
2485, 640, 3993, 3179, 1270, 3436, 585, 1925, 3757, 2304, 136, 1976, 1486, 646, 3520, 50, 3155, 1637, 2435, 3522, 1937, 2756, 3748, 661, 2224, 58, 3230, 2357, 1830, 3892, 170, 3607, 1447, 3949, 190, 3392, 1336, 584, 4010, 918, 3016, 3670, 1155, 2406, 52, 1304, 3009, 607, 2085, 2699, 3205, 1848, 2291, 3402, 2764, 3865, 3048, 2508, 735, 2710, 443, 2341, 897, 263,
1785, 2769, 983, 56, 2197, 1685, 2703, 202, 2944, 810, 3377, 2626, 3787, 3047, 2055, 1236, 2752, 2122, 945, 3093, 96, 1624, 439, 3014, 1388, 4015, 977, 448, 3506, 1098, 2242, 3026, 506, 2361, 2952, 1862, 3619, 2790, 1992, 2483, 525, 1868, 2652, 4093, 1998, 3595, 2478, 3816, 122, 1412, 929, 3716, 1166, 1648, 813, 1300, 199, 1489, 3998, 1771, 1310, 3808, 2052, 3423,
434, 3712, 1625, 3558, 2955, 853, 4019, 1348, 3511, 1732, 1246, 487, 934, 1672, 2510, 3965, 788, 3711, 396, 1369, 4090, 1055, 2603, 1879, 3528, 2518, 2067, 3005, 1516, 2588, 751, 1740, 3418, 1131, 1576, 686, 2296, 1118, 18, 3263, 1365, 3401, 294, 737, 3177, 410, 867, 1633, 2963, 3579, 2375, 252, 2881, 479, 2471, 3576, 2180, 3306, 332, 2255, 3035, 41, 2648, 1396,
2929, 2230, 1219, 2512, 446, 2008, 3189, 2388, 626, 2164, 2831, 4047, 2376, 174, 3272, 368, 1469, 3226, 2578, 1991, 2874, 2263, 3681, 876, 188, 1239, 683, 3776, 226, 3183, 4083, 2148, 63, 2649, 3859, 299, 3086, 3933, 1585, 2185, 3767, 988, 1707, 2908, 1407, 1844, 2771, 2245, 1161, 560, 1755, 3376, 2051, 4064, 3135, 1832, 652, 2853, 1051, 3649, 760, 3290, 1105, 3945,
872, 154, 3207, 713, 3780, 1453, 281, 1087, 3695, 30, 3299, 1919, 1400, 3551, 1119, 1890, 2314, 618, 1703, 3428, 724, 295, 3146, 1557, 3341, 2896, 1683, 2723, 1974, 1017, 541, 1380, 3720, 804, 3280, 2082, 997, 2567, 777, 2961, 213, 2707, 2328, 3632, 1025, 3891, 3304, 255, 4003, 3108, 2587, 1323, 743, 1479, 105, 1013, 3901, 1618, 2044, 2627, 1465, 1846, 576, 1994,
2560, 3521, 1742, 2118, 2800, 3404, 1783, 2609, 2968, 1582, 1022, 412, 2713, 687, 2976, 3857, 2761, 3620, 62, 1108, 3844, 1340, 2100, 540, 2345, 3925, 405, 3457, 1319, 2468, 3362, 2815, 1867, 2372, 1281, 1714, 3690, 482, 3498, 1842, 1285, 3994, 558, 2039, 81, 2499, 678, 1481, 1923, 964, 12, 3824, 2980, 2205, 2762, 3432, 2398, 181, 3247, 462, 4094, 2350, 3589, 3089,
1555, 1094, 4041, 247, 1267, 908, 3959, 2041, 732, 3860, 2343, 3132, 3769, 2144, 1621, 237, 912, 1329, 3025, 2146, 2642, 1775, 3721, 2746, 1121, 1953, 902, 2285, 130, 3671, 1659, 278, 3153, 522, 2721, 123, 2996, 1466, 2380, 377, 3231, 873, 1510, 3476, 3123, 1250, 2147, 3650, 2839, 3451, 2323, 1122, 3545, 379, 1765, 1218, 603, 3768, 1360, 938, 2885, 133, 1245, 363,
2364, 554, 2743, 3344, 2474, 530, 3112, 169, 1297, 3430, 536, 1741, 98, 1043, 2574, 3253, 2246, 1854, 4022, 510, 3283, 204, 858, 3398, 36, 3118, 1478, 3794, 2986, 706, 2176, 922, 3559, 1097, 3976, 3322, 2149, 1160, 2810, 3883, 2007, 2513, 2953, 328, 1721, 3793, 422, 2566, 807, 329, 1638, 1967, 648, 2520, 3727, 3109, 2116, 2927, 2491, 1939, 3365, 1709, 2728, 3815,
2037, 3120, 831, 1405, 1896, 3592, 1622, 2369, 2864, 2151, 1107, 2542, 3532, 1410, 3917, 427, 3568, 709, 2509, 1503, 1037, 2973, 2436, 1604, 4035, 2594, 563, 1819, 2659, 1234, 4004, 2565, 1511, 2273, 1823, 336, 882, 3772, 575, 1628, 171, 3570, 1120, 2260, 2716, 935, 3064, 1806, 1342, 3144, 3900, 2744, 3296, 985, 1546, 238, 896, 1663, 305, 3660, 695, 2213, 960, 3407,
144, 1795, 3894, 2267, 51, 2708, 1023, 3818, 366, 1821, 4087, 2985, 755, 2057, 2912, 949, 1583, 2774, 231, 3447, 2258, 3866, 1982, 672, 1225, 2077, 3320, 1062, 370, 3241, 1968, 7, 3068, 681, 3631, 2573, 1567, 3175, 2321, 1067, 3070, 722, 1856, 3744, 642, 1471, 4084, 131, 3514, 2443, 531, 1227, 155, 2265, 4024, 2658, 3326, 3910, 1168, 3078, 1530, 3956, 489, 1424,
3647, 1203, 420, 2924, 3755, 719, 3248, 1376, 3067, 890, 196, 1559, 3269, 270, 2432, 1885, 3212, 1164, 3778, 1752, 579, 1338, 344, 3585, 3017, 288, 3658, 2371, 3882, 1691, 611, 2789, 3809, 1339, 389, 2950, 2015, 59, 3548, 2751, 2158, 4011, 1352, 29, 3388, 2370, 2812, 1946, 954, 2110, 1558, 2947, 3573, 1909, 1326, 679, 1853, 2312, 551, 2702, 33, 2414, 3209, 2824,
2547, 2143, 3379, 966, 1492, 1979, 2479, 463, 2194, 3657, 2738, 2318, 1261, 3713, 604, 4002, 11, 2192, 2967, 919, 2607, 3369, 2837, 1676, 2539, 984, 1568, 93, 2901, 1318, 3538, 1041, 2216, 1756, 3454, 1030, 4050, 1402, 798, 1723, 311, 3277, 2546, 2886, 2043, 461, 1206, 3677, 361, 3260, 3988, 809, 2605, 470, 3007, 3517, 102, 3221, 1398, 2062, 3611, 1134, 1928, 865,
4060, 621, 1710, 2606, 3510, 317, 4017, 1682, 3329, 1159, 1940, 654, 3461, 1789, 1015, 2691, 1455, 3599, 374, 1947, 4069, 71, 2126, 763, 3961, 2278, 3161, 1997, 824, 2623, 2080, 244, 3257, 780, 2732, 2308, 545, 3351, 2476, 3806, 1204, 588, 1591, 963, 3610, 1699, 754, 3049, 2651, 1106, 65, 2221, 1644, 3821, 1100, 2463, 1614, 3801, 965, 2965, 715, 3394, 1593, 212,
};
#endif /* BLUE_NOISE_64X64_H */

View file

@ -0,0 +1,679 @@
/* Blue noise generation using the void-and-cluster method as described in
*
* The void-and-cluster method for dither array generation
* Ulichney, Robert A (1993)
*
* http://cv.ulichney.com/papers/1993-void-cluster.pdf
*
* Note that running with openmp (-DUSE_OPENMP) will trigger additional
* randomness due to computing reductions in parallel, and is not recommended
* unless generating very large dither arrays.
*/
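/* Illustrative sketch (not part of this generator): the table emitted below
 * is normally consumed as an ordered-dither threshold matrix. Assuming a
 * normalized input v in [0, 1] and n output levels, quantization with the
 * 64x64 table might look like:
 *
 *     float t = (dither_blue_noise_64x64[(y & 63) * 64 + (x & 63)] + 0.5f)
 *               / 4096.0f;
 *     int level = (int) (v * (n - 1) + t);
 *
 * pixman's actual dithering code differs in detail; this only shows the
 * intent of the array.
 */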
#include <assert.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <stdio.h>
/* Booleans and utility functions */
#ifndef TRUE
# define TRUE 1
#endif
#ifndef FALSE
# define FALSE 0
#endif
typedef int bool_t;
int
imin (int x, int y)
{
return x < y ? x : y;
}
/* Memory allocation */
void *
malloc_abc (unsigned int a, unsigned int b, unsigned int c)
{
if (a >= INT32_MAX / b)
return NULL;
else if (a * b >= INT32_MAX / c)
return NULL;
else
return malloc (a * b * c);
}
/* Random number generation */
typedef uint32_t xorwow_state_t[5];
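/* One step of Marsaglia's xorwow generator: a 128-bit xorshift core in
 * state[0..3] plus a Weyl-style counter in state[4] that is added to the
 * output. */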
uint32_t
xorwow_next (xorwow_state_t *state)
{
uint32_t s = (*state)[0],
t = (*state)[3];
(*state)[3] = (*state)[2];
(*state)[2] = (*state)[1];
(*state)[1] = s;
t ^= t >> 2;
t ^= t << 1;
t ^= s ^ (s << 4);
(*state)[0] = t;
(*state)[4] += 362437;
return t + (*state)[4];
}
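/* Map the top 23 bits of the next output to a float in [0, 1]. */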
float
xorwow_float (xorwow_state_t *s)
{
return (xorwow_next (s) >> 9) / (float)((1 << 23) - 1);
}
/* Floating point matrices
*
* Used to cache the cluster sizes.
*/
typedef struct matrix_t {
int width;
int height;
float *buffer;
} matrix_t;
bool_t
matrix_init (matrix_t *matrix, int width, int height)
{
float *buffer;
if (!matrix)
return FALSE;
buffer = malloc_abc (width, height, sizeof (float));
if (!buffer)
return FALSE;
matrix->buffer = buffer;
matrix->width = width;
matrix->height = height;
return TRUE;
}
bool_t
matrix_copy (matrix_t *dst, matrix_t const *src)
{
float *srcbuf = src->buffer,
*srcend = src->buffer + src->width * src->height,
*dstbuf = dst->buffer;
if (dst->width != src->width || dst->height != src->height)
return FALSE;
while (srcbuf < srcend)
*dstbuf++ = *srcbuf++;
return TRUE;
}
float *
matrix_get (matrix_t *matrix, int x, int y)
{
return &matrix->buffer[y * matrix->width + x];
}
void
matrix_destroy (matrix_t *matrix)
{
free (matrix->buffer);
}
/* Binary patterns */
typedef struct pattern_t {
int width;
int height;
bool_t *buffer;
} pattern_t;
bool_t
pattern_init (pattern_t *pattern, int width, int height)
{
bool_t *buffer;
if (!pattern)
return FALSE;
buffer = malloc_abc (width, height, sizeof (bool_t));
if (!buffer)
return FALSE;
pattern->buffer = buffer;
pattern->width = width;
pattern->height = height;
return TRUE;
}
bool_t
pattern_copy (pattern_t *dst, pattern_t const *src)
{
bool_t *srcbuf = src->buffer,
*srcend = src->buffer + src->width * src->height,
*dstbuf = dst->buffer;
if (dst->width != src->width || dst->height != src->height)
return FALSE;
while (srcbuf < srcend)
*dstbuf++ = *srcbuf++;
return TRUE;
}
bool_t *
pattern_get (pattern_t *pattern, int x, int y)
{
return &pattern->buffer[y * pattern->width + x];
}
void
pattern_fill_white_noise (pattern_t *pattern, float fraction,
xorwow_state_t *s)
{
bool_t *buffer = pattern->buffer;
bool_t *end = buffer + (pattern->width * pattern->height);
while (buffer < end)
*buffer++ = xorwow_float (s) < fraction;
}
void
pattern_destroy (pattern_t *pattern)
{
free (pattern->buffer);
}
/* Dither arrays */
typedef struct array_t {
int width;
int height;
uint32_t *buffer;
} array_t;
bool_t
array_init (array_t *array, int width, int height)
{
uint32_t *buffer;
if (!array)
return FALSE;
buffer = malloc_abc (width, height, sizeof (uint32_t));
if (!buffer)
return FALSE;
array->buffer = buffer;
array->width = width;
array->height = height;
return TRUE;
}
uint32_t *
array_get (array_t *array, int x, int y)
{
return &array->buffer[y * array->width + x];
}
bool_t
array_save_ppm (array_t *array, const char *filename)
{
FILE *f = fopen(filename, "wb");
int i = 0;
int bpp = 2;
uint8_t buffer[1024];
if (!f)
return FALSE;
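/* 'P5' is the binary greyscale (PGM) variant; maxval here is the largest
 * rank (width * height - 1), so tables with more than 256 entries need two
 * bytes per sample. */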
if (array->width * array->height - 1 < 256)
bpp = 1;
fprintf(f, "P5 %d %d %d\n", array->width, array->height,
array->width * array->height - 1);
while (i < array->width * array->height)
{
int j = 0;
for (; j < 1024 / bpp && i + j < array->width * array->height; ++j)
{
uint32_t v = array->buffer[i + j];
if (bpp == 2)
{
buffer[2 * j] = v & 0xff;
buffer[2 * j + 1] = (v & 0xff00) >> 8;
} else {
buffer[j] = v;
}
}
fwrite((void *)buffer, bpp, j, f);
i += j;
}
if (fclose(f) != 0)
return FALSE;
return TRUE;
}
bool_t
array_save (array_t *array, const char *filename)
{
int x, y;
FILE *f = fopen(filename, "wb");
if (!f)
return FALSE;
fprintf (f,
"/* WARNING: This file is generated by make-blue-noise.c\n"
" * Please edit that file instead of this one.\n"
" */\n"
"\n"
"#ifndef BLUE_NOISE_%dX%d_H\n"
"#define BLUE_NOISE_%dX%d_H\n"
"\n"
"#include <stdint.h>\n"
"\n", array->width, array->height, array->width, array->height);
fprintf (f, "static const uint16_t dither_blue_noise_%dx%d[%d] = {\n",
array->width, array->height, array->width * array->height);
for (y = 0; y < array->height; ++y)
{
fprintf (f, " ");
for (x = 0; x < array->width; ++x)
{
if (x != 0)
fprintf (f, ", ");
fprintf (f, "%d", *array_get (array, x, y));
}
fprintf (f, ",\n");
}
fprintf (f, "};\n");
fprintf (f, "\n#endif /* BLUE_NOISE_%dX%d_H */\n",
array->width, array->height);
if (fclose(f) != 0)
return FALSE;
return TRUE;
}
void
array_destroy (array_t *array)
{
free (array->buffer);
}
/* Dither array generation */
bool_t
compute_cluster_sizes (pattern_t *pattern, matrix_t *matrix)
{
int width = pattern->width,
height = pattern->height;
if (matrix->width != width || matrix->height != height)
return FALSE;
int px, py, qx, qy, dx, dy;
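/* 2 * sigma^2 for the Gaussian energy function exp (-(dx^2 + dy^2) /
 * (2 * sigma^2)), with sigma = 1.5 as recommended in Ulichney's paper. */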
float tsqsi = 2.f * 1.5f * 1.5f;
#ifdef USE_OPENMP
#pragma omp parallel for default (none) \
private (py, px, qy, qx, dx, dy) \
shared (height, width, pattern, matrix, tsqsi)
#endif
for (py = 0; py < height; ++py)
{
for (px = 0; px < width; ++px)
{
bool_t pixel = *pattern_get (pattern, px, py);
float dist = 0.f;
for (qx = 0; qx < width; ++qx)
{
dx = imin (abs (qx - px), width - abs (qx - px));
dx = dx * dx;
for (qy = 0; qy < height; ++qy)
{
dy = imin (abs (qy - py), height - abs (qy - py));
dy = dy * dy;
dist += (pixel == *pattern_get (pattern, qx, qy))
* expf (- (dx + dy) / tsqsi);
}
}
*matrix_get (matrix, px, py) = dist;
}
}
return TRUE;
}
bool_t
swap_pixel (pattern_t *pattern, matrix_t *matrix, int x, int y)
{
int width = pattern->width,
height = pattern->height;
bool_t new;
float f,
dist = 0.f,
tsqsi = 2.f * 1.5f * 1.5f;
int px, py, dx, dy;
bool_t b;
new = !*pattern_get (pattern, x, y);
*pattern_get (pattern, x, y) = new;
if (matrix->width != width || matrix->height != height)
return FALSE;
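/* Flipping (x, y) changes every cached cluster/void size by +/- f,
 * depending on whether the pixel there now matches the new value; dist
 * accumulates the fresh energy for (x, y) itself. */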
#ifdef USE_OPENMP
#pragma omp parallel for reduction (+:dist) default (none) \
private (px, py, dx, dy, b, f) \
shared (x, y, width, height, pattern, matrix, new, tsqsi)
#endif
for (py = 0; py < height; ++py)
{
dy = imin (abs (py - y), height - abs (py - y));
dy = dy * dy;
for (px = 0; px < width; ++px)
{
dx = imin (abs (px - x), width - abs (px - x));
dx = dx * dx;
b = (*pattern_get (pattern, px, py) == new);
f = expf (- (dx + dy) / tsqsi);
*matrix_get (matrix, px, py) += (2 * b - 1) * f;
dist += b * f;
}
}
*matrix_get (matrix, x, y) = dist;
return TRUE;
}
void
largest_cluster (pattern_t *pattern, matrix_t *matrix,
bool_t pixel, int *xmax, int *ymax)
{
int width = pattern->width,
height = pattern->height;
int x, y;
float vmax = -INFINITY;
#ifdef USE_OPENMP
#pragma omp parallel default (none) \
private (x, y) \
shared (height, width, pattern, matrix, pixel, xmax, ymax, vmax)
#endif
{
int xbest = -1,
ybest = -1;
#ifdef USE_OPENMP
float vbest = -INFINITY;
#pragma omp for reduction (max: vmax) collapse (2)
#endif
for (y = 0; y < height; ++y)
{
for (x = 0; x < width; ++x)
{
if (*pattern_get (pattern, x, y) != pixel)
continue;
if (*matrix_get (matrix, x, y) > vmax)
{
vmax = *matrix_get (matrix, x, y);
#ifdef USE_OPENMP
vbest = vmax;
#endif
xbest = x;
ybest = y;
}
}
}
#ifdef USE_OPENMP
#pragma omp barrier
#pragma omp critical
{
if (vmax == vbest)
{
*xmax = xbest;
*ymax = ybest;
}
}
#else
*xmax = xbest;
*ymax = ybest;
#endif
}
assert (vmax > -INFINITY);
}
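/* Initial-pattern step of void-and-cluster: repeatedly remove the 1 in the
 * tightest cluster and re-insert it in the largest void. The prototype is
 * stable once the pixel just removed is itself the best place to put one
 * back. */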
void
generate_initial_binary_pattern (pattern_t *pattern, matrix_t *matrix)
{
int xcluster = 0,
ycluster = 0,
xvoid = 0,
yvoid = 0;
for (;;)
{
largest_cluster (pattern, matrix, TRUE, &xcluster, &ycluster);
assert (*pattern_get (pattern, xcluster, ycluster) == TRUE);
swap_pixel (pattern, matrix, xcluster, ycluster);
largest_cluster (pattern, matrix, FALSE, &xvoid, &yvoid);
assert (*pattern_get (pattern, xvoid, yvoid) == FALSE);
swap_pixel (pattern, matrix, xvoid, yvoid);
if (xcluster == xvoid && ycluster == yvoid)
return;
}
}
bool_t
generate_dither_array (array_t *array,
pattern_t const *prototype, matrix_t const *matrix,
pattern_t *temp_pattern, matrix_t *temp_matrix)
{
int width = prototype->width,
height = prototype->height;
int x, y, rank;
int initial_rank = 0;
if (array->width != width || array->height != height)
return FALSE;
// Make copies of the prototype and associated sizes matrix since we will
// trash them
if (!pattern_copy (temp_pattern, prototype))
return FALSE;
if (!matrix_copy (temp_matrix, matrix))
return FALSE;
// Compute initial rank
for (y = 0; y < height; ++y)
{
for (x = 0; x < width; ++x)
{
if (*pattern_get (temp_pattern, x, y))
initial_rank += 1;
*array_get (array, x, y) = 0;
}
}
// Phase 1
for (rank = initial_rank; rank > 0; --rank)
{
largest_cluster (temp_pattern, temp_matrix, TRUE, &x, &y);
swap_pixel (temp_pattern, temp_matrix, x, y);
*array_get (array, x, y) = rank - 1;
}
// Make copies again for phases 2 & 3
if (!pattern_copy (temp_pattern, prototype))
return FALSE;
if (!matrix_copy (temp_matrix, matrix))
return FALSE;
// Phase 2 & 3
for (rank = initial_rank; rank < width * height; ++rank)
{
largest_cluster (temp_pattern, temp_matrix, FALSE, &x, &y);
swap_pixel (temp_pattern, temp_matrix, x, y);
*array_get (array, x, y) = rank;
}
return TRUE;
}
bool_t
generate (int size, xorwow_state_t *s,
char const *c_filename, char const *ppm_filename)
{
bool_t ok = TRUE;
pattern_t prototype, temp_pattern;
array_t array;
matrix_t matrix, temp_matrix;
printf ("Generating %dx%d blue noise...\n", size, size);
if (!pattern_init (&prototype, size, size))
return FALSE;
if (!pattern_init (&temp_pattern, size, size))
{
pattern_destroy (&prototype);
return FALSE;
}
if (!matrix_init (&matrix, size, size))
{
pattern_destroy (&temp_pattern);
pattern_destroy (&prototype);
return FALSE;
}
if (!matrix_init (&temp_matrix, size, size))
{
matrix_destroy (&matrix);
pattern_destroy (&temp_pattern);
pattern_destroy (&prototype);
return FALSE;
}
if (!array_init (&array, size, size))
{
matrix_destroy (&temp_matrix);
matrix_destroy (&matrix);
pattern_destroy (&temp_pattern);
pattern_destroy (&prototype);
return FALSE;
}
printf("Filling initial binary pattern with white noise...\n");
pattern_fill_white_noise (&prototype, .1, s);
printf("Initializing cluster sizes...\n");
if (!compute_cluster_sizes (&prototype, &matrix))
{
fprintf (stderr, "Error while computing cluster sizes\n");
ok = FALSE;
goto out;
}
printf("Generating initial binary pattern...\n");
generate_initial_binary_pattern (&prototype, &matrix);
printf("Generating dither array...\n");
if (!generate_dither_array (&array, &prototype, &matrix,
&temp_pattern, &temp_matrix))
{
fprintf (stderr, "Error while generating dither array\n");
ok = FALSE;
goto out;
}
printf("Saving dither array...\n");
if (!array_save (&array, c_filename))
{
fprintf (stderr, "Error saving dither array\n");
ok = FALSE;
goto out;
}
#if SAVE_PPM
if (!array_save_ppm (&array, ppm_filename))
{
fprintf (stderr, "Error saving dither array PPM\n");
ok = FALSE;
goto out;
}
#else
(void)ppm_filename;
#endif
printf("All done!\n");
out:
array_destroy (&array);
matrix_destroy (&temp_matrix);
matrix_destroy (&matrix);
pattern_destroy (&temp_pattern);
pattern_destroy (&prototype);
return ok;
}
int
main (void)
{
xorwow_state_t s = {1185956906, 12385940, 983948, 349208051, 901842};
if (!generate (64, &s, "blue-noise-64x64.h", "blue-noise-64x64.ppm"))
return -1;
return 0;
}

View file

@ -0,0 +1,412 @@
/* The gcc-provided loongson intrinsic functions are way too fucking broken
* to be of any use, otherwise I'd use them.
*
* - The hardware instructions are very similar to MMX or iwMMXt. Certainly
* close enough that they could have implemented the _mm_*-style intrinsic
* interface and had a ton of optimized code available to them. Instead they
* implemented something much, much worse.
*
* - pshuf takes a dead first argument, causing extra instructions to be
* generated.
*
* - There are no 64-bit shift or logical intrinsics, which means you have
* to implement them with inline assembly, but this is a nightmare because
* gcc doesn't understand that the integer vector datatypes are actually in
* floating-point registers, so you end up with braindead code like
*
* punpcklwd $f9,$f9,$f5
* dmtc1 v0,$f8
* punpcklwd $f19,$f19,$f5
* dmfc1 t9,$f9
* dmtc1 v0,$f9
* dmtc1 t9,$f20
* dmfc1 s0,$f19
* punpcklbh $f20,$f20,$f2
*
* where crap just gets copied back and forth between integer and floating-
* point registers ad nauseam.
*
* Instead of trying to work around the problems from these crap intrinsics, I
* just implement the _mm_* intrinsics needed for pixman-mmx.c using inline
* assembly.
*/
#include <stdint.h>
/* vectors are stored in 64-bit floating-point registers */
typedef double __m64;
/* having a 32-bit datatype allows us to use 32-bit loads in places like load8888 */
typedef float __m32;
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si64 (void)
{
return 0.0;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("paddh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi32 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("paddw %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("paddush %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("paddusb %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si64 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("and %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("pcmpeqw %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("pmaddhw %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pu16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("pmulhuh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("pmullh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si64 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("or %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("packushb %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("packsswh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
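/* For example, _MM_SHUFFLE (3, 2, 1, 0) == 0xe4 is the identity permutation
 * and _MM_SHUFFLE (0, 0, 0, 0) == 0x00 broadcasts element 0. */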
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
{
if (__builtin_constant_p (__w3) &&
__builtin_constant_p (__w2) &&
__builtin_constant_p (__w1) &&
__builtin_constant_p (__w0))
{
uint64_t val = ((uint64_t)__w3 << 48)
| ((uint64_t)__w2 << 32)
| ((uint64_t)__w1 << 16)
| ((uint64_t)__w0 << 0);
return *(__m64 *)&val;
}
else if (__w3 == __w2 && __w2 == __w1 && __w1 == __w0)
{
/* TODO: handle other cases */
uint64_t val = __w3;
uint64_t imm = _MM_SHUFFLE (0, 0, 0, 0);
__m64 ret;
asm("pshufh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (*(__m64 *)&val), "f" (*(__m64 *)&imm)
);
return ret;
} else {
uint64_t val = ((uint64_t)__w3 << 48)
| ((uint64_t)__w2 << 32)
| ((uint64_t)__w1 << 16)
| ((uint64_t)__w0 << 0);
return *(__m64 *)&val;
}
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi32 (unsigned __i1, unsigned __i0)
{
if (__builtin_constant_p (__i1) &&
__builtin_constant_p (__i0))
{
uint64_t val = ((uint64_t)__i1 << 32)
| ((uint64_t)__i0 << 0);
return *(__m64 *)&val;
}
else if (__i1 == __i0)
{
uint64_t imm = _MM_SHUFFLE (1, 0, 1, 0);
__m64 ret;
asm("pshufh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm)
);
return ret;
} else {
uint64_t val = ((uint64_t)__i1 << 32)
| ((uint64_t)__i0 << 0);
return *(__m64 *)&val;
}
}
#undef _MM_SHUFFLE
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi16 (__m64 __m, int64_t __n)
{
__m64 ret;
asm("pshufh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m), "f" (*(__m64 *)&__n)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi16 (__m64 __m, int64_t __count)
{
__m64 ret;
asm("psllh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m), "f" (*(__m64 *)&__count)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si64 (__m64 __m, int64_t __count)
{
__m64 ret;
asm("dsll %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m), "f" (*(__m64 *)&__count)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi16 (__m64 __m, int64_t __count)
{
__m64 ret;
asm("psrlh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m), "f" (*(__m64 *)&__count)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi32 (__m64 __m, int64_t __count)
{
__m64 ret;
asm("psrlw %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m), "f" (*(__m64 *)&__count)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si64 (__m64 __m, int64_t __count)
{
__m64 ret;
asm("dsrl %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m), "f" (*(__m64 *)&__count)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("psubh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("punpckhbh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("punpckhhw %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("punpcklbh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
/* Since punpcklbh doesn't care about the high 32 bits, we use the __m32 datatype which
* allows load8888 to use 32-bit loads */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8_f (__m32 __m1, __m64 __m2)
{
__m64 ret;
asm("punpcklbh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("punpcklhw %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si64 (__m64 __m1, __m64 __m2)
{
__m64 ret;
asm("xor %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
loongson_extract_pi16 (__m64 __m, int64_t __pos)
{
__m64 ret;
asm("pextrh %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m), "f" (*(__m64 *)&__pos)
);
return ret;
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
loongson_insert_pi16 (__m64 __m1, __m64 __m2, int64_t __pos)
{
__m64 ret;
asm("pinsrh_%3 %0, %1, %2\n\t"
: "=f" (ret)
: "f" (__m1), "f" (__m2), "i" (__pos)
);
return ret;
}

View file

@ -1,86 +0,0 @@
$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template";
$#ARGV == 0 or die $usage;
# Get the component size.
$size = int($ARGV[0]);
$size == 8 or $size == 16 or die $usage;
$pixel_size = $size * 4;
$half_pixel_size = $size * 2;
sub mask {
my $str = shift;
my $suffix;
$suffix = "ULL" if $size > 8;
return "0x" . $str . $suffix;
}
# Generate mask strings.
$nibbles = $size / 4;
$mask = "f" x $nibbles;
$zero_mask = "0" x $nibbles;
$one_half = "8" . "0" x ($nibbles - 1);
print "/* WARNING: This file is generated by combine.pl from combine.inc.\n";
print " Please edit one of those files rather than this one. */\n";
print "\n";
print "#line 1 \"pixman-combine.c.template\"\n";
$mask_ = mask($mask);
$one_half_ = mask($one_half);
$g_mask = mask($mask . $zero_mask);
$b_mask = mask($mask . $zero_mask x 2);
$a_mask = mask($mask . $zero_mask x 3);
$rb_mask = mask($mask . $zero_mask . $mask);
$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask);
$rb_one_half = mask($one_half . $zero_mask . $one_half);
$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" . $zero_mask);
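# As a worked example, $size == 8 produces the values used for 32-bit pixels
# (cf. pixman-combine32.h):
#   MASK        0xff          RB_MASK           0xff00ff
#   ONE_HALF    0x80          AG_MASK           0xff00ff00
#   G_MASK      0xff00        RB_ONE_HALF       0x800080
#   R_MASK      0xff0000      RB_MASK_PLUS_ONE  0x10000100
#   A_MASK      0xff000000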
while (<STDIN>) {
# Mask and 1/2 value for a single component.
s/#define COMPONENT_SIZE\b/$& $size/;
s/#define MASK\b/$& $mask_/;
s/#define ONE_HALF\b/$& $one_half_/;
# Shifts and masks for green, blue, and alpha.
s/#define G_SHIFT\b/$& $size/;
s/#define R_SHIFT\b/$& $size * 2/;
s/#define A_SHIFT\b/$& $size * 3/;
s/#define G_MASK\b/$& $g_mask/;
s/#define R_MASK\b/$& $b_mask/;
s/#define A_MASK\b/$& $a_mask/;
# Special values for dealing with red + blue at the same time.
s/#define RB_MASK\b/$& $rb_mask/;
s/#define AG_MASK\b/$& $ag_mask/;
s/#define RB_ONE_HALF\b/$& $rb_one_half/;
s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/;
# Add 32/64 suffix to combining function types.
s/\bCombineFunc\b/CombineFunc$pixel_size/;
s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/;
s/combine_width/combine_$pixel_size/;
s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/;
s/UNc/UN$size/g;
s/ALPHA_c/ALPHA_$size/g;
s/RED_c/RED_$size/g;
s/GREEN_c/GREEN_$size/g;
s/BLUE_c/BLUE_$size/g;
# Convert comp*_t values into the appropriate real types.
s/comp1_t/uint${size}_t/g;
s/comp2_t/uint${half_pixel_size}_t/g;
s/comp4_t/uint${pixel_size}_t/g;
# Change the function table name for the 64-bit version.
s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16;
# Change the header for the 64-bit version
s/pixman-combine.h/pixman-combine64.h/ if $size == 16;
s/pixman-combine.h/pixman-combine32.h/ if $size == 8;
print;
}

View file

@ -0,0 +1,129 @@
# Copyright © 2018 Intel Corporation
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
config_h = configure_file(
configuration : config,
output : 'config.h'
)
version_h = configure_file(
configuration : version_conf,
input : 'pixman-version.h.in',
output : 'pixman-version.h',
install_dir : join_paths(get_option('prefix'), get_option('includedir'), 'pixman-1')
)
libpixman_extra_cargs = []
if cc.has_function_attribute('dllexport')
libpixman_extra_cargs = ['-DPIXMAN_API=__declspec(dllexport)']
endif
pixman_simd_libs = []
simds = [
# the mmx library can be compiled with mmx on x86/x86_64, iwmmxt on
# some arm cores, or loongson mmi on loongson mips systems. The
# libraries will all have the same name, "pixman-mmx", but there is
# no chance of more than one version being built in the same build
# because no system could have mmx, iwmmxt, and mmi, and it
# simplifies the build logic to give them the same name.
['mmx', have_mmx, mmx_flags, []],
['mmx', have_loongson_mmi, loongson_mmi_flags, []],
['mmx', have_iwmmxt, iwmmxt_flags, []],
['sse2', have_sse2, sse2_flags, []],
['ssse3', have_ssse3, ssse3_flags, []],
['vmx', have_vmx, vmx_flags, []],
['arm-simd', have_armv6_simd, [],
['pixman-arm-simd-asm.S', 'pixman-arm-simd-asm-scaled.S']],
['arm-neon', have_neon, [],
['pixman-arm-neon-asm.S', 'pixman-arm-neon-asm-bilinear.S']],
['mips-dspr2', have_mips_dspr2, mips_dspr2_flags,
['pixman-mips-dspr2-asm.S', 'pixman-mips-memcpy-asm.S']],
]
foreach simd : simds
if simd[1]
name = 'pixman-' + simd[0]
pixman_simd_libs += static_library(
name,
[name + '.c', config_h, version_h, simd[3]],
c_args : simd[2]
)
endif
endforeach
pixman_files = files(
'pixman.c',
'pixman-access.c',
'pixman-access-accessors.c',
'pixman-bits-image.c',
'pixman-combine32.c',
'pixman-combine-float.c',
'pixman-conical-gradient.c',
'pixman-filter.c',
'pixman-x86.c',
'pixman-mips.c',
'pixman-arm.c',
'pixman-ppc.c',
'pixman-edge.c',
'pixman-edge-accessors.c',
'pixman-fast-path.c',
'pixman-glyph.c',
'pixman-general.c',
'pixman-gradient-walker.c',
'pixman-image.c',
'pixman-implementation.c',
'pixman-linear-gradient.c',
'pixman-matrix.c',
'pixman-noop.c',
'pixman-radial-gradient.c',
'pixman-region16.c',
'pixman-region32.c',
'pixman-solid-fill.c',
'pixman-timer.c',
'pixman-trap.c',
'pixman-utils.c',
)
# We cannot use 'link_with' or 'link_whole' because Meson won't do the right
# thing for static archives.
_obs = []
foreach l : pixman_simd_libs
_obs += l.extract_all_objects()
endforeach
libpixman = library(
'pixman-1',
[pixman_files, config_h, version_h],
objects : _obs,
c_args : libpixman_extra_cargs,
dependencies : [dep_m, dep_threads],
version : meson.project_version(),
install : true,
)
inc_pixman = include_directories('.')
idep_pixman = declare_dependency(
link_with: libpixman,
include_directories : inc_pixman,
)
install_headers('pixman.h', subdir : 'pixman-1')

View file

@ -68,14 +68,14 @@
#ifdef WORDS_BIGENDIAN
#define FETCH_24(img,l,o) \
((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0))
((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \
(uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
(uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0))
#else
#define FETCH_24(img,l,o) \
((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16))
((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \
(uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
(uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16))
#endif
/* Store macros */
@ -87,7 +87,7 @@
uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \
uint32_t __m, __v; \
\
__m = 1 << (0x1f - ((o) & 0x1f)); \
__m = 1U << (0x1f - ((o) & 0x1f)); \
__v = (v)? __m : 0; \
\
WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \
@ -100,7 +100,7 @@
uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \
uint32_t __m, __v; \
\
__m = 1 << ((o) & 0x1f); \
__m = 1U << ((o) & 0x1f); \
__v = (v)? __m : 0; \
\
WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \
@ -294,14 +294,14 @@ convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixe
}
static force_inline uint32_t
convert_pixel_to_a8r8g8b8 (pixman_image_t *image,
convert_pixel_to_a8r8g8b8 (bits_image_t *image,
pixman_format_code_t format,
uint32_t pixel)
{
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY ||
PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
{
return image->bits.indexed->rgba[pixel];
return image->indexed->rgba[pixel];
}
else
{
@ -332,7 +332,7 @@ convert_pixel_from_a8r8g8b8 (pixman_image_t *image,
}
static force_inline uint32_t
fetch_and_convert_pixel (pixman_image_t * image,
fetch_and_convert_pixel (bits_image_t * image,
const uint8_t * bits,
int offset,
pixman_format_code_t format)
@ -417,7 +417,7 @@ convert_and_store_pixel (bits_image_t * image,
#define MAKE_ACCESSORS(format) \
static void \
fetch_scanline_ ## format (pixman_image_t *image, \
fetch_scanline_ ## format (bits_image_t *image, \
int x, \
int y, \
int width, \
@ -425,7 +425,7 @@ convert_and_store_pixel (bits_image_t * image,
const uint32_t *mask) \
{ \
uint8_t *bits = \
(uint8_t *)(image->bits.bits + y * image->bits.rowstride); \
(uint8_t *)(image->bits + y * image->rowstride); \
int i; \
\
for (i = 0; i < width; ++i) \
@ -461,8 +461,8 @@ convert_and_store_pixel (bits_image_t * image,
uint8_t *bits = \
(uint8_t *)(image->bits + line * image->rowstride); \
\
return fetch_and_convert_pixel ((pixman_image_t *)image, \
bits, offset, PIXMAN_ ## format); \
return fetch_and_convert_pixel ( \
image, bits, offset, PIXMAN_ ## format); \
} \
\
static const void *const __dummy__ ## format
@ -583,14 +583,14 @@ to_srgb (float f)
}
static void
fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image,
fetch_scanline_a8r8g8b8_sRGB_float (bits_image_t * image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@ -612,14 +612,14 @@ fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image,
/* Expects a float buffer */
static void
fetch_scanline_a2r10g10b10_float (pixman_image_t *image,
fetch_scanline_a2r10g10b10_float (bits_image_t * image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@ -642,15 +642,57 @@ fetch_scanline_a2r10g10b10_float (pixman_image_t *image,
}
/* Expects a float buffer */
#ifndef PIXMAN_FB_ACCESSORS
static void
fetch_scanline_x2r10g10b10_float (pixman_image_t *image,
fetch_scanline_rgbf_float (bits_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const float *bits = (float *)image->bits + y * image->rowstride;
const float *pixel = bits + x * 3;
argb_t *buffer = (argb_t *)b;
for (; width--; buffer++) {
buffer->r = *pixel++;
buffer->g = *pixel++;
buffer->b = *pixel++;
buffer->a = 1.f;
}
}
static void
fetch_scanline_rgbaf_float (bits_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const float *bits = (float *)image->bits + y * image->rowstride;
const float *pixel = bits + x * 4;
argb_t *buffer = (argb_t *)b;
for (; width--; buffer++) {
buffer->r = *pixel++;
buffer->g = *pixel++;
buffer->b = *pixel++;
buffer->a = *pixel++;
}
}
#endif
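The two fetchers above define the memory layout of the new wide formats: tightly packed float triples (rgb) or quadruples (rgba), with rowstride counted in 32-bit units, i.e. one unit per float. A standalone sketch of the rgb case under those assumptions (the argb_t here mirrors pixman's float pixel struct; the helper is not pixman API):

typedef struct { float a, r, g, b; } argb_t;

/* Expand one row of a packed rgb_float image to argb_t. The format
 * has no alpha channel, so pixels come out fully opaque. */
static void
fetch_rgbf_row (const float *bits, int rowstride_in_floats,
                int x, int y, int width, argb_t *buffer)
{
    const float *pixel = bits + y * rowstride_in_floats + x * 3;

    while (width--)
    {
        buffer->r = *pixel++;
        buffer->g = *pixel++;
        buffer->b = *pixel++;
        buffer->a = 1.f;
        buffer++;
    }
}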
static void
fetch_scanline_x2r10g10b10_float (bits_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@ -673,14 +715,14 @@ fetch_scanline_x2r10g10b10_float (pixman_image_t *image,
/* Expects a float buffer */
static void
fetch_scanline_a2b10g10r10_float (pixman_image_t *image,
fetch_scanline_a2b10g10r10_float (bits_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@ -704,14 +746,14 @@ fetch_scanline_a2b10g10r10_float (pixman_image_t *image,
/* Expects a float buffer */
static void
fetch_scanline_x2b10g10r10_float (pixman_image_t *image,
fetch_scanline_x2b10g10r10_float (bits_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@ -733,14 +775,14 @@ fetch_scanline_x2b10g10r10_float (pixman_image_t *image,
}
static void
fetch_scanline_yuy2 (pixman_image_t *image,
fetch_scanline_yuy2 (bits_image_t *image,
int x,
int line,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + image->bits.rowstride * line;
const uint32_t *bits = image->bits + image->rowstride * line;
int i;
for (i = 0; i < width; i++)
@ -767,7 +809,7 @@ fetch_scanline_yuy2 (pixman_image_t *image,
}
static void
fetch_scanline_yv12 (pixman_image_t *image,
fetch_scanline_yv12 (bits_image_t *image,
int x,
int line,
int width,
@ -805,6 +847,40 @@ fetch_scanline_yv12 (pixman_image_t *image,
/**************************** Pixel wise fetching *****************************/
#ifndef PIXMAN_FB_ACCESSORS
static argb_t
fetch_pixel_rgbf_float (bits_image_t *image,
int offset,
int line)
{
float *bits = (float *)image->bits + line * image->rowstride;
argb_t argb;
argb.r = bits[offset * 3];
argb.g = bits[offset * 3 + 1];
argb.b = bits[offset * 3 + 2];
argb.a = 1.f;
return argb;
}
static argb_t
fetch_pixel_rgbaf_float (bits_image_t *image,
int offset,
int line)
{
float *bits = (float *)image->bits + line * image->rowstride;
argb_t argb;
argb.r = bits[offset * 4];
argb.g = bits[offset * 4 + 1];
argb.b = bits[offset * 4 + 2];
argb.a = bits[offset * 4 + 3];
return argb;
}
#endif
static argb_t
fetch_pixel_x2r10g10b10_float (bits_image_t *image,
int offset,
@ -962,6 +1038,45 @@ fetch_pixel_yv12 (bits_image_t *image,
/*********************************** Store ************************************/
#ifndef PIXMAN_FB_ACCESSORS
static void
store_scanline_rgbaf_float (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *v)
{
float *bits = (float *)image->bits + image->rowstride * y + 4 * x;
const argb_t *values = (argb_t *)v;
for (; width; width--, values++)
{
*bits++ = values->r;
*bits++ = values->g;
*bits++ = values->b;
*bits++ = values->a;
}
}
static void
store_scanline_rgbf_float (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *v)
{
float *bits = (float *)image->bits + image->rowstride * y + 3 * x;
const argb_t *values = (argb_t *)v;
for (; width; width--, values++)
{
*bits++ = values->r;
*bits++ = values->g;
*bits++ = values->b;
}
}
#endif
static void
store_scanline_a2r10g10b10_float (bits_image_t * image,
int x,
@ -976,7 +1091,7 @@ store_scanline_a2r10g10b10_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
uint16_t a, r, g, b;
uint32_t a, r, g, b;
a = pixman_float_to_unorm (values[i].a, 2);
r = pixman_float_to_unorm (values[i].r, 10);
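This hunk and the similar ones below widen the channel temporaries for a reason beyond style: the packed word is assembled with shifts such as `a << 30`, and a uint16_t operand promotes to signed int, so `3 << 30` would overflow. A sketch of the whole conversion, assuming pixman_float_to_unorm clamps to [0, 1] and scales to n bits (the helper below is a stand-in, not the real implementation):

#include <stdint.h>

static uint32_t
float_to_unorm (float f, int n_bits)    /* stand-in for pixman's */
{
    if (f < 0.0f) f = 0.0f;
    if (f > 1.0f) f = 1.0f;

    return (uint32_t) (f * ((1 << n_bits) - 1) + 0.5f);
}

static uint32_t
pack_a2r10g10b10 (float af, float rf, float gf, float bf)
{
    /* uint32_t operands keep the shifts unsigned; with uint16_t,
     * `a << 30` would be done in (signed) int and overflow. */
    uint32_t a = float_to_unorm (af, 2);
    uint32_t r = float_to_unorm (rf, 10);
    uint32_t g = float_to_unorm (gf, 10);
    uint32_t b = float_to_unorm (bf, 10);

    return (a << 30) | (r << 20) | (g << 10) | b;
}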
@ -1002,7 +1117,7 @@ store_scanline_x2r10g10b10_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
uint16_t r, g, b;
uint32_t r, g, b;
r = pixman_float_to_unorm (values[i].r, 10);
g = pixman_float_to_unorm (values[i].g, 10);
@ -1027,7 +1142,7 @@ store_scanline_a2b10g10r10_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
uint16_t a, r, g, b;
uint32_t a, r, g, b;
a = pixman_float_to_unorm (values[i].a, 2);
r = pixman_float_to_unorm (values[i].r, 10);
@ -1053,7 +1168,7 @@ store_scanline_x2b10g10r10_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
uint16_t r, g, b;
uint32_t r, g, b;
r = pixman_float_to_unorm (values[i].r, 10);
g = pixman_float_to_unorm (values[i].g, 10);
@ -1078,7 +1193,7 @@ store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
uint8_t a, r, g, b;
uint32_t a, r, g, b;
a = pixman_float_to_unorm (values[i].a, 8);
r = to_srgb (values[i].r);
@ -1090,44 +1205,6 @@ store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image,
}
}
static void
store_scanline_16 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *v)
{
uint16_t *bits = (uint16_t*)(image->bits + image->rowstride * y);
uint16_t *values = (uint16_t *)v;
uint16_t *pixel = bits + x;
int i;
for (i = 0; i < width; ++i)
{
WRITE (image, pixel++, values[i]);
}
}
static void
fetch_scanline_16 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint16_t *bits = (uint16_t*)(image->bits.bits + y * image->bits.rowstride);
const uint16_t *pixel = bits + x;
int i;
uint16_t *buffer = (uint16_t *)b;
for (i = 0; i < width; ++i)
{
*buffer++ = READ (image, pixel++);
}
}
/*
* Contracts a floating point image to 32bpp and then stores it using a
* regular 32-bit store proc. Despite the type, this function expects an
@ -1159,37 +1236,37 @@ store_scanline_generic_float (bits_image_t * image,
}
static void
fetch_scanline_generic_float (pixman_image_t *image,
fetch_scanline_generic_float (bits_image_t * image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL);
image->fetch_scanline_32 (image, x, y, width, buffer, NULL);
pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width);
pixman_expand_to_float ((argb_t *)buffer, buffer, image->format, width);
}
/* The 32_sRGB paths should be deleted after narrow processing
* is no longer invoked for formats that are considered wide.
* (Also see fetch_pixel_generic_lossy_32) */
static void
fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image,
fetch_scanline_a8r8g8b8_32_sRGB (bits_image_t *image,
int x,
int y,
int width,
uint32_t *buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
uint32_t tmp;
while (pixel < end)
{
uint8_t a, r, g, b;
uint32_t a, r, g, b;
tmp = READ (image, pixel++);
@ -1213,7 +1290,7 @@ fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t tmp = READ (image, bits + offset);
uint8_t a, r, g, b;
uint32_t a, r, g, b;
a = (tmp >> 24) & 0xff;
r = (tmp >> 16) & 0xff;
@ -1242,7 +1319,7 @@ store_scanline_a8r8g8b8_32_sRGB (bits_image_t *image,
for (i = 0; i < width; ++i)
{
uint8_t a, r, g, b;
uint32_t a, r, g, b;
tmp = values[i];
@ -1294,12 +1371,10 @@ fetch_pixel_generic_lossy_32 (bits_image_t *image,
typedef struct
{
pixman_format_code_t format;
fetch_scanline_t fetch_scanline_16;
fetch_scanline_t fetch_scanline_32;
fetch_scanline_t fetch_scanline_float;
fetch_pixel_32_t fetch_pixel_32;
fetch_pixel_float_t fetch_pixel_float;
store_scanline_t store_scanline_16;
store_scanline_t store_scanline_32;
store_scanline_t store_scanline_float;
} format_info_t;
@ -1307,28 +1382,13 @@ typedef struct
#define FORMAT_INFO(format) \
{ \
PIXMAN_ ## format, \
NULL, \
fetch_scanline_ ## format, \
fetch_scanline_generic_float, \
fetch_pixel_ ## format, \
fetch_pixel_generic_float, \
NULL, \
store_scanline_ ## format, \
store_scanline_generic_float \
}
#define FORMAT_INFO16(format) \
{ \
PIXMAN_ ## format, \
fetch_scanline_16, \
fetch_scanline_ ## format, \
fetch_scanline_generic_float, \
fetch_pixel_ ## format, \
fetch_pixel_generic_float, \
store_scanline_16, \
store_scanline_ ## format, \
store_scanline_generic_float \
}
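With the 16-bit fields dropped, format_info_t is down to seven members and FORMAT_INFO16 loses its reason to exist; its former users can take the generic initializer. For illustration, FORMAT_INFO (r5g6b5) now expands to:

{
    PIXMAN_r5g6b5,
    fetch_scanline_r5g6b5, fetch_scanline_generic_float,
    fetch_pixel_r5g6b5, fetch_pixel_generic_float,
    store_scanline_r5g6b5, store_scanline_generic_float
},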
static const format_info_t accessors[] =
{
@ -1345,10 +1405,8 @@ static const format_info_t accessors[] =
/* sRGB formats */
{ PIXMAN_a8r8g8b8_sRGB,
NULL,
fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float,
fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float,
NULL,
store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float,
},
@ -1357,8 +1415,8 @@ static const format_info_t accessors[] =
FORMAT_INFO (b8g8r8),
/* 16bpp formats */
FORMAT_INFO16 (r5g6b5),
FORMAT_INFO16 (b5g6r5),
FORMAT_INFO (r5g6b5),
FORMAT_INFO (b5g6r5),
FORMAT_INFO (a1r5g5b5),
FORMAT_INFO (x1r5g5b5),
@ -1408,37 +1466,48 @@ static const format_info_t accessors[] =
FORMAT_INFO (g1),
/* Wide formats */
#ifndef PIXMAN_FB_ACCESSORS
{ PIXMAN_rgba_float,
NULL, fetch_scanline_rgbaf_float,
fetch_pixel_generic_lossy_32, fetch_pixel_rgbaf_float,
NULL, store_scanline_rgbaf_float },
{ PIXMAN_rgb_float,
NULL, fetch_scanline_rgbf_float,
fetch_pixel_generic_lossy_32, fetch_pixel_rgbf_float,
NULL, store_scanline_rgbf_float },
#endif
{ PIXMAN_a2r10g10b10,
NULL, NULL, fetch_scanline_a2r10g10b10_float,
NULL, fetch_scanline_a2r10g10b10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float,
NULL, NULL, store_scanline_a2r10g10b10_float },
NULL, store_scanline_a2r10g10b10_float },
{ PIXMAN_x2r10g10b10,
NULL, NULL, fetch_scanline_x2r10g10b10_float,
NULL, fetch_scanline_x2r10g10b10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float,
NULL, NULL, store_scanline_x2r10g10b10_float },
NULL, store_scanline_x2r10g10b10_float },
{ PIXMAN_a2b10g10r10,
NULL, NULL, fetch_scanline_a2b10g10r10_float,
NULL, fetch_scanline_a2b10g10r10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float,
NULL, NULL, store_scanline_a2b10g10r10_float },
NULL, store_scanline_a2b10g10r10_float },
{ PIXMAN_x2b10g10r10,
NULL, NULL, fetch_scanline_x2b10g10r10_float,
NULL, fetch_scanline_x2b10g10r10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float,
NULL, NULL, store_scanline_x2b10g10r10_float },
NULL, store_scanline_x2b10g10r10_float },
/* YUV formats */
{ PIXMAN_yuy2,
NULL, fetch_scanline_yuy2, fetch_scanline_generic_float,
fetch_scanline_yuy2, fetch_scanline_generic_float,
fetch_pixel_yuy2, fetch_pixel_generic_float,
NULL, NULL, NULL },
NULL, NULL },
{ PIXMAN_yv12,
NULL, fetch_scanline_yv12, fetch_scanline_generic_float,
fetch_scanline_yv12, fetch_scanline_generic_float,
fetch_pixel_yv12, fetch_pixel_generic_float,
NULL, NULL, NULL },
NULL, NULL },
{ PIXMAN_null },
};
@ -1452,12 +1521,10 @@ setup_accessors (bits_image_t *image)
{
if (info->format == image->format)
{
image->fetch_scanline_16 = info->fetch_scanline_16;
image->fetch_scanline_32 = info->fetch_scanline_32;
image->fetch_scanline_float = info->fetch_scanline_float;
image->fetch_pixel_32 = info->fetch_pixel_32;
image->fetch_pixel_float = info->fetch_pixel_float;
image->store_scanline_16 = info->store_scanline_16;
image->store_scanline_32 = info->store_scanline_32;
image->store_scanline_float = info->store_scanline_float;

View file

@ -0,0 +1,37 @@
/*
* Copyright © 2008 Mozilla Corporation
* Copyright © 2010 Nokia Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Jeff Muizelaar (jeff@infidigm.net)
*
*/
/* Supplementary macro for setting function attributes */
.macro pixman_asm_function fname
.func fname
.global fname
#ifdef __ELF__
.hidden fname
.type fname, %function
#endif
fname:
.endm

View file

@ -266,13 +266,6 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_normal_##op, \
scaled_nearest_scanline_##cputype##_##name##_##op, \
src_type, dst_type, NORMAL)
/* Provide entries for the fast path table */
#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \
src_type, dst_type) \
void \
@ -318,9 +311,7 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
/* Provide entries for the fast path table */
#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
/*****************************************************************************/
@ -360,16 +351,16 @@ scaled_bilinear_scanline_##cputype##_##name##_##op ( \
\
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
NULL, src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
NULL, src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
NULL, src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
NULL, src_type, uint32_t, dst_type, NORMAL, \
src_type, uint32_t, dst_type, NORMAL, \
FLAG_NONE)
@ -409,19 +400,19 @@ scaled_bilinear_scanline_##cputype##_##name##_##op ( \
\
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
NULL, src_type, uint8_t, dst_type, COVER, \
src_type, uint8_t, dst_type, COVER, \
FLAG_HAVE_NON_SOLID_MASK) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
NULL, src_type, uint8_t, dst_type, NONE, \
src_type, uint8_t, dst_type, NONE, \
FLAG_HAVE_NON_SOLID_MASK) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
NULL, src_type, uint8_t, dst_type, PAD, \
src_type, uint8_t, dst_type, PAD, \
FLAG_HAVE_NON_SOLID_MASK) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
NULL, src_type, uint8_t, dst_type, NORMAL, \
src_type, uint8_t, dst_type, NORMAL, \
FLAG_HAVE_NON_SOLID_MASK)

View file

@ -1,21 +0,0 @@
area pixman_msvc, code, readonly
export pixman_msvc_try_arm_simd_op
pixman_msvc_try_arm_simd_op
;; I don't think the msvc arm asm knows how to do SIMD insns
;; uqadd8 r3,r3,r3
dcd 0xe6633f93
mov pc,lr
endp
export pixman_msvc_try_arm_neon_op
pixman_msvc_try_arm_neon_op
;; I don't think the msvc arm asm knows how to do NEON insns
;; veor d0,d0,d0
dcd 0xf3000110
mov pc,lr
endp
end

View file

@ -65,23 +65,13 @@
.p2align 2
#include "pixman-private.h"
#include "pixman-arm-asm.h"
#include "pixman-arm-neon-asm.h"
/*
* Bilinear macros from pixman-arm-neon-asm.S
*/
/* Supplementary macro for setting function attributes */
.macro pixman_asm_function fname
.func fname
.global fname
#ifdef __ELF__
.hidden fname
.type fname, %function
#endif
fname:
.endm
/*
* Bilinear scaling support code which tries to provide pixel fetching, color
* format conversion, and interpolation as separate macros which can be used

View file

@ -50,6 +50,7 @@
.p2align 2
#include "pixman-private.h"
#include "pixman-arm-asm.h"
#include "pixman-arm-neon-asm.h"
/* Global configuration options and preferences */
@ -954,7 +955,6 @@ generate_composite_function \
*/
.macro pixman_composite_over_n_8_0565_init
add DUMMY, sp, #ARGS_STACK_OFFSET
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d8, d11[0]
@ -982,7 +982,6 @@ generate_composite_function \
.macro pixman_composite_over_8888_n_0565_init
add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d24[0]}, [DUMMY]
vdup.8 d24, d24[3]
@ -1449,7 +1448,6 @@ generate_composite_function \
.macro pixman_composite_over_n_8_8888_init
add DUMMY, sp, #ARGS_STACK_OFFSET
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d8, d11[0]
@ -1521,7 +1519,6 @@ generate_composite_function \
.macro pixman_composite_over_n_8_8_init
add DUMMY, sp, #ARGS_STACK_OFFSET
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d8[0]}, [DUMMY]
vdup.8 d8, d8[3]
@ -1623,7 +1620,6 @@ generate_composite_function \
.macro pixman_composite_over_n_8888_8888_ca_init
add DUMMY, sp, #ARGS_STACK_OFFSET
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d8, d11[0]
@ -1793,7 +1789,6 @@ generate_composite_function \
.macro pixman_composite_over_n_8888_0565_ca_init
add DUMMY, sp, #ARGS_STACK_OFFSET
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d8, d11[0]
@ -1907,7 +1902,6 @@ generate_composite_function \
.macro pixman_composite_add_n_8_8_init
add DUMMY, sp, #ARGS_STACK_OFFSET
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d11, d11[3]
@ -2214,7 +2208,6 @@ generate_composite_function_single_scanline \
.macro pixman_composite_over_8888_n_8888_init
add DUMMY, sp, #48
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d15[0]}, [DUMMY]
vdup.8 d15, d15[3]
@ -2587,7 +2580,6 @@ generate_composite_function \
.macro pixman_composite_over_0565_n_0565_init
add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
.vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d15[0]}, [DUMMY]
vdup.8 d15, d15[3]
@ -2839,17 +2831,6 @@ generate_composite_function_nearest_scanline \
/******************************************************************************/
/* Supplementary macro for setting function attributes */
.macro pixman_asm_function fname
.func fname
.global fname
#ifdef __ELF__
.hidden fname
.type fname, %function
#endif
fname:
.endm
/*
* Bilinear scaling support code which tries to provide pixel fetching, color
* format conversion, and interpolation as separate macros which can be used
@ -3141,16 +3122,13 @@ pixman_asm_function fname
TMP4 .req r9
STRIDE .req r2
.fnstart
mov ip, sp
.save {r4, r5, r6, r7, r8, r9}
push {r4, r5, r6, r7, r8, r9}
mov PF_OFFS, #prefetch_distance
ldmia ip, {WB, X, UX, WIDTH}
mul PF_OFFS, PF_OFFS, UX
.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
.vsave {d8-d15}
vpush {d8-d15}
.endif
@ -3244,7 +3222,6 @@ pixman_asm_function fname
.endif
pop {r4, r5, r6, r7, r8, r9}
bx lr
.fnend
.unreq OUT
.unreq TOP

View file

@ -385,7 +385,7 @@
* execute simultaneously with NEON and be completely shadowed by it. Thus
* we get no performance overhead at all (*). This looks like a very nice
* feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
* but still can implement some rather advanced prefetch logic in sofware
* but still can implement some rather advanced prefetch logic in software
* for almost zero cost!
*
* (*) The overhead of the prefetcher is visible when running some trivial
@ -631,16 +631,8 @@ local skip1
src_basereg_ = 0, \
mask_basereg_ = 24
.func fname
.global fname
/* For ELF format also set function visibility to hidden */
#ifdef __ELF__
.hidden fname
.type fname, %function
#endif
fname:
.fnstart
.save {r4-r12, lr}
pixman_asm_function fname
push {r4-r12, lr} /* save all registers */
/*
@ -818,7 +810,6 @@ fname:
init
.if regs_shortage
.save {r0, r1}
push {r0, r1}
.endif
subs H, H, #1
@ -904,7 +895,6 @@ fname:
.endif
cleanup
pop {r4-r12, pc} /* exit */
.fnend
.purgem fetch_src_pixblock
.purgem pixld_src
@ -949,15 +939,8 @@ fname:
src_basereg_ = 0, \
mask_basereg_ = 24
.func fname
.global fname
/* For ELF format also set function visibility to hidden */
#ifdef __ELF__
.hidden fname
.type fname, %function
#endif
fname:
.fnstart
pixman_asm_function fname
.set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
/*
* Make some macro arguments globally visible and accessible
@ -992,7 +975,6 @@ fname:
.endm
ldr UNIT_X, [sp]
.save {r4-r8, lr}
push {r4-r8, lr}
ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)]
.if mask_bpp != 0
@ -1108,7 +1090,6 @@ fname:
.purgem fetch_src_pixblock
.purgem pixld_src
.fnend
.endfunc
.endm
@ -1135,7 +1116,6 @@ fname:
*/
.macro default_init_need_all_regs
.vsave {d8-d15}
vpush {d8-d15}
.endm

View file

@ -145,23 +145,6 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
uint16_t, uint16_t)
PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
uint32_t, uint32_t)
static force_inline void
pixman_scaled_bilinear_scanline_8888_8888_SRC (
uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, src_bottom, wt, wb, vx, unit_x, w);
}
PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
uint32_t, uint32_t)
@ -285,28 +268,6 @@ arm_neon_blt (pixman_implementation_t *imp,
}
}
static inline void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
{
pixman_composite_over_8888_0565_asm_neon (width, 1, dst, 0, src, 0);
}
FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_cover_OVER,
pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
uint32_t, uint32_t, uint16_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_pad_OVER,
pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
uint32_t, uint32_t, uint16_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_none_OVER,
pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
uint32_t, uint32_t, uint16_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_normal_OVER,
pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
uint32_t, uint32_t, uint16_t,
NORMAL, FLAG_NONE)
static const pixman_fast_path_t arm_neon_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
@ -401,21 +362,21 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, neon_composite_out_reverse_8_8888),
PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, neon_composite_out_reverse_8_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
/* Note: NONE repeat is not supported yet */
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
@ -460,8 +421,6 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
{ PIXMAN_OP_NONE },
};

View file

@ -37,16 +37,7 @@
.altmacro
.p2align 2
/* Supplementary macro for setting function attributes */
.macro pixman_asm_function fname
.func fname
.global fname
#ifdef __ELF__
.hidden fname
.type fname, %function
#endif
fname:
.endm
#include "pixman-arm-asm.h"
/*
* Note: This code is only using armv5te instructions (not even armv6),

View file

@ -37,6 +37,7 @@
.altmacro
.p2align 2
#include "pixman-arm-asm.h"
#include "pixman-arm-simd-asm.h"
/* A head macro should do all processing which results in an output of up to
@ -303,6 +304,83 @@ generate_composite_function \
/******************************************************************************/
.macro src_x888_0565_init
/* Hold loop invariant in MASK */
ldr MASK, =0x001F001F
line_saved_regs STRIDE_S, ORIG_W
.endm
.macro src_x888_0565_1pixel s, d
and WK&d, MASK, WK&s, lsr #3 @ 00000000000rrrrr00000000000bbbbb
and STRIDE_S, WK&s, #0xFC00 @ 0000000000000000gggggg0000000000
orr WK&d, WK&d, WK&d, lsr #5 @ 00000000000-----rrrrr000000bbbbb
orr WK&d, WK&d, STRIDE_S, lsr #5 @ 00000000000-----rrrrrggggggbbbbb
/* Top 16 bits are discarded during the following STRH */
.endm
.macro src_x888_0565_2pixels slo, shi, d, tmp
and SCRATCH, WK&shi, #0xFC00 @ 0000000000000000GGGGGG0000000000
and WK&tmp, MASK, WK&shi, lsr #3 @ 00000000000RRRRR00000000000BBBBB
and WK&shi, MASK, WK&slo, lsr #3 @ 00000000000rrrrr00000000000bbbbb
orr WK&tmp, WK&tmp, WK&tmp, lsr #5 @ 00000000000-----RRRRR000000BBBBB
orr WK&tmp, WK&tmp, SCRATCH, lsr #5 @ 00000000000-----RRRRRGGGGGGBBBBB
and SCRATCH, WK&slo, #0xFC00 @ 0000000000000000gggggg0000000000
orr WK&shi, WK&shi, WK&shi, lsr #5 @ 00000000000-----rrrrr000000bbbbb
orr WK&shi, WK&shi, SCRATCH, lsr #5 @ 00000000000-----rrrrrggggggbbbbb
pkhbt WK&d, WK&shi, WK&tmp, lsl #16 @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb
.endm
.macro src_x888_0565_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
WK4 .req STRIDE_S
WK5 .req STRIDE_M
WK6 .req WK3
WK7 .req ORIG_W
.if numbytes == 16
pixld , 16, 4, SRC, 0
src_x888_0565_2pixels 4, 5, 0, 0
pixld , 8, 4, SRC, 0
src_x888_0565_2pixels 6, 7, 1, 1
pixld , 8, 6, SRC, 0
.else
pixld , numbytes*2, 4, SRC, 0
.endif
.endm
.macro src_x888_0565_process_tail cond, numbytes, firstreg
.if numbytes == 16
src_x888_0565_2pixels 4, 5, 2, 2
src_x888_0565_2pixels 6, 7, 3, 4
.elseif numbytes == 8
src_x888_0565_2pixels 4, 5, 1, 1
src_x888_0565_2pixels 6, 7, 2, 2
.elseif numbytes == 4
src_x888_0565_2pixels 4, 5, 1, 1
.else
src_x888_0565_1pixel 4, 1
.endif
.if numbytes == 16
pixst , numbytes, 0, DST
.else
pixst , numbytes, 1, DST
.endif
.unreq WK4
.unreq WK5
.unreq WK6
.unreq WK7
.endm
generate_composite_function \
pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, \
FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
3, /* prefetch distance */ \
src_x888_0565_init, \
nop_macro, /* newline */ \
nop_macro, /* cleanup */ \
src_x888_0565_process_head, \
src_x888_0565_process_tail
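The bit-layout comments in src_x888_0565_1pixel describe an ordinary 888-to-565 truncation; the macros above merely do it two pixels per register with a shared 0x001F001F mask. A scalar C reference of the same conversion:

#include <stdint.h>

/* Keep the top 5/6/5 bits of the 8-bit r/g/b channels of an
 * x8r8g8b8 pixel and pack them as r5g6b5. */
static uint16_t
x888_to_0565 (uint32_t s)
{
    return (uint16_t) (((s >> 8) & 0xf800) |    /* rrrrr........... */
                       ((s >> 5) & 0x07e0) |    /* .....gggggg..... */
                       ((s >> 3) & 0x001f));    /* ...........bbbbb */
}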
/******************************************************************************/
.macro add_8_8_8pixels cond, dst1, dst2
uqadd8&cond WK&dst1, WK&dst1, MASK
uqadd8&cond WK&dst2, WK&dst2, STRIDE_M
@ -611,3 +689,491 @@ generate_composite_function \
/******************************************************************************/
.macro over_reverse_n_8888_init
ldr SRC, [sp, #ARGS_STACK_OFFSET]
ldr MASK, =0x00800080
/* Split source pixel into RB/AG parts */
uxtb16 STRIDE_S, SRC
uxtb16 STRIDE_M, SRC, ror #8
/* Set GE[3:0] to 0101 so SEL instructions do what we want */
uadd8 SCRATCH, MASK, MASK
line_saved_regs STRIDE_D, ORIG_W
.endm
.macro over_reverse_n_8888_newline
mov STRIDE_D, #0xFF
.endm
.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
pixld , numbytes, firstreg, DST, 0
.endm
.macro over_reverse_n_8888_1pixel d, is_only
teq WK&d, #0
beq 8f /* replace with source */
bics ORIG_W, STRIDE_D, WK&d, lsr #24
.if is_only == 1
beq 49f /* skip store */
.else
beq 9f /* write same value back */
.endif
mla SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
mla ORIG_W, STRIDE_M, ORIG_W, MASK /* alpha/green */
uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
mov SCRATCH, SCRATCH, ror #8
sel ORIG_W, SCRATCH, ORIG_W
uqadd8 WK&d, WK&d, ORIG_W
b 9f
8: mov WK&d, SRC
9:
.endm
.macro over_reverse_n_8888_tail numbytes, reg1, reg2, reg3, reg4
.if numbytes == 4
over_reverse_n_8888_1pixel reg1, 1
.else
and SCRATCH, WK&reg1, WK&reg2
.if numbytes == 16
and SCRATCH, SCRATCH, WK&reg3
and SCRATCH, SCRATCH, WK&reg4
.endif
mvns SCRATCH, SCRATCH, asr #24
beq 49f /* skip store if all opaque */
over_reverse_n_8888_1pixel reg1, 0
over_reverse_n_8888_1pixel reg2, 0
.if numbytes == 16
over_reverse_n_8888_1pixel reg3, 0
over_reverse_n_8888_1pixel reg4, 0
.endif
.endif
pixst , numbytes, reg1, DST
49:
.endm
.macro over_reverse_n_8888_process_tail cond, numbytes, firstreg
over_reverse_n_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
.endm
generate_composite_function \
pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32, \
FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
3, /* prefetch distance */ \
over_reverse_n_8888_init, \
over_reverse_n_8888_newline, \
nop_macro, /* cleanup */ \
over_reverse_n_8888_process_head, \
over_reverse_n_8888_process_tail
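over_reverse is `dest OVER source`: the destination stays on top and the source only shows through the destination's remaining alpha. Per pixel that is result = dst + src * (255 - dst.alpha) / 255, saturating, which is what the mla/uxtab16/uqadd8 sequence computes; the early branches skip the arithmetic when the destination is empty or already opaque. A scalar C sketch of the same logic:

#include <stdint.h>

static uint32_t
over_reverse_pixel (uint32_t src, uint32_t dst)
{
    uint32_t ia = 255 - (dst >> 24);   /* inverse destination alpha */
    uint32_t out = 0;
    int shift;

    if (dst == 0)
        return src;                    /* empty dest: take source */
    if (ia == 0)
        return dst;                    /* opaque dest: unchanged */

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t s = (src >> shift) & 0xff;
        uint32_t d = (dst >> shift) & 0xff;
        uint32_t t = s * ia + 0x80;    /* rounded t/255 follows */
        uint32_t c = d + ((t + (t >> 8)) >> 8);

        if (c > 0xff)
            c = 0xff;                  /* uqadd8-style saturation */
        out |= c << shift;
    }

    return out;
}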
/******************************************************************************/
.macro over_white_8888_8888_ca_init
HALF .req SRC
TMP0 .req STRIDE_D
TMP1 .req STRIDE_S
TMP2 .req STRIDE_M
TMP3 .req ORIG_W
WK4 .req SCRATCH
line_saved_regs STRIDE_D, STRIDE_M, ORIG_W
ldr SCRATCH, =0x800080
mov HALF, #0x80
/* Set GE[3:0] to 0101 so SEL instructions do what we want */
uadd8 SCRATCH, SCRATCH, SCRATCH
.set DST_PRELOAD_BIAS, 8
.endm
.macro over_white_8888_8888_ca_cleanup
.set DST_PRELOAD_BIAS, 0
.unreq HALF
.unreq TMP0
.unreq TMP1
.unreq TMP2
.unreq TMP3
.unreq WK4
.endm
.macro over_white_8888_8888_ca_combine m, d
uxtb16 TMP1, TMP0 /* rb_notmask */
uxtb16 TMP2, d /* rb_dest; 1 stall follows */
smlatt TMP3, TMP2, TMP1, HALF /* red */
smlabb TMP2, TMP2, TMP1, HALF /* blue */
uxtb16 TMP0, TMP0, ror #8 /* ag_notmask */
uxtb16 TMP1, d, ror #8 /* ag_dest; 1 stall follows */
smlatt d, TMP1, TMP0, HALF /* alpha */
smlabb TMP1, TMP1, TMP0, HALF /* green */
pkhbt TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */
pkhbt TMP1, TMP1, d, lsl #16 /* ag */
uxtab16 TMP0, TMP0, TMP0, ror #8
uxtab16 TMP1, TMP1, TMP1, ror #8
mov TMP0, TMP0, ror #8
sel d, TMP0, TMP1
uqadd8 d, d, m /* d is a late result */
.endm
.macro over_white_8888_8888_ca_1pixel_head
pixld , 4, 1, MASK, 0
pixld , 4, 3, DST, 0
.endm
.macro over_white_8888_8888_ca_1pixel_tail
mvn TMP0, WK1
teq WK1, WK1, asr #32
bne 01f
bcc 03f
mov WK3, WK1
b 02f
01: over_white_8888_8888_ca_combine WK1, WK3
02: pixst , 4, 3, DST
03:
.endm
.macro over_white_8888_8888_ca_2pixels_head
pixld , 8, 1, MASK, 0
.endm
.macro over_white_8888_8888_ca_2pixels_tail
pixld , 8, 3, DST
mvn TMP0, WK1
teq WK1, WK1, asr #32
bne 01f
movcs WK3, WK1
bcs 02f
teq WK2, #0
beq 05f
b 02f
01: over_white_8888_8888_ca_combine WK1, WK3
02: mvn TMP0, WK2
teq WK2, WK2, asr #32
bne 03f
movcs WK4, WK2
b 04f
03: over_white_8888_8888_ca_combine WK2, WK4
04: pixst , 8, 3, DST
05:
.endm
.macro over_white_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
.if numbytes == 4
over_white_8888_8888_ca_1pixel_head
.else
.if numbytes == 16
over_white_8888_8888_ca_2pixels_head
over_white_8888_8888_ca_2pixels_tail
.endif
over_white_8888_8888_ca_2pixels_head
.endif
.endm
.macro over_white_8888_8888_ca_process_tail cond, numbytes, firstreg
.if numbytes == 4
over_white_8888_8888_ca_1pixel_tail
.else
over_white_8888_8888_ca_2pixels_tail
.endif
.endm
generate_composite_function \
pixman_composite_over_white_8888_8888_ca_asm_armv6, 0, 32, 32, \
FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
2, /* prefetch distance */ \
over_white_8888_8888_ca_init, \
nop_macro, /* newline */ \
over_white_8888_8888_ca_cleanup, \
over_white_8888_8888_ca_process_head, \
over_white_8888_8888_ca_process_tail
.macro over_n_8888_8888_ca_init
/* Set up constants. RB_SRC and AG_SRC are in registers;
* RB_FLDS, A_SRC, and the two HALF values need to go on the
* stack (and the full SRC value is already there) */
ldr SCRATCH, [sp, #ARGS_STACK_OFFSET]
mov WK0, #0x00FF0000
orr WK0, WK0, #0xFF /* RB_FLDS (0x00FF00FF) */
mov WK1, #0x80 /* HALF default value */
mov WK2, SCRATCH, lsr #24 /* A_SRC */
orr WK3, WK1, WK1, lsl #16 /* HALF alternate value (0x00800080) */
push {WK0-WK3}
.set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+16
uxtb16 SRC, SCRATCH
uxtb16 STRIDE_S, SCRATCH, ror #8
/* Set GE[3:0] to 0101 so SEL instructions do what we want */
uadd8 SCRATCH, WK3, WK3
.unreq WK0
.unreq WK1
.unreq WK2
.unreq WK3
WK0 .req Y
WK1 .req STRIDE_D
RB_SRC .req SRC
AG_SRC .req STRIDE_S
WK2 .req STRIDE_M
RB_FLDS .req r8 /* the reloaded constants have to be at consecutive registers starting at an even one */
A_SRC .req r8
HALF .req r9
WK3 .req r10
WK4 .req r11
WK5 .req SCRATCH
WK6 .req ORIG_W
line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
.endm
.macro over_n_8888_8888_ca_cleanup
add sp, sp, #16
.set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-16
.unreq WK0
.unreq WK1
.unreq RB_SRC
.unreq AG_SRC
.unreq WK2
.unreq RB_FLDS
.unreq A_SRC
.unreq HALF
.unreq WK3
.unreq WK4
.unreq WK5
.unreq WK6
WK0 .req r8
WK1 .req r9
WK2 .req r10
WK3 .req r11
.endm
.macro over_n_8888_8888_ca_1pixel_head
pixld , 4, 6, MASK, 0
pixld , 4, 0, DST, 0
.endm
.macro over_n_8888_8888_ca_1pixel_tail
ldrd A_SRC, HALF, [sp, #LOCALS_STACK_OFFSET+8]
uxtb16 WK1, WK6 /* rb_mask (first step of hard case placed in what would otherwise be a stall) */
teq WK6, WK6, asr #32 /* Zc if transparent, ZC if opaque */
bne 20f
bcc 40f
/* Mask is fully opaque (all channels) */
ldr WK6, [sp, #ARGS_STACK_OFFSET] /* get SRC back */
eors A_SRC, A_SRC, #0xFF
bne 10f
/* Source is also opaque - same as src_8888_8888 */
mov WK0, WK6
b 30f
10: /* Same as over_8888_8888 */
mul_8888_8 WK0, A_SRC, WK5, HALF
uqadd8 WK0, WK0, WK6
b 30f
20: /* No simplifications possible - do it the hard way */
uxtb16 WK2, WK6, ror #8 /* ag_mask */
mla WK3, WK1, A_SRC, HALF /* rb_mul; 2 cycles */
mla WK4, WK2, A_SRC, HALF /* ag_mul; 2 cycles */
ldrd RB_FLDS, HALF, [sp, #LOCALS_STACK_OFFSET]
uxtb16 WK5, WK0 /* rb_dest */
uxtab16 WK3, WK3, WK3, ror #8
uxtb16 WK6, WK0, ror #8 /* ag_dest */
uxtab16 WK4, WK4, WK4, ror #8
smlatt WK0, RB_SRC, WK1, HALF /* red1 */
smlabb WK1, RB_SRC, WK1, HALF /* blue1 */
bic WK3, RB_FLDS, WK3, lsr #8
bic WK4, RB_FLDS, WK4, lsr #8
pkhbt WK1, WK1, WK0, lsl #16 /* rb1 */
smlatt WK0, WK5, WK3, HALF /* red2 */
smlabb WK3, WK5, WK3, HALF /* blue2 */
uxtab16 WK1, WK1, WK1, ror #8
smlatt WK5, AG_SRC, WK2, HALF /* alpha1 */
pkhbt WK3, WK3, WK0, lsl #16 /* rb2 */
smlabb WK0, AG_SRC, WK2, HALF /* green1 */
smlatt WK2, WK6, WK4, HALF /* alpha2 */
smlabb WK4, WK6, WK4, HALF /* green2 */
pkhbt WK0, WK0, WK5, lsl #16 /* ag1 */
uxtab16 WK3, WK3, WK3, ror #8
pkhbt WK4, WK4, WK2, lsl #16 /* ag2 */
uxtab16 WK0, WK0, WK0, ror #8
uxtab16 WK4, WK4, WK4, ror #8
mov WK1, WK1, ror #8
mov WK3, WK3, ror #8
sel WK2, WK1, WK0 /* recombine source*mask */
sel WK1, WK3, WK4 /* recombine dest*(1-source_alpha*mask) */
uqadd8 WK0, WK1, WK2 /* followed by 1 stall */
30: /* The destination buffer is already in the L1 cache, so
* there's little point in amalgamating writes */
pixst , 4, 0, DST
40:
.endm
.macro over_n_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
.rept (numbytes / 4) - 1
over_n_8888_8888_ca_1pixel_head
over_n_8888_8888_ca_1pixel_tail
.endr
over_n_8888_8888_ca_1pixel_head
.endm
.macro over_n_8888_8888_ca_process_tail cond, numbytes, firstreg
over_n_8888_8888_ca_1pixel_tail
.endm
pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6
ldr ip, [sp]
cmp ip, #-1
beq pixman_composite_over_white_8888_8888_ca_asm_armv6
/* else drop through... */
.endfunc
generate_composite_function \
pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32, \
FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0, \
2, /* prefetch distance */ \
over_n_8888_8888_ca_init, \
nop_macro, /* newline */ \
over_n_8888_8888_ca_cleanup, \
over_n_8888_8888_ca_process_head, \
over_n_8888_8888_ca_process_tail
/******************************************************************************/
.macro in_reverse_8888_8888_init
/* Hold loop invariant in MASK */
ldr MASK, =0x00800080
/* Set GE[3:0] to 0101 so SEL instructions do what we want */
uadd8 SCRATCH, MASK, MASK
/* Offset the source pointer: we only need the alpha bytes */
add SRC, SRC, #3
line_saved_regs ORIG_W
.endm
.macro in_reverse_8888_8888_head numbytes, reg1, reg2, reg3
ldrb ORIG_W, [SRC], #4
.if numbytes >= 8
ldrb WK&reg1, [SRC], #4
.if numbytes == 16
ldrb WK&reg2, [SRC], #4
ldrb WK&reg3, [SRC], #4
.endif
.endif
add DST, DST, #numbytes
.endm
.macro in_reverse_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
in_reverse_8888_8888_head numbytes, firstreg, %(firstreg+1), %(firstreg+2)
.endm
.macro in_reverse_8888_8888_1pixel s, d, offset, is_only
.if is_only != 1
movs s, ORIG_W
.if offset != 0
ldrb ORIG_W, [SRC, #offset]
.endif
beq 01f
teq STRIDE_M, #0xFF
beq 02f
.endif
uxtb16 SCRATCH, d /* rb_dest */
uxtb16 d, d, ror #8 /* ag_dest */
mla SCRATCH, SCRATCH, s, MASK
mla d, d, s, MASK
uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
uxtab16 d, d, d, ror #8
mov SCRATCH, SCRATCH, ror #8
sel d, SCRATCH, d
b 02f
.if offset == 0
48: /* Last mov d,#0 of the set - used as part of shortcut for
* source values all 0 */
.endif
01: mov d, #0
02:
.endm
.macro in_reverse_8888_8888_tail numbytes, reg1, reg2, reg3, reg4
.if numbytes == 4
teq ORIG_W, ORIG_W, asr #32
ldrne WK&reg1, [DST, #-4]
.elseif numbytes == 8
teq ORIG_W, WK&reg1
teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */
ldmnedb DST, {WK&reg1-WK&reg2}
.else
teq ORIG_W, WK&reg1
teqeq ORIG_W, WK&reg2
teqeq ORIG_W, WK&reg3
teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */
ldmnedb DST, {WK&reg1-WK&reg4}
.endif
cmnne DST, #0 /* clear C if NE */
bcs 49f /* no writes to dest if source all -1 */
beq 48f /* set dest to all 0 if source all 0 */
.if numbytes == 4
in_reverse_8888_8888_1pixel ORIG_W, WK&reg1, 0, 1
str WK&reg1, [DST, #-4]
.elseif numbytes == 8
in_reverse_8888_8888_1pixel STRIDE_M, WK&reg1, -4, 0
in_reverse_8888_8888_1pixel STRIDE_M, WK&reg2, 0, 0
stmdb DST, {WK&reg1-WK&reg2}
.else
in_reverse_8888_8888_1pixel STRIDE_M, WK&reg1, -12, 0
in_reverse_8888_8888_1pixel STRIDE_M, WK&reg2, -8, 0
in_reverse_8888_8888_1pixel STRIDE_M, WK&reg3, -4, 0
in_reverse_8888_8888_1pixel STRIDE_M, WK&reg4, 0, 0
stmdb DST, {WK&reg1-WK&reg4}
.endif
49:
.endm
.macro in_reverse_8888_8888_process_tail cond, numbytes, firstreg
in_reverse_8888_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
.endm
generate_composite_function \
pixman_composite_in_reverse_8888_8888_asm_armv6, 32, 0, 32, \
FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_NO_PRELOAD_DST, \
2, /* prefetch distance */ \
in_reverse_8888_8888_init, \
nop_macro, /* newline */ \
nop_macro, /* cleanup */ \
in_reverse_8888_8888_process_head, \
in_reverse_8888_8888_process_tail
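in_reverse is `dest IN source`: every destination channel is scaled by the source alpha, which is why the init code offsets SRC by 3 and fetches nothing but alpha bytes. The tail's teq chains implement two shortcuts: alphas all 0 store zeroes, alphas all 0xFF store nothing. Per pixel, in scalar C:

#include <stdint.h>

static uint32_t
in_reverse_pixel (uint32_t dst, uint32_t src_alpha)   /* alpha in 0..255 */
{
    uint32_t out = 0;
    int shift;

    if (src_alpha == 0)
        return 0;          /* transparent source clears dest */
    if (src_alpha == 0xff)
        return dst;        /* opaque source leaves dest alone */

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t d = (dst >> shift) & 0xff;
        uint32_t t = d * src_alpha + 0x80;    /* rounded d*sa/255 */

        out |= (((t + (t >> 8)) >> 8) & 0xff) << shift;
    }

    return out;
}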
/******************************************************************************/
.macro over_n_8888_init
ldr SRC, [sp, #ARGS_STACK_OFFSET]
/* Hold loop invariant in MASK */
ldr MASK, =0x00800080
/* Hold multiplier for destination in STRIDE_M */
mov STRIDE_M, #255
sub STRIDE_M, STRIDE_M, SRC, lsr #24
/* Set GE[3:0] to 0101 so SEL instructions do what we want */
uadd8 SCRATCH, MASK, MASK
.endm
.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
pixld , numbytes, firstreg, DST, 0
.endm
.macro over_n_8888_1pixel dst
mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK
uqadd8 WK&dst, WK&dst, SRC
.endm
.macro over_n_8888_process_tail cond, numbytes, firstreg
.set PROCESS_REG, firstreg
.rept numbytes / 4
over_n_8888_1pixel %(PROCESS_REG)
.set PROCESS_REG, PROCESS_REG+1
.endr
pixst , numbytes, firstreg, DST
.endm
generate_composite_function \
pixman_composite_over_n_8888_asm_armv6, 0, 0, 32, \
FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE, \
2, /* prefetch distance */ \
over_n_8888_init, \
nop_macro, /* newline */ \
nop_macro, /* cleanup */ \
over_n_8888_process_head, \
over_n_8888_process_tail
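over_n is plain OVER with a solid source, so the interesting part is the init macro hoisting the loop invariant: 255 minus the source alpha is computed once into STRIDE_M, and each pixel then costs one multiply plus one saturating add. Scalar equivalent:

#include <stdint.h>

static uint32_t
over_n_pixel (uint32_t src, uint32_t dst)
{
    uint32_t ia = 255 - (src >> 24);   /* hoisted in over_n_8888_init */
    uint32_t out = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t s = (src >> shift) & 0xff;
        uint32_t d = (dst >> shift) & 0xff;
        uint32_t t = d * ia + 0x80;    /* rounded d*ia/255 */
        uint32_t c = s + ((t + (t >> 8)) >> 8);

        if (c > 0xff)
            c = 0xff;                  /* uqadd8 saturates per byte */
        out |= c << shift;
    }

    return out;
}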
/******************************************************************************/

View file

@ -76,6 +76,16 @@
.set FLAG_SPILL_LINE_VARS, 48
.set FLAG_PROCESS_CORRUPTS_SCRATCH, 0
.set FLAG_PROCESS_PRESERVES_SCRATCH, 64
.set FLAG_PROCESS_PRESERVES_WK0, 0
.set FLAG_PROCESS_CORRUPTS_WK0, 128 /* if possible, use the specified register(s) instead so WK0 can hold number of leading pixels */
.set FLAG_PRELOAD_DST, 0
.set FLAG_NO_PRELOAD_DST, 256
/*
* Number of bytes by which to adjust preload offset of destination
* buffer (allows preload instruction to be moved before the load(s))
*/
.set DST_PRELOAD_BIAS, 0
/*
* Offset into stack where mask and source pointer/stride can be accessed.
@ -86,6 +96,11 @@
.set ARGS_STACK_OFFSET, (9*4)
#endif
/*
* Offset into stack where space allocated during init macro can be accessed.
*/
.set LOCALS_STACK_OFFSET, 0
/*
* Constants for selecting preferable prefetch type.
*/
@ -196,8 +211,8 @@
PF add, SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift
PF and, SCRATCH, SCRATCH, #31
PF rsb, SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift
PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */
PF movs, SCRATCH, SCRATCH, #32-6 /* so this sets NC / nc / Nc */
PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */
PF movs, SCRATCH, SCRATCH, lsl #32-6 /* so this sets NC / nc / Nc */
PF bcs, 61f
PF bpl, 60f
PF pld, [ptr, #32*(prefetch_distance+2)]
@ -359,23 +374,41 @@
.macro test_bits_1_0_ptr
.if (flags) & FLAG_PROCESS_CORRUPTS_WK0
movs SCRATCH, X, lsl #32-1 /* C,N = bits 1,0 of DST */
.else
movs SCRATCH, WK0, lsl #32-1 /* C,N = bits 1,0 of DST */
.endif
.endm
.macro test_bits_3_2_ptr
.if (flags) & FLAG_PROCESS_CORRUPTS_WK0
movs SCRATCH, X, lsl #32-3 /* C,N = bits 3, 2 of DST */
.else
movs SCRATCH, WK0, lsl #32-3 /* C,N = bits 3, 2 of DST */
.endif
.endm
.macro leading_15bytes process_head, process_tail
/* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */
.set DECREMENT_X, 1
.if (flags) & FLAG_PROCESS_CORRUPTS_WK0
.set DECREMENT_X, 0
sub X, X, WK0, lsr #dst_bpp_shift
str X, [sp, #LINE_SAVED_REG_COUNT*4]
mov X, WK0
.endif
/* Use unaligned loads in all cases for simplicity */
.if dst_w_bpp == 8
conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, 1
conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X
.elseif dst_w_bpp == 16
test_bits_1_0_ptr
conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, 1
conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X
.endif
conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X
.if (flags) & FLAG_PROCESS_CORRUPTS_WK0
ldr X, [sp, #LINE_SAVED_REG_COUNT*4]
.endif
conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, 1
.endm
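leading_15bytes is fed the number of bytes left until the destination is 16-byte aligned; later hunks change how that count reaches WK0 (an ands/rsb pair instead of the low bits of a negation) but the value is the same. In C terms:

#include <stdint.h>

/* Bytes to process before dst reaches 16-byte alignment. Both the
 * old (-dst & 15) form and the new 16 - (dst & 15) form give the
 * same 1..15 answer; the aligned case branches away beforehand. */
static unsigned
leading_bytes (uintptr_t dst)
{
    unsigned low = (unsigned) dst & 15;

    return low ? 16 - low : 0;
}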
.macro test_bits_3_2_pix
@ -414,7 +447,7 @@
preload_middle src_bpp, SRC, 0
preload_middle mask_bpp, MASK, 0
.endif
.if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0)
.if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) && (((flags) & FLAG_NO_PRELOAD_DST) == 0)
/* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that
* destination prefetches are 32-byte aligned. It's also the easiest channel to offset
* preloads for, to achieve staggered prefetches for multiple channels, because there are
@ -437,11 +470,11 @@
.if dst_r_bpp > 0
tst DST, #16
bne 111f
process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16
process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16 + DST_PRELOAD_BIAS
b 112f
111:
.endif
process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0
process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0 + DST_PRELOAD_BIAS
112:
/* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
.if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256)
@ -449,7 +482,9 @@
.endif
preload_trailing src_bpp, src_bpp_shift, SRC
preload_trailing mask_bpp, mask_bpp_shift, MASK
.if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_trailing dst_r_bpp, dst_bpp_shift, DST
.endif
add X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
/* The remainder of the line is handled identically to the medium case */
medium_case_inner_loop_and_trailing_pixels process_head, process_tail,, exit_label, unaligned_src, unaligned_mask
@ -561,13 +596,7 @@
process_tail, \
process_inner_loop
.func fname
.global fname
/* For ELF format also set function visibility to hidden */
#ifdef __ELF__
.hidden fname
.type fname, %function
#endif
pixman_asm_function fname
/*
* Make some macro arguments globally visible and accessible
@ -679,16 +708,12 @@
SCRATCH .req r12
ORIG_W .req r14 /* width (pixels) */
fname:
.fnstart
.save {r4-r11, lr}
push {r4-r11, lr} /* save all registers */
subs Y, Y, #1
blo 199f
#ifdef DEBUG_PARAMS
.pad #9*4
sub sp, sp, #9*4
#endif
@ -708,6 +733,13 @@ fname:
#endif
init
.if (flags) & FLAG_PROCESS_CORRUPTS_WK0
/* Reserve a word in which to store X during leading pixels */
sub sp, sp, #4
.set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+4
.set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET+4
.endif
lsl STRIDE_D, #dst_bpp_shift /* stride in bytes */
sub STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift
@ -737,42 +769,49 @@ fname:
.if (flags) & FLAG_SPILL_LINE_VARS_WIDE
/* This is stmdb sp!,{} */
.word 0xE92D0000 | LINE_SAVED_REGS
.set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
.set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
.endif
151: /* New line */
newline
preload_leading_step1 src_bpp, WK1, SRC
preload_leading_step1 mask_bpp, WK2, MASK
.if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_leading_step1 dst_r_bpp, WK3, DST
tst DST, #15
beq 154f
rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
.if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp)
PF and, WK0, WK0, #15
.endif
ands WK0, DST, #15
beq 154f
rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */
preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC
preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK
.if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST
.endif
leading_15bytes process_head, process_tail
154: /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */
.if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
.if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
and SCRATCH, SRC, #31
rsb SCRATCH, SCRATCH, #32*prefetch_distance
.elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
.elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
and SCRATCH, MASK, #31
rsb SCRATCH, SCRATCH, #32*prefetch_distance
.endif
.ifc "process_inner_loop",""
.endif
.ifc "process_inner_loop",""
switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f
.else
.else
switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f
.endif
.endif
157: /* Check for another line */
end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b
.if (flags) & FLAG_SPILL_LINE_VARS_WIDE
.set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
.set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
.endif
.endif
.ltorg
@ -782,17 +821,21 @@ fname:
.if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
/* This is stmdb sp!,{} */
.word 0xE92D0000 | LINE_SAVED_REGS
.set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
.set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
.endif
161: /* New line */
newline
preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */
preload_line 0, mask_bpp, mask_bpp_shift, MASK
.if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_line 0, dst_r_bpp, dst_bpp_shift, DST
.endif
sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */
tst DST, #15
ands WK0, DST, #15
beq 164f
rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */
leading_15bytes process_head, process_tail
@ -816,7 +859,9 @@ fname:
newline
preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */
preload_line 1, mask_bpp, mask_bpp_shift, MASK
.if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_line 1, dst_r_bpp, dst_bpp_shift, DST
.endif
.if dst_w_bpp == 8
tst DST, #3
@ -847,12 +892,22 @@ fname:
177: /* Check for another line */
end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one
.if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
.set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
.set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
.endif
197:
.if (flags) & FLAG_SPILL_LINE_VARS
add sp, sp, #LINE_SAVED_REG_COUNT*4
.endif
198:
.if (flags) & FLAG_PROCESS_CORRUPTS_WK0
.set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-4
.set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET-4
add sp, sp, #4
.endif
cleanup
#ifdef DEBUG_PARAMS
@ -860,7 +915,6 @@ fname:
#endif
199:
pop {r4-r11, pc} /* exit */
.fnend
.ltorg

View file

@ -41,11 +41,20 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888,
uint16_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_0565,
uint32_t, 1, uint16_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
uint32_t, 1, uint32_t, 1)
@ -53,6 +62,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
uint8_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8888_8888_ca,
uint32_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
uint16_t, uint16_t)
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
@ -216,6 +228,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
@ -225,6 +242,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
@ -232,15 +256,25 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, a8r8g8b8, armv6_composite_in_reverse_8888_8888),
PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, x8r8g8b8, armv6_composite_in_reverse_8888_8888),
PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, a8b8g8r8, armv6_composite_in_reverse_8888_8888),
PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, x8b8g8r8, armv6_composite_in_reverse_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, armv6_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, armv6_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, armv6_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, armv6_composite_over_n_8888_8888_ca),
SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
{ PIXMAN_OP_NONE },
};


@ -96,33 +96,29 @@ detect_cpu_features (void)
#elif defined(__ANDROID__) || defined(ANDROID) /* Android */
#include <cpu-features.h>
static arm_cpu_features_t
detect_cpu_features (void)
{
arm_cpu_features_t features = 0;
char buf[1024];
char* pos;
const char* ver_token = "CPU architecture: ";
FILE* f = fopen("/proc/cpuinfo", "r");
if (!f) {
return features;
}
AndroidCpuFamily cpu_family;
uint64_t cpu_features;
fread(buf, sizeof(char), sizeof(buf), f);
fclose(f);
pos = strstr(buf, ver_token);
if (pos) {
char vchar = *(pos + strlen(ver_token));
if (vchar >= '0' && vchar <= '9') {
int ver = vchar - '0';
if (ver >= 7)
features |= ARM_V7;
}
cpu_family = android_getCpuFamily();
cpu_features = android_getCpuFeatures();
if (cpu_family == ANDROID_CPU_FAMILY_ARM)
{
if (cpu_features & ANDROID_CPU_ARM_FEATURE_ARMv7)
features |= ARM_V7;
if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFPv3)
features |= ARM_VFP;
if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)
features |= ARM_NEON;
}
if (strstr(buf, "neon") != NULL)
features |= ARM_NEON;
if (strstr(buf, "vfp") != NULL)
features |= ARM_VFP;
return features;
}
@ -180,6 +176,31 @@ detect_cpu_features (void)
return features;
}
#elif defined (_3DS) /* 3DS homebrew (devkitARM) */
static arm_cpu_features_t
detect_cpu_features (void)
{
arm_cpu_features_t features = 0;
features |= ARM_V6;
return features;
}
#elif defined (PSP2) || defined (__SWITCH__)
/* Vita (VitaSDK) or Switch (devkitA64) homebrew */
static arm_cpu_features_t
detect_cpu_features (void)
{
arm_cpu_features_t features = 0;
features |= ARM_NEON;
return features;
}
#else /* Unknown */
static arm_cpu_features_t

Diff not shown because of its large size.


@ -42,8 +42,6 @@
#define force_inline __inline__
#endif
#define IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN)
typedef float (* combine_channel_t) (float sa, float s, float da, float d);
static force_inline void
@ -203,56 +201,56 @@ get_factor (combine_factor_t factor, float sa, float da)
break;
case SA_OVER_DA:
if (IS_ZERO (da))
if (FLOAT_IS_ZERO (da))
f = 1.0f;
else
f = CLAMP (sa / da);
break;
case DA_OVER_SA:
if (IS_ZERO (sa))
if (FLOAT_IS_ZERO (sa))
f = 1.0f;
else
f = CLAMP (da / sa);
break;
case INV_SA_OVER_DA:
if (IS_ZERO (da))
if (FLOAT_IS_ZERO (da))
f = 1.0f;
else
f = CLAMP ((1.0f - sa) / da);
break;
case INV_DA_OVER_SA:
if (IS_ZERO (sa))
if (FLOAT_IS_ZERO (sa))
f = 1.0f;
else
f = CLAMP ((1.0f - da) / sa);
break;
case ONE_MINUS_SA_OVER_DA:
if (IS_ZERO (da))
if (FLOAT_IS_ZERO (da))
f = 0.0f;
else
f = CLAMP (1.0f - sa / da);
break;
case ONE_MINUS_DA_OVER_SA:
if (IS_ZERO (sa))
if (FLOAT_IS_ZERO (sa))
f = 0.0f;
else
f = CLAMP (1.0f - da / sa);
break;
case ONE_MINUS_INV_DA_OVER_SA:
if (IS_ZERO (sa))
if (FLOAT_IS_ZERO (sa))
f = 0.0f;
else
f = CLAMP (1.0f - (1.0f - da) / sa);
break;
case ONE_MINUS_INV_SA_OVER_DA:
if (IS_ZERO (da))
if (FLOAT_IS_ZERO (da))
f = 0.0f;
else
f = CLAMP (1.0f - (1.0f - sa) / da);
@ -321,23 +319,44 @@ MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA)
*
* The following blend modes have been taken from the PDF ISO 32000
* specification, which at this point in time is available from
* http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
* The relevant chapters are 11.3.5 and 11.3.6.
*
* http://www.adobe.com/devnet/pdf/pdf_reference.html
*
* The specific documents of interest are the PDF spec itself:
*
* http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
*
* chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
* 9.1 and Reader 9.1:
*
* http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
*
* that clarifies the specifications for blend modes ColorDodge and
* ColorBurn.
*
* The formula for computing the final pixel color given in 11.3.6 is:
* αr × Cr = (1 - αs) × αb × Cb + (1 - αb) × αs × Cs + αb × αs × B(Cb, Cs)
* with B() being the blend function.
* Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
*
* These blend modes should match the SVG filter draft specification, as
* it has been designed to mirror ISO 32000. Note that at the current point
* no released draft exists that shows this, as the formulas have not been
* updated yet after the release of ISO 32000.
* αr × Cr = (1 - αs) × αb × Cb + (1 - αb) × αs × Cs + αb × αs × B(Cb, Cs)
*
* The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
* PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
* argument. Note that this implementation operates on premultiplied colors,
* while the PDF specification does not. Therefore the code uses the formula
* ar.Cra = (1 - as) . Dca + (1 - ad) . Sca + B(Dca, ad, Sca, as)
* with B() being the blend function. When B(Cb, Cs) = Cs, this formula
* reduces to the regular OVER operator.
*
* Cs and Cb are not premultiplied, so in our implementation we instead
* use:
*
* cr = (1 - αs) × cb + (1 - αb) × cs + αb × αs × B (cb/αb, cs/αs)
*
* where cr, cs, and cb are premultiplied colors, and where the
*
* αb × αs × B(cb/αb, cs/αs)
*
* part is first arithmetically simplified under the assumption that αb
* and αs are not 0, and then updated to produce a meaningful result when
* they are.
*
* For all the blend mode operators, the alpha channel is given by
*
* αr = αs + αb - αb × αs
*/
#define MAKE_SEPARABLE_PDF_COMBINERS(name) \
@ -357,18 +376,55 @@ MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA)
\
MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c)
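As a concrete illustration of the premultiplied formula above, a single-channel combiner can be written straight from it. This is a hedged sketch, not code from the patch (the function name and callback shape are assumptions; the file itself builds its combiners with the MAKE_* macros):

/* Sketch: cr = (1 - as) * cb + (1 - ab) * cs + ab * as * B (cb/ab, cs/as).
 * Like the blend_* functions below, the callback is expected to return
 * the already-simplified "ab * as * B (...)" term.
 */
static float
combine_channel_sketch (float sa, float s, float da, float d,
                        float (*blend) (float sa, float s,
                                        float da, float d))
{
    /* s and d are the premultiplied source and destination values
     * for one channel; sa and da are the alphas. */
    return (1.0f - sa) * d + (1.0f - da) * s + blend (sa, s, da, d);
}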
/*
* Multiply
*
* ad * as * B(d / ad, s / as)
* = ad * as * d/ad * s/as
* = d * s
*
*/
static force_inline float
blend_multiply (float sa, float s, float da, float d)
{
return d * s;
}
/*
* Screen
*
* ad * as * B(d/ad, s/as)
* = ad * as * (d/ad + s/as - s/as * d/ad)
* = ad * s + as * d - s * d
*/
static force_inline float
blend_screen (float sa, float s, float da, float d)
{
return d * sa + s * da - s * d;
}
/*
* Overlay
*
* ad * as * B(d/ad, s/as)
* = ad * as * Hardlight (s, d)
* = if (d / ad < 0.5)
* as * ad * Multiply (s/as, 2 * d/ad)
* else
* as * ad * Screen (s/as, 2 * d / ad - 1)
* = if (d < 0.5 * ad)
* as * ad * s/as * 2 * d /ad
* else
* as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
* = if (2 * d < ad)
* 2 * s * d
* else
* ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
* = if (2 * d < ad)
* 2 * s * d
* else
* as * ad - 2 * (ad - d) * (as - s)
*/
static force_inline float
blend_overlay (float sa, float s, float da, float d)
{
@ -378,6 +434,13 @@ blend_overlay (float sa, float s, float da, float d)
return sa * da - 2 * (da - d) * (sa - s);
}
/*
* Darken
*
* ad * as * B(d/ad, s/as)
* = ad * as * MIN(d/ad, s/as)
* = MIN (as * d, ad * s)
*/
static force_inline float
blend_darken (float sa, float s, float da, float d)
{
@ -390,6 +453,13 @@ blend_darken (float sa, float s, float da, float d)
return s;
}
/*
* Lighten
*
* ad * as * B(d/ad, s/as)
* = ad * as * MAX(d/ad, s/as)
* = MAX (as * d, ad * s)
*/
static force_inline float
blend_lighten (float sa, float s, float da, float d)
{
@ -402,19 +472,57 @@ blend_lighten (float sa, float s, float da, float d)
return d;
}
/*
* Color dodge
*
* ad * as * B(d/ad, s/as)
* = if d/ad = 0
* ad * as * 0
* else if (d/ad >= (1 - s/as))
* ad * as * 1
* else
* ad * as * ((d/ad) / (1 - s/as))
* = if d = 0
* 0
* elif as * d >= ad * (as - s)
* ad * as
* else
* as * (as * d / (as - s))
*
*/
static force_inline float
blend_color_dodge (float sa, float s, float da, float d)
{
if (IS_ZERO (d))
if (FLOAT_IS_ZERO (d))
return 0.0f;
else if (d * sa >= sa * da - s * da)
return sa * da;
else if (IS_ZERO (sa - s))
else if (FLOAT_IS_ZERO (sa - s))
return sa * da;
else
return sa * sa * d / (sa - s);
}
/*
* Color burn
*
* We modify the first clause "if d = 1" to "if d >= 1" since with
* premultiplied colors d > 1 can actually happen.
*
* ad * as * B(d/ad, s/as)
* = if d/ad >= 1
* ad * as * 1
* elif (1 - d/ad) >= s/as
* ad * as * 0
* else
* ad * as * (1 - ((1 - d/ad) / (s/as)))
* = if d >= ad
* ad * as
* elif as * ad - as * d >= ad * s
* 0
* else
* ad * as - as * as * (ad - d) / s
*/
static force_inline float
blend_color_burn (float sa, float s, float da, float d)
{
@ -422,12 +530,29 @@ blend_color_burn (float sa, float s, float da, float d)
return sa * da;
else if (sa * (da - d) >= s * da)
return 0.0f;
else if (IS_ZERO (s))
else if (FLOAT_IS_ZERO (s))
return 0.0f;
else
return sa * (da - sa * (da - d) / s);
}
/*
* Hard light
*
* ad * as * B(d/ad, s/as)
* = if (s/as <= 0.5)
* ad * as * Multiply (d/ad, 2 * s/as)
* else
* ad * as * Screen (d/ad, 2 * s/as - 1)
* = if 2 * s <= as
* ad * as * d/ad * 2 * s / as
* else
* ad * as * (d/ad + (2 * s/as - 1) - d/ad * (2 * s/as - 1))
* = if 2 * s <= as
* 2 * s * d
* else
* as * ad - 2 * (ad - d) * (as - s)
*/
static force_inline float
blend_hard_light (float sa, float s, float da, float d)
{
@ -437,21 +562,38 @@ blend_hard_light (float sa, float s, float da, float d)
return sa * da - 2 * (da - d) * (sa - s);
}
/*
* Soft light
*
* ad * as * B(d/ad, s/as)
* = if (s/as <= 0.5)
* ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad))
* else if (d/ad <= 0.25)
* ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad))
* else
* ad * as * (d/ad + (2 * s/as - 1) * (sqrt (d/ad) - d/ad))
* = if (2 * s <= as)
* d * as - d * (ad - d) * (as - 2 * s) / ad;
* else if (4 * d <= ad)
* (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3);
* else
* d * as + (sqrt (d * ad) - d) * (2 * s - as);
*/
static force_inline float
blend_soft_light (float sa, float s, float da, float d)
{
if (2 * s < sa)
if (2 * s <= sa)
{
if (IS_ZERO (da))
if (FLOAT_IS_ZERO (da))
return d * sa;
else
return d * sa - d * (da - d) * (sa - 2 * s) / da;
}
else
{
if (IS_ZERO (da))
if (FLOAT_IS_ZERO (da))
{
return 0.0f;
return d * sa;
}
else
{
@ -463,6 +605,20 @@ blend_soft_light (float sa, float s, float da, float d)
}
}
/*
* Difference
*
* ad * as * B(s/as, d/ad)
* = ad * as * abs (s/as - d/ad)
* = if (s/as <= d/ad)
* ad * as * (d/ad - s/as)
* else
* ad * as * (s/as - d/ad)
* = if (ad * s <= as * d)
* as * d - ad * s
* else
* ad * s - as * d
*/
static force_inline float
blend_difference (float sa, float s, float da, float d)
{
@ -475,6 +631,13 @@ blend_difference (float sa, float s, float da, float d)
return sda - dsa;
}
/*
* Exclusion
*
* ad * as * B(s/as, d/ad)
* = ad * as * (d/ad + s/as - 2 * d/ad * s/as)
* = as * d + ad * s - 2 * s * d
*/
static force_inline float
blend_exclusion (float sa, float s, float da, float d)
{
@ -494,116 +657,79 @@ MAKE_SEPARABLE_PDF_COMBINERS (difference)
MAKE_SEPARABLE_PDF_COMBINERS (exclusion)
/*
* PDF nonseparable blend modes.
*
* These are implemented using the following functions to operate in Hsl
* space, with Cmax, Cmid, Cmin referring to the max, mid and min value
* of the red, green and blue components.
* PDF nonseparable blend modes are implemented using the following functions
* to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
* and min value of the red, green and blue components.
*
* LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
*
* clip_color (C):
* l = LUM (C)
* min = Cmin
* max = Cmax
* if n < 0.0
* C = l + (((C - l) × l) / (l - min))
* if x > 1.0
* C = l + (((C - l) × (1 - l)) / (max - l))
* return C
* l = LUM (C)
* min = Cmin
* max = Cmax
* if n < 0.0
* C = l + (((C - l) × l) / (l - min))
* if x > 1.0
* C = l + (((C - l) × (1 - l)) / (max - l))
* return C
*
* set_lum (C, l):
* d = l - LUM (C)
* C += d
* return clip_color (C)
* d = l - LUM (C)
* C += d
* return clip_color (C)
*
* SAT (C) = CH_MAX (C) - CH_MIN (C)
*
* set_sat (C, s):
* if Cmax > Cmin
* Cmid = ( ( ( Cmid - Cmin ) × s ) / ( Cmax - Cmin ) )
* Cmax = s
* else
* Cmid = Cmax = 0.0
* Cmin = 0.0
* return C
* if Cmax > Cmin
* Cmid = ( ( ( Cmid - Cmin ) × s ) / ( Cmax - Cmin ) )
* Cmax = s
* else
* Cmid = Cmax = 0.0
* Cmin = 0.0
* return C
*/
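For concreteness, LUM above is just a weighted sum of the channels. A minimal standalone sketch (not part of the patch; the type name is an assumption, though the float rgb_t defined later in this file has the same shape):

typedef struct { float r, g, b; } rgb_sketch_t;

/* LUM (C) = 0.3 * Cred + 0.59 * Cgreen + 0.11 * Cblue */
static float
lum_sketch (const rgb_sketch_t *c)
{
    return c->r * 0.3f + c->g * 0.59f + c->b * 0.11f;
}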
/* For premultiplied colors, we need to know what happens when C is
* multiplied by a real number. LUM and SAT are linear:
*
* LUM (r × C) = r × LUM (C) SAT (r × C) = r × SAT (C)
* LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C)
*
* If we extend clip_color with an extra argument a and change
*
* if x >= 1.0
* if x >= 1.0
*
* into
*
* if x >= a
* if x >= a
*
* then clip_color is also linear:
*
* r * clip_color (C, a) = clip_color (r_c, ra);
* r * clip_color (C, a) = clip_color (r * C, r * a);
*
* for positive r.
*
* Similarly, we can extend set_lum with an extra argument that is just passed
* on to clip_color:
*
* r × set_lum ( C, l, a)
* r * set_lum (C, l, a)
*
* = r × clip_color ( C + l - LUM (C), a)
* = r × clip_color (C + l - LUM (C), a)
*
* = clip_color ( r * C + r × l - LUM (r × C), r * a)
* = clip_color (r * C + r × l - r * LUM (C), r * a)
*
* = set_lum ( r * C, r * l, r * a)
* = set_lum (r * C, r * l, r * a)
*
* Finally, set_sat:
*
* r * set_sat (C, s) = set_sat (x * C, r * s)
* r * set_sat (C, s) = set_sat (x * C, r * s)
*
* The above holds for all non-zero x because they x'es in the fraction for
* The above holds for all non-zero x, because the x'es in the fraction for
* C_mid cancel out. Specifically, it holds for x = r:
*
* r * set_sat (C, s) = set_sat (r_c, rs)
*
*
*
*
* So, for the non-separable PDF blend modes, we have (using s, d for
* non-premultiplied colors, and S, D for premultiplied:
*
* Color:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
* = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
*
*
* Luminosity:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
* = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
*
*
* Saturation:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
* = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
* a_s * LUM (D), a_s * a_d)
* = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d))
*
* Hue:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
* = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
* r * set_sat (C, s) = set_sat (r * C, r * s)
*
*/
typedef struct
{
float r;
@ -658,7 +784,7 @@ clip_color (rgb_t *color, float a)
if (n < 0.0f)
{
t = l - n;
if (IS_ZERO (t))
if (FLOAT_IS_ZERO (t))
{
color->r = 0.0f;
color->g = 0.0f;
@ -674,7 +800,7 @@ clip_color (rgb_t *color, float a)
if (x > a)
{
t = x - l;
if (IS_ZERO (t))
if (FLOAT_IS_ZERO (t))
{
color->r = a;
color->g = a;
@ -758,7 +884,7 @@ set_sat (rgb_t *src, float sat)
t = *max - *min;
if (IS_ZERO (t))
if (FLOAT_IS_ZERO (t))
{
*mid = *max = 0.0f;
}
@ -771,9 +897,12 @@ set_sat (rgb_t *src, float sat)
*min = 0.0f;
}
/*
* Hue:
* B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
/* Hue:
*
* as * ad * B(s/as, d/ad)
* = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM (d/ad), 1)
* = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad)
*
*/
static force_inline void
blend_hsl_hue (rgb_t *res,
@ -788,9 +917,14 @@ blend_hsl_hue (rgb_t *res,
set_lum (res, sa * da, get_lum (dest) * sa);
}
/*
* Saturation:
* B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
/*
* Saturation
*
* as * ad * B(s/as, d/ad)
* = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1)
* = set_lum (as * ad * set_sat (d/ad, SAT (s/as)),
* as * LUM (d), as * ad)
* = set_lum (set_sat (as * d, ad * SAT (s)), as * LUM (d), as * ad)
*/
static force_inline void
blend_hsl_saturation (rgb_t *res,
@ -805,9 +939,12 @@ blend_hsl_saturation (rgb_t *res,
set_lum (res, sa * da, get_lum (dest) * sa);
}
/*
* Color:
* B(Cb, Cs) = set_lum (Cs, LUM (Cb))
/*
* Color
*
* as * ad * B(s/as, d/ad)
* = as * ad * set_lum (s/as, LUM (d/ad), 1)
* = set_lum (s * ad, as * LUM (d), as * ad)
*/
static force_inline void
blend_hsl_color (rgb_t *res,
@ -822,8 +959,11 @@ blend_hsl_color (rgb_t *res,
}
/*
* Luminosity:
* B(Cb, Cs) = set_lum (Cb, LUM (Cs))
* Luminosity
*
* as * ad * B(s/as, d/ad)
* = as * ad * set_lum (d/ad, LUM (s/as), 1)
* = set_lum (as * d, ad * LUM (s), as * ad)
*/
static force_inline void
blend_hsl_luminosity (rgb_t *res,

Diff not shown because of its large size.


@ -1,226 +0,0 @@
#define COMPONENT_SIZE
#define MASK
#define ONE_HALF
#define A_SHIFT
#define R_SHIFT
#define G_SHIFT
#define A_MASK
#define R_MASK
#define G_MASK
#define RB_MASK
#define AG_MASK
#define RB_ONE_HALF
#define RB_MASK_PLUS_ONE
#define ALPHA_c(x) ((x) >> A_SHIFT)
#define RED_c(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_c(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_c(x) ((x) & MASK)
/*
* Helper macros.
*/
#define MUL_UNc(a, b, t) \
((t) = (a) * (comp2_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
#define DIV_UNc(a, b) \
(((comp2_t) (a) * MASK + ((b) / 2)) / (b))
#define ADD_UNc(x, y, t) \
((t) = (x) + (y), \
(comp4_t) (comp1_t) ((t) | (0 - ((t) >> G_SHIFT))))
#define DIV_ONE_UNc(x) \
(((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
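MUL_UNc and DIV_ONE_UNc together implement the classic exact division by 255: add ONE_HALF, then fold the high byte back in, which rounds correctly without an actual divide. A standalone check of that identity for every 8-bit product (an illustration, not part of the file):

#include <assert.h>
#include <stdint.h>

static void
check_div_255 (void)
{
    uint32_t a, b;

    for (a = 0; a < 256; ++a)
    {
        for (b = 0; b < 256; ++b)
        {
            uint32_t t = a * b + 0x80;            /* + ONE_HALF */
            uint32_t fast = (t + (t >> 8)) >> 8;  /* fold high byte in */
            uint32_t exact = (a * b + 127) / 255; /* rounded division */

            assert (fast == exact);
        }
    }
}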
/*
* The methods below use some tricks to be able to do two color
* components at the same time.
*/
/*
* x_rb = (x_rb * a) / 255
*/
#define UNc_rb_MUL_UNc(x, a, t) \
do \
{ \
t = ((x) & RB_MASK) * (a); \
t += RB_ONE_HALF; \
x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
x &= RB_MASK; \
} while (0)
/*
* x_rb = min (x_rb + y_rb, 255)
*/
#define UNc_rb_ADD_UNc_rb(x, y, t) \
do \
{ \
t = ((x) + (y)); \
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
x = (t & RB_MASK); \
} while (0)
/*
* x_rb = (x_rb * a_rb) / 255
*/
#define UNc_rb_MUL_UNc_rb(x, a, t) \
do \
{ \
t = (x & MASK) * (a & MASK); \
t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
t += RB_ONE_HALF; \
t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
x = t & RB_MASK; \
} while (0)
/*
* x_c = (x_c * a) / 255
*/
#define UNcx4_MUL_UNc(x, a) \
do \
{ \
comp4_t r1__, r2__, t__; \
\
r1__ = (x); \
UNc_rb_MUL_UNc (r1__, (a), t__); \
\
r2__ = (x) >> G_SHIFT; \
UNc_rb_MUL_UNc (r2__, (a), t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a) / 255 + y_c
*/
#define UNcx4_MUL_UNc_ADD_UNcx4(x, a, y) \
do \
{ \
comp4_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (y) & RB_MASK; \
UNc_rb_MUL_UNc (r1__, (a), t__); \
UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UNc_rb_MUL_UNc (r2__, (a), t__); \
UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a + y_c * b) / 255
*/
#define UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(x, a, y, b) \
do \
{ \
comp4_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (y); \
UNc_rb_MUL_UNc (r1__, (a), t__); \
UNc_rb_MUL_UNc (r2__, (b), t__); \
UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT); \
r3__ = ((y) >> G_SHIFT); \
UNc_rb_MUL_UNc (r2__, (a), t__); \
UNc_rb_MUL_UNc (r3__, (b), t__); \
UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a_c) / 255
*/
#define UNcx4_MUL_UNcx4(x, a) \
do \
{ \
comp4_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = (a) >> G_SHIFT; \
UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a_c) / 255 + y_c
*/
#define UNcx4_MUL_UNcx4_ADD_UNcx4(x, a, y) \
do \
{ \
comp4_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \
r2__ = (y) & RB_MASK; \
UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT); \
r3__ = ((a) >> G_SHIFT); \
UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a_c + y_c * b) / 255
*/
#define UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(x, a, y, b) \
do \
{ \
comp4_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \
r2__ = (y); \
UNc_rb_MUL_UNc (r2__, (b), t__); \
UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = (a) >> G_SHIFT; \
UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \
r3__ = (y) >> G_SHIFT; \
UNc_rb_MUL_UNc (r3__, (b), t__); \
UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
\
x = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
x_c = min(x_c + y_c, 255)
*/
#define UNcx4_ADD_UNcx4(x, y) \
do \
{ \
comp4_t r1__, r2__, r3__, t__; \
\
r1__ = (x) & RB_MASK; \
r2__ = (y) & RB_MASK; \
UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT) & RB_MASK; \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
\
x = r1__ | (r2__ << G_SHIFT); \
} while (0)


@ -1,114 +0,0 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <math.h>
#include <string.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
static force_inline uint32_t
combine_mask (const uint32_t src, const uint32_t mask)
{
uint32_t s, m;
m = mask >> A_SHIFT;
if (!m)
return 0;
s = src;
UN8x4_MUL_UN8 (s, m);
return s;
}
static void
combine_src_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
if (!mask)
memcpy (dest, src, width * sizeof (uint16_t));
else
{
uint16_t *d = (uint16_t*)dest;
uint16_t *src16 = (uint16_t*)src;
for (i = 0; i < width; ++i)
{
if ((*mask & 0xff000000) == 0xff000000) {
// it's likely worth special casing
// fully opaque because it avoids
// the cost of conversion as well as the multiplication
*(d + i) = *src16;
} else {
// the mask is still 32bits
uint32_t s = combine_mask (convert_0565_to_8888(*src16), *mask);
*(d + i) = convert_8888_to_0565(s);
}
mask++;
src16++;
}
}
}
static void
combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
if (!mask)
memcpy (dest, src, width * sizeof (uint16_t));
else
{
uint16_t *d = (uint16_t*)dest;
uint16_t *src16 = (uint16_t*)src;
for (i = 0; i < width; ++i)
{
if ((*mask & 0xff000000) == 0xff000000) {
// it's likely worth special casing
// fully opaque because it avoids
// the cost of conversion as well as the multiplication
*(d + i) = *src16;
} else if ((*mask & 0xff000000) == 0x00000000) {
// keep the dest the same
} else {
// the mask is still 32bits
uint32_t s = combine_mask (convert_0565_to_8888(*src16), *mask);
uint32_t ia = ALPHA_8 (~s);
uint32_t d32 = convert_0565_to_8888(*(d + i));
UN8x4_MUL_UN8_ADD_UN8x4 (d32, ia, s);
*(d + i) = convert_8888_to_0565(d32);
}
mask++;
src16++;
}
}
}
void
_pixman_setup_combiner_functions_16 (pixman_implementation_t *imp)
{
int i;
for (i = 0; i < PIXMAN_N_OPERATORS; i++) {
imp->combine_16[i] = NULL;
}
imp->combine_16[PIXMAN_OP_SRC] = combine_src_u;
imp->combine_16[PIXMAN_OP_OVER] = combine_over_u;
}

Diff not shown because of its large size.


@ -12,7 +12,7 @@
#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100
#define RB_MASK_PLUS_ONE 0x1000100
#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
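A note on this fix (not in the patch text itself): RB_MASK interleaves the red field in bits 16-23 and the blue field in bits 0-7, so "one past the top" of each field is 0x100 | (0x100 << 16) = 0x1000100. The old value 0x10000100 placed the upper bit one position too high, which broke the saturating add in the UNc_rb_ADD_UNc_rb-style helpers shown earlier.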

Diff not shown because of its large size.


@ -1,230 +0,0 @@
/* WARNING: This file is generated by combine.pl from combine.inc.
Please edit one of those files rather than this one. */
#line 1 "pixman-combine.c.template"
#define COMPONENT_SIZE 16
#define MASK 0xffffULL
#define ONE_HALF 0x8000ULL
#define A_SHIFT 16 * 3
#define R_SHIFT 16 * 2
#define G_SHIFT 16
#define A_MASK 0xffff000000000000ULL
#define R_MASK 0xffff00000000ULL
#define G_MASK 0xffff0000ULL
#define RB_MASK 0xffff0000ffffULL
#define AG_MASK 0xffff0000ffff0000ULL
#define RB_ONE_HALF 0x800000008000ULL
#define RB_MASK_PLUS_ONE 0x10000000010000ULL
#define ALPHA_16(x) ((x) >> A_SHIFT)
#define RED_16(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_16(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_16(x) ((x) & MASK)
/*
* Helper macros.
*/
#define MUL_UN16(a, b, t) \
((t) = (a) * (uint32_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
#define DIV_UN16(a, b) \
(((uint32_t) (a) * MASK + ((b) / 2)) / (b))
#define ADD_UN16(x, y, t) \
((t) = (x) + (y), \
(uint64_t) (uint16_t) ((t) | (0 - ((t) >> G_SHIFT))))
#define DIV_ONE_UN16(x) \
(((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
/*
* The methods below use some tricks to be able to do two color
* components at the same time.
*/
/*
* x_rb = (x_rb * a) / 255
*/
#define UN16_rb_MUL_UN16(x, a, t) \
do \
{ \
t = ((x) & RB_MASK) * (a); \
t += RB_ONE_HALF; \
x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
x &= RB_MASK; \
} while (0)
/*
* x_rb = min (x_rb + y_rb, 255)
*/
#define UN16_rb_ADD_UN16_rb(x, y, t) \
do \
{ \
t = ((x) + (y)); \
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
x = (t & RB_MASK); \
} while (0)
/*
* x_rb = (x_rb * a_rb) / 255
*/
#define UN16_rb_MUL_UN16_rb(x, a, t) \
do \
{ \
t = (x & MASK) * (a & MASK); \
t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
t += RB_ONE_HALF; \
t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
x = t & RB_MASK; \
} while (0)
/*
* x_c = (x_c * a) / 255
*/
#define UN16x4_MUL_UN16(x, a) \
do \
{ \
uint64_t r1__, r2__, t__; \
\
r1__ = (x); \
UN16_rb_MUL_UN16 (r1__, (a), t__); \
\
r2__ = (x) >> G_SHIFT; \
UN16_rb_MUL_UN16 (r2__, (a), t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a) / 255 + y_c
*/
#define UN16x4_MUL_UN16_ADD_UN16x4(x, a, y) \
do \
{ \
uint64_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (y) & RB_MASK; \
UN16_rb_MUL_UN16 (r1__, (a), t__); \
UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UN16_rb_MUL_UN16 (r2__, (a), t__); \
UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a + y_c * b) / 255
*/
#define UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16(x, a, y, b) \
do \
{ \
uint64_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (y); \
UN16_rb_MUL_UN16 (r1__, (a), t__); \
UN16_rb_MUL_UN16 (r2__, (b), t__); \
UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT); \
r3__ = ((y) >> G_SHIFT); \
UN16_rb_MUL_UN16 (r2__, (a), t__); \
UN16_rb_MUL_UN16 (r3__, (b), t__); \
UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a_c) / 255
*/
#define UN16x4_MUL_UN16x4(x, a) \
do \
{ \
uint64_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = (a) >> G_SHIFT; \
UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a_c) / 255 + y_c
*/
#define UN16x4_MUL_UN16x4_ADD_UN16x4(x, a, y) \
do \
{ \
uint64_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \
r2__ = (y) & RB_MASK; \
UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT); \
r3__ = ((a) >> G_SHIFT); \
UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
* x_c = (x_c * a_c + y_c * b) / 255
*/
#define UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16(x, a, y, b) \
do \
{ \
uint64_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \
r2__ = (y); \
UN16_rb_MUL_UN16 (r2__, (b), t__); \
UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = (a) >> G_SHIFT; \
UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \
r3__ = (y) >> G_SHIFT; \
UN16_rb_MUL_UN16 (r3__, (b), t__); \
UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
\
x = r1__ | (r2__ << G_SHIFT); \
} while (0)
/*
x_c = min(x_c + y_c, 255)
*/
#define UN16x4_ADD_UN16x4(x, y) \
do \
{ \
uint64_t r1__, r2__, r3__, t__; \
\
r1__ = (x) & RB_MASK; \
r2__ = (y) & RB_MASK; \
UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT) & RB_MASK; \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
\
x = r1__ | (r2__ << G_SHIFT); \
} while (0)


@ -18,6 +18,12 @@
# define FUNC ((const char*) ("???"))
#endif
#if defined (__GNUC__)
# define unlikely(expr) __builtin_expect ((expr), 0)
#else
# define unlikely(expr) (expr)
#endif
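For context, an illustrative use of the new annotation (not from the patch): rare failure paths can be marked cold so the compiler keeps the common path hot, e.g.

    if (unlikely (!f))
        return features;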
#if defined (__GNUC__)
# define MAYBE_UNUSED __attribute__((unused))
#else
@ -83,35 +89,18 @@
# endif
#endif
/* In libxul builds we don't ever want to export pixman symbols */
#if 1
#include "prcpucfg.h"
#ifdef HAVE_VISIBILITY_HIDDEN_ATTRIBUTE
#define CVISIBILITY_HIDDEN __attribute__((visibility("hidden")))
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
#define CVISIBILITY_HIDDEN __hidden
#else
#define CVISIBILITY_HIDDEN
#endif
/* In libxul builds we don't ever want to export cairo symbols */
#define PIXMAN_EXPORT extern CVISIBILITY_HIDDEN
#else
/* GCC visibility */
#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(_WIN32)
# define PIXMAN_EXPORT __attribute__ ((visibility("default")))
/* Sun Studio 8 visibility */
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
# define PIXMAN_EXPORT __global
#elif defined (_MSC_VER) || defined(__MINGW32__)
# define PIXMAN_EXPORT PIXMAN_API
#else
# define PIXMAN_EXPORT
#endif
#endif
/* member offsets */
#define CONTAINER_OF(type, member, data) \
((type *)(((uint8_t *)data) - offsetof (type, member)))
@ -131,12 +120,10 @@
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
#elif defined(__MINGW32__) || defined(PIXMAN_USE_XP_DLL_TLS_WORKAROUND)
#elif defined(__MINGW32__)
# define _NO_W32_PSEUDO_MODIFIERS
# include <windows.h>
#undef IN
#undef OUT
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static volatile int tls_ ## name ## _initialized = 0; \
@ -193,7 +180,7 @@
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
#elif defined(HAVE_PTHREAD_SETSPECIFIC)
#elif defined(HAVE_PTHREADS)
#include <pthread.h>


@ -51,7 +51,10 @@ coordinates_to_parameter (double x, double y, double angle)
}
static uint32_t *
conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
conical_get_scanline (pixman_iter_t *iter,
const uint32_t *mask,
int Bpp,
pixman_gradient_walker_write_t write_pixel)
{
pixman_image_t *image = iter->image;
int x = iter->x;
@ -61,7 +64,7 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
gradient_t *gradient = (gradient_t *)image;
conical_gradient_t *conical = (conical_gradient_t *)image;
uint32_t *end = buffer + width;
uint32_t *end = buffer + width * (Bpp / 4);
pixman_gradient_walker_t walker;
pixman_bool_t affine = TRUE;
double cx = 1.;
@ -109,11 +112,12 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
double t = coordinates_to_parameter (rx, ry, conical->angle);
*buffer = _pixman_gradient_walker_pixel (
&walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
write_pixel (&walker,
(pixman_fixed_48_16_t)pixman_double_to_fixed (t),
buffer);
}
++buffer;
buffer += (Bpp / 4);
rx += cx;
ry += cy;
@ -144,11 +148,12 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
t = coordinates_to_parameter (x, y, conical->angle);
*buffer = _pixman_gradient_walker_pixel (
&walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
write_pixel (&walker,
(pixman_fixed_48_16_t)pixman_double_to_fixed (t),
buffer);
}
++buffer;
buffer += (Bpp / 4);
rx += cx;
ry += cy;
@ -160,15 +165,18 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
static uint32_t *
conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
return conical_get_scanline (iter, mask, 4,
_pixman_gradient_walker_write_narrow);
}
static uint32_t *
conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
uint32_t *buffer = conical_get_scanline_narrow (iter, NULL);
pixman_expand_to_float (
(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
return buffer;
return conical_get_scanline (iter, NULL, 16,
_pixman_gradient_walker_write_wide);
}
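The design change above is worth spelling out: rather than duplicating the scanline walker for the 8-bit and float pipelines, the shared conical_get_scanline now takes the destination pixel size (Bpp) and a write-pixel callback, and the narrow and wide entry points become thin wrappers (4 bytes per pixel with the narrow writer, 16 with the wide one). The old wide path, which rendered narrow and then expanded with pixman_expand_to_float, is gone.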
void


@ -1,799 +0,0 @@
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <stdlib.h>
#if defined(USE_ARM_SIMD) && defined(_MSC_VER)
/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
#include <windows.h>
#endif
#if defined(__APPLE__)
#include "TargetConditionals.h"
#endif
#include "pixman-private.h"
#ifdef USE_VMX
/* The CPU detection code needs to be in a file not compiled with
* "-maltivec -mabi=altivec", as gcc would try to save vector register
* across function calls causing SIGILL on cpus without Altivec/vmx.
*/
static pixman_bool_t initialized = FALSE;
static volatile pixman_bool_t have_vmx = TRUE;
#ifdef __APPLE__
#include <sys/sysctl.h>
static pixman_bool_t
pixman_have_vmx (void)
{
if (!initialized)
{
size_t length = sizeof(have_vmx);
int error =
sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
if (error)
have_vmx = FALSE;
initialized = TRUE;
}
return have_vmx;
}
#elif defined (__OpenBSD__)
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
static pixman_bool_t
pixman_have_vmx (void)
{
if (!initialized)
{
int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
size_t length = sizeof(have_vmx);
int error =
sysctl (mib, 2, &have_vmx, &length, NULL, 0);
if (error != 0)
have_vmx = FALSE;
initialized = TRUE;
}
return have_vmx;
}
#elif defined (__linux__)
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <linux/auxvec.h>
#include <asm/cputable.h>
static pixman_bool_t
pixman_have_vmx (void)
{
if (!initialized)
{
char fname[64];
unsigned long buf[64];
ssize_t count = 0;
pid_t pid;
int fd, i;
pid = getpid ();
snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid);
fd = open (fname, O_RDONLY);
if (fd >= 0)
{
for (i = 0; i <= (count / sizeof(unsigned long)); i += 2)
{
/* Read more if buf is empty... */
if (i == (count / sizeof(unsigned long)))
{
count = read (fd, buf, sizeof(buf));
if (count <= 0)
break;
i = 0;
}
if (buf[i] == AT_HWCAP)
{
have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC);
initialized = TRUE;
break;
}
else if (buf[i] == AT_NULL)
{
break;
}
}
close (fd);
}
}
if (!initialized)
{
/* Something went wrong. Assume 'no' rather than playing
fragile tricks with catching SIGILL. */
have_vmx = FALSE;
initialized = TRUE;
}
return have_vmx;
}
#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */
#include <signal.h>
#include <setjmp.h>
static jmp_buf jump_env;
static void
vmx_test (int sig,
siginfo_t *si,
void * unused)
{
longjmp (jump_env, 1);
}
static pixman_bool_t
pixman_have_vmx (void)
{
struct sigaction sa, osa;
int jmp_result;
if (!initialized)
{
sa.sa_flags = SA_SIGINFO;
sigemptyset (&sa.sa_mask);
sa.sa_sigaction = vmx_test;
sigaction (SIGILL, &sa, &osa);
jmp_result = setjmp (jump_env);
if (jmp_result == 0)
{
asm volatile ( "vor 0, 0, 0" );
}
sigaction (SIGILL, &osa, NULL);
have_vmx = (jmp_result == 0);
initialized = TRUE;
}
return have_vmx;
}
#endif /* __APPLE__ */
#endif /* USE_VMX */
#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT)
#if defined(_MSC_VER)
#if defined(USE_ARM_SIMD)
extern int pixman_msvc_try_arm_simd_op ();
pixman_bool_t
pixman_have_arm_simd (void)
{
static pixman_bool_t initialized = FALSE;
static pixman_bool_t have_arm_simd = FALSE;
if (!initialized)
{
__try {
pixman_msvc_try_arm_simd_op ();
have_arm_simd = TRUE;
} __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) {
have_arm_simd = FALSE;
}
initialized = TRUE;
}
return have_arm_simd;
}
#endif /* USE_ARM_SIMD */
#if defined(USE_ARM_NEON)
extern int pixman_msvc_try_arm_neon_op ();
pixman_bool_t
pixman_have_arm_neon (void)
{
static pixman_bool_t initialized = FALSE;
static pixman_bool_t have_arm_neon = FALSE;
if (!initialized)
{
__try
{
pixman_msvc_try_arm_neon_op ();
have_arm_neon = TRUE;
}
__except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
{
have_arm_neon = FALSE;
}
initialized = TRUE;
}
return have_arm_neon;
}
#endif /* USE_ARM_NEON */
#elif (defined (__APPLE__) && defined(TARGET_OS_IPHONE)) /* iOS (iPhone/iPad/iPod touch) */
/* Detection of ARM NEON on iOS is fairly simple because iOS binaries
* contain separate executable images for each processor architecture.
* So all we have to do is detect the armv7 architecture build. The
* operating system automatically runs the armv7 binary for armv7 devices
* and the armv6 binary for armv6 devices.
*/
pixman_bool_t
pixman_have_arm_simd (void)
{
#if defined(USE_ARM_SIMD)
return TRUE;
#else
return FALSE;
#endif
}
pixman_bool_t
pixman_have_arm_neon (void)
{
#if defined(USE_ARM_NEON) && defined(__ARM_NEON__)
/* This is an armv7 cpu build */
return TRUE;
#else
/* This is an armv6 cpu build */
return FALSE;
#endif
}
pixman_bool_t
pixman_have_arm_iwmmxt (void)
{
#if defined(USE_ARM_IWMMXT)
return FALSE;
#else
return FALSE;
#endif
}
#elif defined (__linux__) || defined(__ANDROID__) || defined(ANDROID) /* linux ELF or ANDROID */
static pixman_bool_t arm_has_v7 = FALSE;
static pixman_bool_t arm_has_v6 = FALSE;
static pixman_bool_t arm_has_vfp = FALSE;
static pixman_bool_t arm_has_neon = FALSE;
static pixman_bool_t arm_has_iwmmxt = FALSE;
static pixman_bool_t arm_tests_initialized = FALSE;
#if defined(__ANDROID__) || defined(ANDROID) /* Android device support */
static void
pixman_arm_read_auxv_or_cpu_features ()
{
char buf[1024];
char* pos;
const char* ver_token = "CPU architecture: ";
FILE* f = fopen("/proc/cpuinfo", "r");
if (!f) {
arm_tests_initialized = TRUE;
return;
}
fread(buf, sizeof(char), sizeof(buf), f);
fclose(f);
pos = strstr(buf, ver_token);
if (pos) {
char vchar = *(pos + strlen(ver_token));
if (vchar >= '0' && vchar <= '9') {
int ver = vchar - '0';
arm_has_v7 = ver >= 7;
arm_has_v6 = ver >= 6;
}
}
arm_has_neon = strstr(buf, "neon") != NULL;
arm_has_vfp = strstr(buf, "vfp") != NULL;
arm_has_iwmmxt = strstr(buf, "iwmmxt") != NULL;
arm_tests_initialized = TRUE;
}
#elif defined (__linux__) /* linux ELF */
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <string.h>
#include <elf.h>
static void
pixman_arm_read_auxv_or_cpu_features ()
{
int fd;
Elf32_auxv_t aux;
fd = open ("/proc/self/auxv", O_RDONLY);
if (fd >= 0)
{
while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t))
{
if (aux.a_type == AT_HWCAP)
{
uint32_t hwcap = aux.a_un.a_val;
/* hardcode these values to avoid depending on specific
* versions of the hwcap header, e.g. HWCAP_NEON
*/
arm_has_vfp = (hwcap & 64) != 0;
arm_has_iwmmxt = (hwcap & 512) != 0;
/* this flag is only present on kernel 2.6.29 */
arm_has_neon = (hwcap & 4096) != 0;
}
else if (aux.a_type == AT_PLATFORM)
{
const char *plat = (const char*) aux.a_un.a_val;
if (strncmp (plat, "v7l", 3) == 0)
{
arm_has_v7 = TRUE;
arm_has_v6 = TRUE;
}
else if (strncmp (plat, "v6l", 3) == 0)
{
arm_has_v6 = TRUE;
}
}
}
close (fd);
}
arm_tests_initialized = TRUE;
}
#endif /* Linux elf */
#if defined(USE_ARM_SIMD)
pixman_bool_t
pixman_have_arm_simd (void)
{
if (!arm_tests_initialized)
pixman_arm_read_auxv_or_cpu_features ();
return arm_has_v6;
}
#endif /* USE_ARM_SIMD */
#if defined(USE_ARM_NEON)
pixman_bool_t
pixman_have_arm_neon (void)
{
if (!arm_tests_initialized)
pixman_arm_read_auxv_or_cpu_features ();
return arm_has_neon;
}
#endif /* USE_ARM_NEON */
#if defined(USE_ARM_IWMMXT)
pixman_bool_t
pixman_have_arm_iwmmxt (void)
{
if (!arm_tests_initialized)
pixman_arm_read_auxv_or_cpu_features ();
return arm_has_iwmmxt;
}
#endif /* USE_ARM_IWMMXT */
#else /* !_MSC_VER && !Linux elf && !Android */
#define pixman_have_arm_simd() FALSE
#define pixman_have_arm_neon() FALSE
#define pixman_have_arm_iwmmxt() FALSE
#endif
#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */
#if defined(USE_MIPS_DSPR2)
#if defined (__linux__) /* linux ELF */
pixman_bool_t
pixman_have_mips_dspr2 (void)
{
const char *search_string = "MIPS 74K";
const char *file_name = "/proc/cpuinfo";
/* Simple detection of MIPS DSP ASE (revision 2) at runtime for Linux.
* It is based on /proc/cpuinfo, which reveals hardware configuration
* to user-space applications. According to MIPS (early 2010), no similar
* facility is universally available on the MIPS architectures, so it's up
* to individual OSes to provide such.
*
* The only currently available MIPS core that supports DSPr2 is the 74K.
*/
char cpuinfo_line[256];
FILE *f = NULL;
if ((f = fopen (file_name, "r")) == NULL)
return FALSE;
while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL)
{
if (strstr (cpuinfo_line, search_string) != NULL)
{
fclose (f);
return TRUE;
}
}
fclose (f);
/* Did not find string in the proc file. */
return FALSE;
}
#else /* linux ELF */
#define pixman_have_mips_dspr2() FALSE
#endif /* linux ELF */
#endif /* USE_MIPS_DSPR2 */
#if defined(USE_X86_MMX) || defined(USE_SSE2)
/* The CPU detection code needs to be in a file not compiled with
* "-mmmx -msse", as gcc would generate CMOV instructions otherwise
* that would lead to SIGILL instructions on old CPUs that don't have
* it.
*/
#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
#ifdef HAVE_GETISAX
#include <sys/auxv.h>
#endif
typedef enum
{
NO_FEATURES = 0,
MMX = 0x1,
MMX_EXTENSIONS = 0x2,
SSE = 0x6,
SSE2 = 0x8,
CMOV = 0x10
} cpu_features_t;
static unsigned int
detect_cpu_features (void)
{
unsigned int features = 0;
unsigned int result = 0;
#ifdef HAVE_GETISAX
if (getisax (&result, 1))
{
if (result & AV_386_CMOV)
features |= CMOV;
if (result & AV_386_MMX)
features |= MMX;
if (result & AV_386_AMD_MMX)
features |= MMX_EXTENSIONS;
if (result & AV_386_SSE)
features |= SSE;
if (result & AV_386_SSE2)
features |= SSE2;
}
#else
char vendor[13];
#ifdef _MSC_VER
int vendor0 = 0, vendor1, vendor2;
#endif
vendor[0] = 0;
vendor[12] = 0;
#ifdef __GNUC__
/* see p. 118 of amd64 instruction set manual Vol3 */
/* We need to be careful about the handling of %ebx and
* %esp here. We can't declare either one as clobbered
* since they are special registers (%ebx is the "PIC
* register" holding an offset to global data, %esp the
* stack pointer), so we need to make sure they have their
* original values when we access the output operands.
*/
__asm__ (
"pushf\n"
"pop %%eax\n"
"mov %%eax, %%ecx\n"
"xor $0x00200000, %%eax\n"
"push %%eax\n"
"popf\n"
"pushf\n"
"pop %%eax\n"
"mov $0x0, %%edx\n"
"xor %%ecx, %%eax\n"
"jz 1f\n"
"mov $0x00000000, %%eax\n"
"push %%ebx\n"
"cpuid\n"
"mov %%ebx, %%eax\n"
"pop %%ebx\n"
"mov %%eax, %1\n"
"mov %%edx, %2\n"
"mov %%ecx, %3\n"
"mov $0x00000001, %%eax\n"
"push %%ebx\n"
"cpuid\n"
"pop %%ebx\n"
"1:\n"
"mov %%edx, %0\n"
: "=r" (result),
"=m" (vendor[0]),
"=m" (vendor[4]),
"=m" (vendor[8])
:
: "%eax", "%ecx", "%edx"
);
#elif defined (_MSC_VER)
_asm {
pushfd
pop eax
mov ecx, eax
xor eax, 00200000h
push eax
popfd
pushfd
pop eax
mov edx, 0
xor eax, ecx
jz nocpuid
mov eax, 0
push ebx
cpuid
mov eax, ebx
pop ebx
mov vendor0, eax
mov vendor1, edx
mov vendor2, ecx
mov eax, 1
push ebx
cpuid
pop ebx
nocpuid:
mov result, edx
}
memmove (vendor + 0, &vendor0, 4);
memmove (vendor + 4, &vendor1, 4);
memmove (vendor + 8, &vendor2, 4);
#else
# error unsupported compiler
#endif
features = 0;
if (result)
{
/* result now contains the standard feature bits */
if (result & (1 << 15))
features |= CMOV;
if (result & (1 << 23))
features |= MMX;
if (result & (1 << 25))
features |= SSE;
if (result & (1 << 26))
features |= SSE2;
if ((features & MMX) && !(features & SSE) &&
(strcmp (vendor, "AuthenticAMD") == 0 ||
strcmp (vendor, "Geode by NSC") == 0))
{
/* check for AMD MMX extensions */
#ifdef __GNUC__
__asm__ (
" push %%ebx\n"
" mov $0x80000000, %%eax\n"
" cpuid\n"
" xor %%edx, %%edx\n"
" cmp $0x1, %%eax\n"
" jge 2f\n"
" mov $0x80000001, %%eax\n"
" cpuid\n"
"2:\n"
" pop %%ebx\n"
" mov %%edx, %0\n"
: "=r" (result)
:
: "%eax", "%ecx", "%edx"
);
#elif defined _MSC_VER
_asm {
push ebx
mov eax, 80000000h
cpuid
xor edx, edx
cmp eax, 1
jge notamd
mov eax, 80000001h
cpuid
notamd:
pop ebx
mov result, edx
}
#endif
if (result & (1 << 22))
features |= MMX_EXTENSIONS;
}
}
#endif /* HAVE_GETISAX */
return features;
}
#ifdef USE_X86_MMX
static pixman_bool_t
pixman_have_mmx (void)
{
static pixman_bool_t initialized = FALSE;
static pixman_bool_t mmx_present;
if (!initialized)
{
unsigned int features = detect_cpu_features ();
mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
initialized = TRUE;
}
return mmx_present;
}
#endif
#ifdef USE_SSE2
static pixman_bool_t
pixman_have_sse2 (void)
{
static pixman_bool_t initialized = FALSE;
static pixman_bool_t sse2_present;
if (!initialized)
{
unsigned int features = detect_cpu_features ();
sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
initialized = TRUE;
}
return sse2_present;
}
#endif
#else /* __amd64__ */
#ifdef USE_X86_MMX
#define pixman_have_mmx() TRUE
#endif
#ifdef USE_SSE2
#define pixman_have_sse2() TRUE
#endif
#endif /* __amd64__ */
#endif
static pixman_bool_t
disabled (const char *name)
{
const char *env;
if ((env = getenv ("PIXMAN_DISABLE")))
{
do
{
const char *end;
int len;
if ((end = strchr (env, ' ')))
len = end - env;
else
len = strlen (env);
if (strlen (name) == len && strncmp (name, env, len) == 0)
{
printf ("pixman: Disabled %s implementation\n", name);
return TRUE;
}
env += len;
}
while (*env++);
}
return FALSE;
}
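As a usage note (an illustration, not part of the file): PIXMAN_DISABLE is a space-separated list of implementation names matched by the loop above, so for example

    PIXMAN_DISABLE="arm-neon sse2" ./some-pixman-client

makes _pixman_choose_implementation below skip those backends, printing a notice for each one disabled.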
pixman_implementation_t *
_pixman_choose_implementation (void)
{
pixman_implementation_t *imp;
imp = _pixman_implementation_create_general();
if (!disabled ("fast"))
imp = _pixman_implementation_create_fast_path (imp);
#ifdef USE_X86_MMX
if (!disabled ("mmx") && pixman_have_mmx ())
imp = _pixman_implementation_create_mmx (imp);
#endif
#ifdef USE_SSE2
if (!disabled ("sse2") && pixman_have_sse2 ())
imp = _pixman_implementation_create_sse2 (imp);
#endif
#ifdef USE_ARM_SIMD
if (!disabled ("arm-simd") && pixman_have_arm_simd ())
imp = _pixman_implementation_create_arm_simd (imp);
#endif
#ifdef USE_ARM_IWMMXT
if (!disabled ("arm-iwmmxt") && pixman_have_arm_iwmmxt ())
imp = _pixman_implementation_create_mmx (imp);
#endif
#ifdef USE_ARM_NEON
if (!disabled ("arm-neon") && pixman_have_arm_neon ())
imp = _pixman_implementation_create_arm_neon (imp);
#endif
#ifdef USE_MIPS_DSPR2
if (!disabled ("mips-dspr2") && pixman_have_mips_dspr2 ())
imp = _pixman_implementation_create_mips_dspr2 (imp);
#endif
#ifdef USE_VMX
if (!disabled ("vmx") && pixman_have_vmx ())
imp = _pixman_implementation_create_vmx (imp);
#endif
imp = _pixman_implementation_create_noop (imp);
return imp;
}


@ -1,51 +0,0 @@
#define R16_BITS 5
#define G16_BITS 6
#define B16_BITS 5
#define R16_SHIFT (B16_BITS + G16_BITS)
#define G16_SHIFT (B16_BITS)
#define B16_SHIFT 0
#define MASK 0xff
#define ONE_HALF 0x80
#define A_SHIFT 8 * 3
#define R_SHIFT 8 * 2
#define G_SHIFT 8
#define A_MASK 0xff000000
#define R_MASK 0xff0000
#define G_MASK 0xff00
#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100
#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_8(x) ((x) & MASK)
// This uses the same dithering technique that Skia does.
// It is essentially perturbing the lower bit based on the
// high bit
static inline uint16_t dither_32_to_16(uint32_t c)
{
uint8_t b = BLUE_8(c);
uint8_t g = GREEN_8(c);
uint8_t r = RED_8(c);
r = ((r << 1) - ((r >> (8 - R16_BITS) << (8 - R16_BITS)) | (r >> R16_BITS))) >> (8 - R16_BITS);
g = ((g << 1) - ((g >> (8 - G16_BITS) << (8 - G16_BITS)) | (g >> G16_BITS))) >> (8 - G16_BITS);
b = ((b << 1) - ((b >> (8 - B16_BITS) << (8 - B16_BITS)) | (b >> B16_BITS))) >> (8 - B16_BITS);
return ((r << R16_SHIFT) | (g << G16_SHIFT) | (b << B16_SHIFT));
}
static inline uint16_t dither_8888_to_0565(uint32_t color, pixman_bool_t toggle)
{
// alternate between a perturbed truncation and a regular truncation
if (toggle) {
return dither_32_to_16(color);
} else {
return convert_8888_to_0565(color);
}
}
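A hedged sketch of how such a helper might be driven (not from this header; the checkerboard toggle is an assumption), alternating the perturbed and plain truncations both along and between scanlines:

static inline void
dither_scanline_sketch (const uint32_t *src, uint16_t *dst,
                        int width, int y)
{
    int x;

    /* (x ^ y) & 1 gives a checkerboard, so neighbouring pixels in both
     * directions use different truncations. */
    for (x = 0; x < width; ++x)
        dst[x] = dither_8888_to_0565 (src[x], (x ^ y) & 1);
}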


@ -55,9 +55,8 @@ RASTERIZE_EDGES (pixman_image_t *image,
*
* (The AA case does a similar adjustment in RENDER_SAMPLES_X)
*/
/* we cast to unsigned to get defined behaviour for overflow */
lx = (unsigned)lx + X_FRAC_FIRST(1) - pixman_fixed_e;
rx = (unsigned)rx + X_FRAC_FIRST(1) - pixman_fixed_e;
lx += X_FRAC_FIRST(1) - pixman_fixed_e;
rx += X_FRAC_FIRST(1) - pixman_fixed_e;
#endif
/* clip X */
if (lx < 0)

Diff not shown because of its large size.

Diff not shown because of its large size.


@ -109,14 +109,16 @@ general_cubic (double x, double B, double C)
if (ax < 1)
{
return ((12 - 9 * B - 6 * C) * ax * ax * ax +
(-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6;
return (((12 - 9 * B - 6 * C) * ax +
(-18 + 12 * B + 6 * C)) * ax * ax +
(6 - 2 * B)) / 6;
}
else if (ax >= 1 && ax < 2)
else if (ax < 2)
{
return ((-B - 6 * C) * ax * ax * ax +
(6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) *
ax + (8 * B + 24 * C)) / 6;
return ((((-B - 6 * C) * ax +
(6 * B + 30 * C)) * ax +
(-12 * B - 48 * C)) * ax +
(8 * B + 24 * C)) / 6;
}
else
{
@ -141,7 +143,7 @@ static const filter_info_t filters[] =
{ PIXMAN_KERNEL_BOX, box_kernel, 1.0 },
{ PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 },
{ PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 },
{ PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 6 * SIGMA },
{ PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 5.0 },
{ PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 },
{ PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 },
{ PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 },
@ -160,18 +162,21 @@ integral (pixman_kernel_t kernel1, double x1,
pixman_kernel_t kernel2, double scale, double x2,
double width)
{
/* If the integration interval crosses zero, break it into
* two separate integrals. This ensures that filters such
* as LINEAR that are not differentiable at 0 will still
* integrate properly.
*/
if (kernel1 == PIXMAN_KERNEL_BOX && kernel2 == PIXMAN_KERNEL_BOX)
{
return width;
}
/* The LINEAR filter is not differentiable at 0, so if the
* integration interval crosses zero, break it into two
* separate integrals.
*/
if (x1 < 0 && x1 + width > 0)
else if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0)
{
return
integral (kernel1, x1, kernel2, scale, x2, - x1) +
integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
}
else if (x2 < 0 && x2 + width > 0)
else if (kernel2 == PIXMAN_KERNEL_LINEAR && x2 < 0 && x2 + width > 0)
{
return
integral (kernel1, x1, kernel2, scale, x2, - x2) +
@ -189,13 +194,19 @@ integral (pixman_kernel_t kernel1, double x1,
}
else
{
/* Integration via Simpson's rule */
#define N_SEGMENTS 128
/* Integration via Simpson's rule
* See http://www.intmath.com/integration/6-simpsons-rule.php
* 12 segments (6 cubic approximations) seems to produce best
* result for lanczos3.linear, which was the combination that
* showed the most errors. This makes sense as the lanczos3
* filter is 6 wide.
*/
#define N_SEGMENTS 12
#define SAMPLE(a1, a2) \
(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))
double s = 0.0;
double h = width / (double)N_SEGMENTS;
double h = width / N_SEGMENTS;
int i;
s = SAMPLE (x1, x2);
@ -204,11 +215,14 @@ integral (pixman_kernel_t kernel1, double x1,
{
double a1 = x1 + h * i;
double a2 = x2 + h * i;
s += 4 * SAMPLE (a1, a2);
}
for (i = 2; i < N_SEGMENTS; i += 2)
{
double a1 = x1 + h * i;
double a2 = x2 + h * i;
s += 2 * SAMPLE (a1, a2);
if (i >= 2 && i < N_SEGMENTS - 1)
s += 4 * SAMPLE (a1, a2);
}
s += SAMPLE (x1 + width, x2 + width);
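For reference, composite Simpson's rule weights the samples 1, 4, 2, 4, ..., 4, 1 and scales by h / 3. A self-contained sketch of the rule the loop above implements (an illustration, not the patch's code):

/* n must be even */
static double
simpson_sketch (double (*f) (double), double a, double b, int n)
{
    double h = (b - a) / n;
    double s = f (a) + f (b);
    int i;

    for (i = 1; i < n; ++i)
        s += ((i & 1) ? 4.0 : 2.0) * f (a + h * i);

    return s * h / 3.0;
}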
@ -217,25 +231,17 @@ integral (pixman_kernel_t kernel1, double x1,
}
}
static pixman_fixed_t *
create_1d_filter (int *width,
static void
create_1d_filter (int width,
pixman_kernel_t reconstruct,
pixman_kernel_t sample,
double scale,
int n_phases)
int n_phases,
pixman_fixed_t *p)
{
pixman_fixed_t *params, *p;
double step;
double size;
int i;
size = scale * filters[sample].width + filters[reconstruct].width;
*width = ceil (size);
p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t));
if (!params)
return NULL;
step = 1.0 / n_phases;
for (i = 0; i < n_phases; ++i)
@ -243,15 +249,15 @@ create_1d_filter (int *width,
double frac = step / 2.0 + i * step;
pixman_fixed_t new_total;
int x, x1, x2;
double total;
double total, e;
/* Sample convolution of reconstruction and sampling
* filter. See rounding.txt regarding the rounding
* and sample positions.
*/
x1 = ceil (frac - *width / 2.0 - 0.5);
x2 = x1 + *width;
x1 = ceil (frac - width / 2.0 - 0.5);
x2 = x1 + width;
total = 0;
for (x = x1; x < x2; ++x)
@ -274,29 +280,154 @@ create_1d_filter (int *width,
ihigh - ilow);
}
total += c;
*p++ = (pixman_fixed_t)(c * 65535.0 + 0.5);
*p = (pixman_fixed_t)floor (c * 65536.0 + 0.5);
total += *p;
p++;
}
/* Normalize */
p -= *width;
total = 1 / total;
/* Normalize, with error diffusion */
p -= width;
total = 65536.0 / total;
new_total = 0;
e = 0.0;
for (x = x1; x < x2; ++x)
{
pixman_fixed_t t = (*p) * total + 0.5;
double v = (*p) * total + e;
pixman_fixed_t t = floor (v + 0.5);
e = v - t;
new_total += t;
*p++ = t;
}
if (new_total != pixman_fixed_1)
*(p - *width / 2) += (pixman_fixed_1 - new_total);
/* pixman_fixed_e's worth of error may remain; put it
* at the first sample, since that is the only one that
* hasn't had any error diffused into it.
*/
*(p - width) += pixman_fixed_1 - new_total;
}
}
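The normalization above keeps the fixed-point weights summing to exactly pixman_fixed_1 by carrying each rounding error into the next weight. A standalone sketch of the same idea (names and types are assumptions, not the patch's code):

#include <math.h>
#include <stdint.h>

static void
normalize_error_diffused (const double *in, int32_t *out,
                          int n, double total)
{
    double scale = 65536.0 / total; /* 65536 == fixed-point 1.0 */
    double e = 0.0;
    int i;

    for (i = 0; i < n; ++i)
    {
        double v = in[i] * scale + e;

        out[i] = (int32_t) floor (v + 0.5);
        e = v - out[i]; /* diffuse this weight's rounding error */
    }
}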
static int
filter_width (pixman_kernel_t reconstruct, pixman_kernel_t sample, double size)
{
return ceil (filters[reconstruct].width + size * filters[sample].width);
}
#ifdef PIXMAN_GNUPLOT
/* If enable-gnuplot is configured, then you can pipe the output of a
* pixman-using program to gnuplot and get a continuously-updated plot
* of the horizontal filter. This works well with demos/scale to test
* the filter generation.
*
* The plot is all the different subposition filters shuffled
* together. This is misleading in a few cases:
*
* IMPULSE.BOX - goes up and down as the subfilters have different
* numbers of non-zero samples
* IMPULSE.TRIANGLE - somewhat crooked for the same reason
* 1-wide filters - looks triangular, but a 1-wide box would be more
* accurate
*/
static void
gnuplot_filter (int width, int n_phases, const pixman_fixed_t* p)
{
double step;
int i, j;
int first;
step = 1.0 / n_phases;
printf ("set style line 1 lc rgb '#0060ad' lt 1 lw 0.5 pt 7 pi 1 ps 0.5\n");
printf ("plot [x=%g:%g] '-' with linespoints ls 1\n", -width*0.5, width*0.5);
/* Print a point at the origin so that y==0 line is included: */
printf ("0 0\n\n");
/* The position of the first sample of the phase corresponding to
* frac is given by:
*
* ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
*
* We have to find the frac that minimizes this expression.
*
* For odd widths, we have
*
* ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
* = ceil (frac) + K - frac
* = 1 + K - frac
*
* for some K, so this is minimized when frac is maximized and
* strictly growing with frac. So for odd widths, we can simply
* start at the last phase and go backwards.
*
* For even widths, we have
*
* ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
* = ceil (frac - 0.5) + K - frac
*
* The graph for this function (ignoring K) looks like this:
*
* 0.5
*      |    |\
*      |    | \
*      |    |  \
* 0    |    |   \
*      |\   |
*      | \  |
*      |  \ |
* -0.5 |   \|
* ---------------------------------
* 0         0.5               1
*
* So in this case we need to start with the phase whose frac is
* less than, but as close as possible to 0.5, then go backwards
* until we hit the first phase, then wrap around to the last
* phase and continue backwards.
*
 * Which phase is as close as possible to 0.5? The location of the
 * sampling point corresponding to the kth phase is given by
* 1/(2 * n_phases) + k / n_phases:
*
* 1/(2 * n_phases) + k / n_phases = 0.5
*
* from which it follows that
*
* k = (n_phases - 1) / 2
*
* rounded down is the phase in question.
*/
if (width & 1)
first = n_phases - 1;
else
first = (n_phases - 1) / 2;
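    /* Worked example (editorial): with n_phases = 4 and an even width,
     * the phase fracs are 0.125, 0.375, 0.625 and 0.875, so
     * first = (4 - 1) / 2 = 1 picks 0.375, the largest frac below 0.5. */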
for (j = 0; j < width; ++j)
{
for (i = 0; i < n_phases; ++i)
{
int phase = first - i;
double frac, pos;
if (phase < 0)
phase = n_phases + phase;
frac = step / 2.0 + phase * step;
pos = ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + j;
printf ("%g %g\n",
pos,
pixman_fixed_to_double (*(p + phase * width + j)));
}
}
-    return params;
printf ("e\n");
fflush (stdout);
}
#endif
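Usage sketch (editorial, assuming a build configured with --enable-gnuplot): piping a demo through gnuplot, e.g. something like  demos/scale | gnuplot , should then redraw the filter plot continuously as the demo's scaling parameters change.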
/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
* with the given kernels and scale parameters
*/
@ -313,38 +444,35 @@ pixman_filter_create_separable_convolution (int *n_values,
{
double sx = fabs (pixman_fixed_to_double (scale_x));
double sy = fabs (pixman_fixed_to_double (scale_y));
-    pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL;
+    pixman_fixed_t *params;
int subsample_x, subsample_y;
int width, height;
+    width = filter_width (reconstruct_x, sample_x, sx);
     subsample_x = (1 << subsample_bits_x);

+    height = filter_width (reconstruct_y, sample_y, sy);
     subsample_y = (1 << subsample_bits_y);

-    horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x);
-    vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y);
-
-    if (!horz || !vert)
-        goto out;
*n_values = 4 + width * subsample_x + height * subsample_y;
params = malloc (*n_values * sizeof (pixman_fixed_t));
if (!params)
-        goto out;
+        return NULL;
params[0] = pixman_int_to_fixed (width);
params[1] = pixman_int_to_fixed (height);
params[2] = pixman_int_to_fixed (subsample_bits_x);
params[3] = pixman_int_to_fixed (subsample_bits_y);
-    memcpy (params + 4, horz,
-            width * subsample_x * sizeof (pixman_fixed_t));
-    memcpy (params + 4 + width * subsample_x, vert,
-            height * subsample_y * sizeof (pixman_fixed_t));
+    create_1d_filter (width, reconstruct_x, sample_x, sx, subsample_x,
+                      params + 4);
+    create_1d_filter (height, reconstruct_y, sample_y, sy, subsample_y,
+                      params + 4 + width * subsample_x);
-out:
-    free (horz);
-    free (vert);
#ifdef PIXMAN_GNUPLOT
gnuplot_filter(width, subsample_x, params + 4);
#endif
return params;
}


@ -37,43 +37,47 @@
#include <string.h>
#include "pixman-private.h"
-static pixman_bool_t
-general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static void
+general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info)
 {
     pixman_image_t *image = iter->image;

-    if (image->type == LINEAR)
-        _pixman_linear_gradient_iter_init (image, iter);
-    else if (image->type == RADIAL)
+    switch (image->type)
+    {
+    case BITS:
+        if ((iter->iter_flags & ITER_SRC) == ITER_SRC)
+            _pixman_bits_image_src_iter_init (image, iter);
+        else
+            _pixman_bits_image_dest_iter_init (image, iter);
+        break;
+
+    case LINEAR:
+        _pixman_linear_gradient_iter_init (image, iter);
+        break;
+
+    case RADIAL:
         _pixman_radial_gradient_iter_init (image, iter);
-    else if (image->type == CONICAL)
+        break;
+
+    case CONICAL:
         _pixman_conical_gradient_iter_init (image, iter);
-    else if (image->type == BITS)
-        _pixman_bits_image_src_iter_init (image, iter);
-    else if (image->type == SOLID)
+        break;
+
+    case SOLID:
         _pixman_log_error (FUNC, "Solid image not handled by noop");
-    else
+        break;
+
+    default:
         _pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
-
-    return TRUE;
+        break;
+    }
 }
-static pixman_bool_t
-general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
-{
-    if (iter->image->type == BITS)
-    {
-        _pixman_bits_image_dest_iter_init (iter->image, iter);
-        return TRUE;
-    }
-    else
-    {
-        _pixman_log_error (FUNC, "Trying to write to a non-writable image");
-        return FALSE;
-    }
-}
+static const pixman_iter_info_t general_iters[] =
+{
+    { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL },
+    { PIXMAN_null },
+};
typedef struct op_info_t op_info_t;
struct op_info_t
@ -105,62 +109,75 @@ static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
#define SCANLINE_BUFFER_LENGTH 8192
static pixman_bool_t
operator_needs_division (pixman_op_t op)
{
static const uint8_t needs_division[] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* SATURATE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* DISJOINT */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* CONJOINT */
0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, /* blend ops */
};
return needs_division[op];
}
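A reading sketch for the table (editorial; the direct indexing relies on the pixman_op_t enum values being contiguous from 0):

/* Editorial sketch, not pixman code: */
assert ( operator_needs_division (PIXMAN_OP_SATURATE));       /* the lone 1 in row 1 */
assert ( operator_needs_division (PIXMAN_OP_DISJOINT_OVER));  /* DISJOINT row */
assert (!operator_needs_division (PIXMAN_OP_OVER));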
static void
general_composite_rect (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
-    uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
+    uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH];
uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
uint8_t *src_buffer, *mask_buffer, *dest_buffer;
pixman_iter_t src_iter, mask_iter, dest_iter;
pixman_combine_32_func_t compose;
pixman_bool_t component_alpha;
-    iter_flags_t narrow, src_iter_flags;
-    iter_flags_t rgb16;
+    iter_flags_t width_flag, src_iter_flags;
int Bpp;
int i;
-    if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
-        (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
-        (dest_image->common.flags & FAST_PATH_NARROW_FORMAT))
+    if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
+        (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
+        (dest_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
+        !(operator_needs_division (op)) &&
+        (dest_image->bits.dither == PIXMAN_DITHER_NONE))
{
-        narrow = ITER_NARROW;
+        width_flag = ITER_NARROW;
Bpp = 4;
}
else
{
-        narrow = 0;
+        width_flag = ITER_WIDE;
Bpp = 16;
}
-    // XXX: This special casing is bad. Ideally, we'd keep the general code general perhaps
-    // by having it deal more specifically with different intermediate formats
-    if (
-        (dest_image->common.flags & FAST_PATH_16_FORMAT && (src_image->type == LINEAR || src_image->type == RADIAL)) &&
-        ( op == PIXMAN_OP_SRC ||
-          (op == PIXMAN_OP_OVER && (src_image->common.flags & FAST_PATH_IS_OPAQUE))
-        )
-      ) {
-        rgb16 = ITER_16;
-    } else {
-        rgb16 = 0;
-    }
#define ALIGN(addr) \
((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15)))
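/* Editorial note: ALIGN rounds an address up to the next 16-byte
 * boundary, e.g. 0x1001..0x100f become 0x1010 while 0x1000 is left
 * unchanged; adding 15 before masking is what makes it round up. */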
if (width <= 0 || _pixman_multiply_overflows_int (width, Bpp * 3))
return;
-    if (width * Bpp > SCANLINE_BUFFER_LENGTH)
+    if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 15 * 3)
     {
-        scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
+        scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 15 * 3);

         if (!scanline_buffer)
             return;
+
+        memset (scanline_buffer, 0, width * Bpp * 3 + 15 * 3);
     }
+    else
+    {
+        memset (stack_scanline_buffer, 0, sizeof (stack_scanline_buffer));
+    }
-    src_buffer = scanline_buffer;
-    mask_buffer = src_buffer + width * Bpp;
-    dest_buffer = mask_buffer + width * Bpp;
+    src_buffer = ALIGN (scanline_buffer);
+    mask_buffer = ALIGN (src_buffer + width * Bpp);
+    dest_buffer = ALIGN (mask_buffer + width * Bpp);
-    if (!narrow)
+    if (width_flag == ITER_WIDE)
{
/* To make sure there aren't any NANs in the buffers */
memset (src_buffer, 0, width * Bpp);
@ -169,11 +186,12 @@ general_composite_rect (pixman_implementation_t *imp,
}
/* src iter */
-    src_iter_flags = narrow | op_flags[op].src | rgb16;
+    src_iter_flags = width_flag | op_flags[op].src | ITER_SRC;

-    _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image,
-                                          src_x, src_y, width, height,
-                                          src_buffer, src_iter_flags, info->src_flags);
+    _pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image,
+                                      src_x, src_y, width, height,
+                                      src_buffer, src_iter_flags,
+                                      info->src_flags);
/* mask iter */
if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
@ -185,23 +203,21 @@ general_composite_rect (pixman_implementation_t *imp,
mask_image = NULL;
}
-    component_alpha =
-        mask_image                            &&
-        mask_image->common.type == BITS       &&
-        mask_image->common.component_alpha    &&
-        PIXMAN_FORMAT_RGB (mask_image->bits.format);
+    component_alpha = mask_image && mask_image->common.component_alpha;

-    _pixman_implementation_src_iter_init (
-        imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height,
-        mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags);
+    _pixman_implementation_iter_init (
+        imp->toplevel, &mask_iter,
+        mask_image, mask_x, mask_y, width, height, mask_buffer,
+        ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB),
+        info->mask_flags);
/* dest iter */
-    _pixman_implementation_dest_iter_init (
+    _pixman_implementation_iter_init (
         imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height,
-        dest_buffer, narrow | op_flags[op].dst | rgb16, info->dest_flags);
+        dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags);
compose = _pixman_implementation_lookup_combiner (
-        imp->toplevel, op, component_alpha, narrow, !!rgb16);
+        imp->toplevel, op, component_alpha, width_flag != ITER_WIDE);
for (i = 0; i < height; ++i)
{
@ -216,6 +232,13 @@ general_composite_rect (pixman_implementation_t *imp,
dest_iter.write_back (&dest_iter);
}
if (src_iter.fini)
src_iter.fini (&src_iter);
if (mask_iter.fini)
mask_iter.fini (&mask_iter);
if (dest_iter.fini)
dest_iter.fini (&dest_iter);
if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
free (scanline_buffer);
}
@ -231,12 +254,10 @@ _pixman_implementation_create_general (void)
{
pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
-    _pixman_setup_combiner_functions_16 (imp);
_pixman_setup_combiner_functions_32 (imp);
_pixman_setup_combiner_functions_float (imp);
-    imp->src_iter_init = general_src_iter_init;
-    imp->dest_iter_init = general_dest_iter_init;
+    imp->iter_info = general_iters;
return imp;
}


@ -391,6 +391,9 @@ box32_intersect (pixman_box32_t *dest,
return dest->x2 > dest->x1 && dest->y2 > dest->y1;
}
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
#endif
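/* Editorial note: on 32-bit x86 the stack is not guaranteed to be
 * 16-byte aligned at function entry, so these exported entry points
 * re-align it themselves; SSE2 code reached from here could otherwise
 * fault on aligned loads. */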
PIXMAN_EXPORT void
pixman_composite_glyphs_no_mask (pixman_op_t op,
pixman_image_t *src,
@ -630,6 +633,9 @@ out:
* - Trim the mask to the destination clip/image?
* - Trim composite region based on sources, when the op ignores 0s.
*/
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
#endif
PIXMAN_EXPORT void
pixman_composite_glyphs (pixman_op_t op,
pixman_image_t *src,

Просмотреть файл

@ -37,11 +37,14 @@ _pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
walker->stops = gradient->stops;
walker->left_x = 0;
walker->right_x = 0x10000;
-    walker->stepper = 0;
-    walker->left_ag = 0;
-    walker->left_rb = 0;
-    walker->right_ag = 0;
-    walker->right_rb = 0;
+    walker->a_s = 0.0f;
+    walker->a_b = 0.0f;
+    walker->r_s = 0.0f;
+    walker->r_b = 0.0f;
+    walker->g_s = 0.0f;
+    walker->g_b = 0.0f;
+    walker->b_s = 0.0f;
+    walker->b_b = 0.0f;
walker->repeat = repeat;
walker->need_reset = TRUE;
@ -51,10 +54,13 @@ static void
gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t pos)
{
-    int32_t x, left_x, right_x;
+    int64_t x, left_x, right_x;
pixman_color_t *left_c, *right_c;
int n, count = walker->num_stops;
pixman_gradient_stop_t *stops = walker->stops;
float la, lr, lg, lb;
float ra, rr, rg, rb;
float lx, rx;
if (walker->repeat == PIXMAN_REPEAT_NORMAL)
{
@ -116,57 +122,143 @@ gradient_walker_reset (pixman_gradient_walker_t *walker,
left_c = right_c;
}
-    walker->left_x = left_x;
-    walker->right_x = right_x;
-    walker->left_ag = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
-    walker->left_rb = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
-    walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8);
-    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
/* The alpha/red/green/blue channels are scaled to be in [0, 1].
* This ensures that after premultiplication all channels will
* be in the [0, 1] interval.
*/
la = (left_c->alpha * (1.0f/257.0f));
lr = (left_c->red * (1.0f/257.0f));
lg = (left_c->green * (1.0f/257.0f));
lb = (left_c->blue * (1.0f/257.0f));
-    if (walker->left_x == walker->right_x                ||
-        (walker->left_ag == walker->right_ag &&
-         walker->left_rb == walker->right_rb))
ra = (right_c->alpha * (1.0f/257.0f));
rr = (right_c->red * (1.0f/257.0f));
rg = (right_c->green * (1.0f/257.0f));
rb = (right_c->blue * (1.0f/257.0f));
lx = left_x * (1.0f/65536.0f);
rx = right_x * (1.0f/65536.0f);
if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
{
-        walker->stepper = 0;
walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f;
walker->a_b = (la + ra) / 510.0f;
walker->r_b = (lr + rr) / 510.0f;
walker->g_b = (lg + rg) / 510.0f;
walker->b_b = (lb + rb) / 510.0f;
}
else
{
-        int32_t width = right_x - left_x;
-        walker->stepper = ((1 << 24) + width / 2) / width;
float w_rec = 1.0f / (rx - lx);
walker->a_b = (la * rx - ra * lx) * w_rec * (1.0f/255.0f);
walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
walker->a_s = (ra - la) * w_rec * (1.0f/255.0f);
walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f);
walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f);
walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f);
}
walker->left_x = left_x;
walker->right_x = right_x;
walker->need_reset = FALSE;
}
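/* Editorial sketch of the representation set up above: each channel is
 * stored as a line c(x) = c_s * x + c_b through the two stops, e.g. for
 * alpha
 *
 *     a(x) = (ra - la) / (rx - lx) * x + (la * rx - ra * lx) / (rx - lx)
 *
 * which evaluates to la at x == lx and to ra at x == rx (before the
 * final 1/255 scaling applied in the code). */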
-uint32_t
-_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
-                               pixman_fixed_48_16_t      x)
+static argb_t
+pixman_gradient_walker_pixel_float (pixman_gradient_walker_t *walker,
+                                    pixman_fixed_48_16_t      x)
{
-    int dist, idist;
-    uint32_t t1, t2, a, color;
argb_t f;
float y;
if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
gradient_walker_reset (walker, x);
-    dist  = ((int)(x - walker->left_x) * walker->stepper) >> 16;
-    idist = 256 - dist;
+    y = x * (1.0f / 65536.0f);

-    /* combined INTERPOLATE and premultiply */
-    t1 = walker->left_rb * idist + walker->right_rb * dist;
-    t1 = (t1 >> 8) & 0xff00ff;
f.a = walker->a_s * y + walker->a_b;
f.r = f.a * (walker->r_s * y + walker->r_b);
f.g = f.a * (walker->g_s * y + walker->g_b);
f.b = f.a * (walker->b_s * y + walker->b_b);
-    t2 = walker->left_ag * idist + walker->right_ag * dist;
-    t2 &= 0xff00ff00;
-    color = t2 & 0xff000000;
-    a = t2 >> 24;
-    t1 = t1 * a + 0x800080;
-    t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
-    t2 = (t2 >> 8) * a + 0x800080;
-    t2 = (t2 + ((t2 >> 8) & 0xff00ff));
-    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
return f;
}
static uint32_t
pixman_gradient_walker_pixel_32 (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x)
{
argb_t f;
float y;
if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
gradient_walker_reset (walker, x);
y = x * (1.0f / 65536.0f);
/* Instead of [0...1] for ARGB, we want [0...255],
* multiply alpha with 255 and the color channels
* also get multiplied by the alpha multiplier.
*
* We don't use pixman_contract_from_float because it causes a 2x
* slowdown to do so, and the values are already normalized,
* so we don't have to worry about values < 0.f or > 1.f
*/
f.a = 255.f * (walker->a_s * y + walker->a_b);
f.r = f.a * (walker->r_s * y + walker->r_b);
f.g = f.a * (walker->g_s * y + walker->g_b);
f.b = f.a * (walker->b_s * y + walker->b_b);
return (((uint32_t)(f.a + .5f) << 24) & 0xff000000) |
(((uint32_t)(f.r + .5f) << 16) & 0x00ff0000) |
(((uint32_t)(f.g + .5f) << 8) & 0x0000ff00) |
(((uint32_t)(f.b + .5f) >> 0) & 0x000000ff);
}
void
_pixman_gradient_walker_write_narrow (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer)
{
*buffer = pixman_gradient_walker_pixel_32 (walker, x);
}
void
_pixman_gradient_walker_write_wide (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer)
{
*(argb_t *)buffer = pixman_gradient_walker_pixel_float (walker, x);
}
void
_pixman_gradient_walker_fill_narrow (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer,
uint32_t *end)
{
register uint32_t color;
color = pixman_gradient_walker_pixel_32 (walker, x);
while (buffer < end)
*buffer++ = color;
}
void
_pixman_gradient_walker_fill_wide (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer,
uint32_t *end)
{
register argb_t color;
argb_t *buffer_wide = (argb_t *)buffer;
argb_t *end_wide = (argb_t *)end;
color = pixman_gradient_walker_pixel_float (walker, x);
while (buffer_wide < end_wide)
*buffer_wide++ = color;
}


@ -33,25 +33,6 @@
static const pixman_color_t transparent_black = { 0, 0, 0, 0 };
-/**
- ** bug 1293598 - clean up every pointer after free to avoid
- ** "dereferencing freed memory" problem
- **/
-#define PIXMAN_POSION
-
-static void
-free_memory (void** p)
-{
-#ifdef PIXMAN_POISON
-    if (*p) {
-#endif
-        free (*p);
-#ifdef PIXMAN_POISON
-        *p = NULL;
-    }
-#endif
-}
static void
gradient_property_changed (pixman_image_t *image)
{
@ -164,8 +145,8 @@ _pixman_image_fini (pixman_image_t *image)
pixman_region32_fini (&common->clip_region);
-    free_memory (&common->transform);
-    free_memory (&common->filter_params);
+    free (common->transform);
+    free (common->filter_params);
if (common->alpha_map)
pixman_image_unref ((pixman_image_t *)common->alpha_map);
@ -177,8 +158,7 @@ _pixman_image_fini (pixman_image_t *image)
if (image->gradient.stops)
{
/* See _pixman_init_gradient() for an explanation of the - 1 */
-        void *addr = image->gradient.stops - 1;
-        free_memory (&addr);
+        free (image->gradient.stops - 1);
}
/* This will trigger if someone adds a property_changed
@ -189,11 +169,8 @@ _pixman_image_fini (pixman_image_t *image)
image->common.property_changed == gradient_property_changed);
}
-    if (image->type == BITS && image->bits.free_me) {
-        free_memory (&image->bits.free_me);
-        image->bits.bits = NULL;
-    }
+    if (image->type == BITS && image->bits.free_me)
+        free (image->bits.free_me);
return TRUE;
}
@ -233,7 +210,7 @@ pixman_image_unref (pixman_image_t *image)
{
if (_pixman_image_fini (image))
{
-        free_memory (&image);
+        free (image);
return TRUE;
}
@ -358,37 +335,47 @@ compute_image_info (pixman_image_t *image)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
-        else if (
-            /* affine and integer translation components in matrix ... */
-            ((flags & FAST_PATH_AFFINE_TRANSFORM) &&
-             !pixman_fixed_frac (image->common.transform->matrix[0][2] |
-                                 image->common.transform->matrix[1][2])) &&
-            (
-                /* ... combined with a simple rotation */
-                (flags & (FAST_PATH_ROTATE_90_TRANSFORM |
-                          FAST_PATH_ROTATE_180_TRANSFORM |
-                          FAST_PATH_ROTATE_270_TRANSFORM)) ||
-                /* ... or combined with a simple non-rotated translation */
-                (image->common.transform->matrix[0][0] == pixman_fixed_1 &&
-                 image->common.transform->matrix[1][1] == pixman_fixed_1 &&
-                 image->common.transform->matrix[0][1] == 0 &&
-                 image->common.transform->matrix[1][0] == 0)
-            )
-          )
else if (flags & FAST_PATH_AFFINE_TRANSFORM)
{
-            /* FIXME: there are some affine-test failures, showing that
-             * handling of BILINEAR and NEAREST filter is not quite
-             * equivalent when getting close to 32K for the translation
-             * components of the matrix. That's likely some bug, but for
-             * now just skip BILINEAR->NEAREST optimization in this case.
-             */
/* Suppose the transform is
*
* [ t00, t01, t02 ]
* [ t10, t11, t12 ]
* [ 0, 0, 1 ]
*
* and the destination coordinates are (n + 0.5, m + 0.5). Then
* the transformed x coordinate is:
*
* tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02
* = t00 * n + t01 * m + t02 + (t00 + t01) * 0.5
*
* which implies that if t00, t01 and t02 are all integers
* and (t00 + t01) is odd, then tx will be an integer plus 0.5,
* which means a BILINEAR filter will reduce to NEAREST. The same
* applies in the y direction
*/
-            pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
-            if (image->common.transform->matrix[0][2] <= magic_limit  &&
-                image->common.transform->matrix[1][2] <= magic_limit  &&
-                image->common.transform->matrix[0][2] >= -magic_limit &&
-                image->common.transform->matrix[1][2] >= -magic_limit)
pixman_fixed_t (*t)[3] = image->common.transform->matrix;
if ((pixman_fixed_frac (
t[0][0] | t[0][1] | t[0][2] |
t[1][0] | t[1][1] | t[1][2]) == 0) &&
(pixman_fixed_to_int (
(t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1)
{
-            flags |= FAST_PATH_NEAREST_FILTER;
/* FIXME: there are some affine-test failures, showing that
* handling of BILINEAR and NEAREST filter is not quite
* equivalent when getting close to 32K for the translation
* components of the matrix. That's likely some bug, but for
* now just skip BILINEAR->NEAREST optimization in this case.
*/
pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
if (image->common.transform->matrix[0][2] <= magic_limit &&
image->common.transform->matrix[1][2] <= magic_limit &&
image->common.transform->matrix[0][2] >= -magic_limit &&
image->common.transform->matrix[1][2] >= -magic_limit)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
}
}
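            /* Editorial example of the comment above: the pure integer
             * translation [[1, 0, 3], [0, 1, 7]] has t00 + t01 = 1 and
             * t10 + t11 = 1, both odd, so pixel centers map to pixel
             * centers and BILINEAR degenerates to NEAREST. */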
break;
@ -483,10 +470,6 @@ compute_image_info (pixman_image_t *image)
if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
flags &= ~FAST_PATH_NARROW_FORMAT;
-        if (image->bits.format == PIXMAN_r5g6b5)
-            flags |= FAST_PATH_16_FORMAT;
break;
case RADIAL:
@ -529,8 +512,10 @@ compute_image_info (pixman_image_t *image)
break;
}
-    /* Alpha map */
-    if (!image->common.alpha_map)
+    /* Alpha maps are only supported for BITS images, so it's always
+     * safe to ignore their presence for non-BITS images
+     */
+    if (!image->common.alpha_map || image->type != BITS)
{
flags |= FAST_PATH_NO_ALPHA_MAP;
}
@ -699,6 +684,41 @@ pixman_image_set_repeat (pixman_image_t *image,
image_property_changed (image);
}
PIXMAN_EXPORT void
pixman_image_set_dither (pixman_image_t *image,
pixman_dither_t dither)
{
if (image->type == BITS)
{
if (image->bits.dither == dither)
return;
image->bits.dither = dither;
image_property_changed (image);
}
}
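A usage sketch for the new dithering API (editorial; w, h, bits and stride are assumed to be set up by the caller):

/* Editorial usage sketch: request dithering on a 16-bpp destination. */
pixman_image_t *dest =
    pixman_image_create_bits (PIXMAN_r5g6b5, w, h, bits, stride);
pixman_image_set_dither (dest, PIXMAN_DITHER_GOOD);
pixman_image_set_dither_offset (dest, 0, 0);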
PIXMAN_EXPORT void
pixman_image_set_dither_offset (pixman_image_t *image,
int offset_x,
int offset_y)
{
if (image->type == BITS)
{
if (image->bits.dither_offset_x == offset_x &&
image->bits.dither_offset_y == offset_y)
{
return;
}
image->bits.dither_offset_x = offset_x;
image->bits.dither_offset_y = offset_y;
image_property_changed (image);
}
}
PIXMAN_EXPORT pixman_bool_t
pixman_image_set_filter (pixman_image_t * image,
pixman_filter_t filter,
@ -857,6 +877,10 @@ pixman_image_set_accessors (pixman_image_t * image,
if (image->type == BITS)
{
/* Accessors only work for <= 32 bpp. */
if (PIXMAN_FORMAT_BPP(image->bits.format) > 32)
return_if_fail (!read_func && !write_func);
image->bits.read_func = read_func;
image->bits.write_func = write_func;
@ -936,7 +960,7 @@ _pixman_image_get_solid (pixman_implementation_t *imp,
else if (image->bits.format == PIXMAN_x8r8g8b8)
result = image->bits.bits[0] | 0xff000000;
else if (image->bits.format == PIXMAN_a8)
-            result = (*(uint8_t *)image->bits.bits) << 24;
+            result = (uint32_t)(*(uint8_t *)image->bits.bits) << 24;
else
goto otherwise;
}
@ -945,12 +969,15 @@ _pixman_image_get_solid (pixman_implementation_t *imp,
pixman_iter_t iter;
otherwise:
-        _pixman_implementation_src_iter_init (
+        _pixman_implementation_iter_init (
             imp, &iter, image, 0, 0, 1, 1,
             (uint8_t *)&result,
-            ITER_NARROW, image->common.flags);
+            ITER_NARROW | ITER_SRC, image->common.flags);

         result = *iter.get_scanline (&iter, NULL);
+
+        if (iter.fini)
+            iter.fini (&iter);
}
/* If necessary, convert RGB <--> BGR. */


@ -150,9 +150,16 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
}
/* We should never reach this point */
-    _pixman_log_error (FUNC, "No known composite function\n");
+    _pixman_log_error (
+        FUNC,
+        "No composite function found\n"
+        "\n"
+        "The most likely cause of this is that this system has issues with\n"
+        "thread local storage\n");
*out_imp = NULL;
*out_func = dummy_composite_rect;
return;
update_cache:
if (i)
@ -186,8 +193,7 @@ pixman_combine_32_func_t
_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
pixman_op_t op,
pixman_bool_t component_alpha,
-                                        pixman_bool_t narrow,
-                                        pixman_bool_t rgb16)
+                                        pixman_bool_t narrow)
{
while (imp)
{
@ -211,8 +217,6 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
f = imp->combine_32_ca[op];
break;
}
-        if (rgb16)
-            f = (pixman_combine_32_func_t *)imp->combine_16[op];
if (f)
return f;
@ -281,50 +285,26 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
return FALSE;
}
-pixman_bool_t
-_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
-                                      pixman_iter_t            *iter,
-                                      pixman_image_t           *image,
-                                      int                       x,
-                                      int                       y,
-                                      int                       width,
-                                      int                       height,
-                                      uint8_t                  *buffer,
-                                      iter_flags_t              iter_flags,
-                                      uint32_t                  image_flags)
+static uint32_t *
+get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
 {
-    iter->image = image;
-    iter->buffer = (uint32_t *)buffer;
-    iter->x = x;
-    iter->y = y;
-    iter->width = width;
-    iter->height = height;
-    iter->iter_flags = iter_flags;
-    iter->image_flags = image_flags;
-    while (imp)
-    {
-        if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter))
-            return TRUE;
-        imp = imp->fallback;
-    }
-    return FALSE;
+    return NULL;
}
-pixman_bool_t
-_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
-                                       pixman_iter_t            *iter,
-                                       pixman_image_t           *image,
-                                       int                       x,
-                                       int                       y,
-                                       int                       width,
-                                       int                       height,
-                                       uint8_t                  *buffer,
-                                       iter_flags_t              iter_flags,
-                                       uint32_t                  image_flags)
+void
+_pixman_implementation_iter_init (pixman_implementation_t *imp,
+                                  pixman_iter_t           *iter,
+                                  pixman_image_t          *image,
+                                  int                      x,
+                                  int                      y,
+                                  int                      width,
+                                  int                      height,
+                                  uint8_t                 *buffer,
+                                  iter_flags_t             iter_flags,
+                                  uint32_t                 image_flags)
{
pixman_format_code_t format;
iter->image = image;
iter->buffer = (uint32_t *)buffer;
iter->x = x;
@ -333,16 +313,40 @@ _pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
iter->height = height;
iter->iter_flags = iter_flags;
iter->image_flags = image_flags;
iter->fini = NULL;
if (!iter->image)
{
iter->get_scanline = get_scanline_null;
return;
}
format = iter->image->common.extended_format_code;
while (imp)
{
-        if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter))
-            return TRUE;
+        if (imp->iter_info)
+        {
+            const pixman_iter_info_t *info;
+
+            for (info = imp->iter_info; info->format != PIXMAN_null; ++info)
+            {
+                if ((info->format == PIXMAN_any || info->format == format) &&
+                    (info->image_flags & image_flags) == info->image_flags &&
+                    (info->iter_flags & iter_flags) == info->iter_flags)
+                {
+                    iter->get_scanline = info->get_scanline;
+                    iter->write_back = info->write_back;
+
+                    if (info->initializer)
+                        info->initializer (iter, info);
+                    return;
+                }
+            }
+        }
         imp = imp->fallback;
     }
-
-    return FALSE;
}
pixman_bool_t
@ -376,6 +380,11 @@ _pixman_disabled (const char *name)
return FALSE;
}
static const pixman_fast_path_t empty_fast_path[] =
{
{ PIXMAN_OP_NONE }
};
pixman_implementation_t *
_pixman_choose_implementation (void)
{
@ -393,5 +402,16 @@ _pixman_choose_implementation (void)
imp = _pixman_implementation_create_noop (imp);
if (_pixman_disabled ("wholeops"))
{
pixman_implementation_t *cur;
/* Disable all whole-operation paths except the general one,
* so that optimized iterators are used as much as possible.
*/
for (cur = imp; cur->fallback; cur = cur->fallback)
cur->fast_paths = empty_fast_path;
}
return imp;
}


@ -26,7 +26,6 @@
#ifndef PIXMAN_FAST_PATH_H__
#define PIXMAN_FAST_PATH_H__
#include <stdlib.h>
#include "pixman-private.h"
#define PIXMAN_REPEAT_COVER -1
@ -174,34 +173,6 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
#else
#ifdef LOW_QUALITY_INTERPOLATION
/* Based on Filter_32_opaque_portable from Skia */
static force_inline uint32_t
bilinear_interpolation(uint32_t a00, uint32_t a01,
uint32_t a10, uint32_t a11,
int x, int y)
{
int xy = x * y;
static const uint32_t mask = 0xff00ff;
int scale = 256 - 16*y - 16*x + xy;
uint32_t lo = (a00 & mask) * scale;
uint32_t hi = ((a00 >> 8) & mask) * scale;
scale = 16*x - xy;
lo += (a01 & mask) * scale;
hi += ((a01 >> 8) & mask) * scale;
scale = 16*y - xy;
lo += (a10 & mask) * scale;
hi += ((a10 >> 8) & mask) * scale;
lo += (a11 & mask) * xy;
hi += ((a11 >> 8) & mask) * xy;
return ((lo >> 8) & mask) | (hi & ~mask);
}
#else
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
@ -247,10 +218,35 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
return r;
}
#endif
#endif
#endif // BILINEAR_INTERPOLATION_BITS <= 4
static force_inline argb_t
bilinear_interpolation_float (argb_t tl, argb_t tr,
argb_t bl, argb_t br,
float distx, float disty)
{
float distxy, distxiy, distixy, distixiy;
argb_t r;
distxy = distx * disty;
distxiy = distx * (1.f - disty);
distixy = (1.f - distx) * disty;
distixiy = (1.f - distx) * (1.f - disty);
r.a = tl.a * distixiy + tr.a * distxiy +
bl.a * distixy + br.a * distxy;
r.r = tl.r * distixiy + tr.r * distxiy +
bl.r * distixy + br.r * distxy;
r.g = tl.g * distixiy + tr.g * distxiy +
bl.g * distixy + br.g * distxy;
r.b = tl.b * distixiy + tr.b * distxiy +
bl.b * distixy + br.b * distxy;
return r;
}
/*
* For each scanline fetched from source image with PAD repeat:
* - calculate how many pixels need to be padded on the left side
@ -776,7 +772,8 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),                 \
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
/*****************************************************************************/
@ -853,38 +850,8 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
* with 8-bit SIMD multiplication instructions for 8-bit interpolation
* precision.
*/
/* Replace a single "scanline_func" with "fetch_func" & "op_func" to allow optional
* two stage processing (bilinear fetch to a temp buffer, followed by unscaled
* combine), "op_func" may be NULL, in this case we keep old behavior.
* This is ugly and gcc issues some warnings, but works.
*
* An advice: clang has much better error reporting than gcc for deeply nested macros.
*/
#define scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
scanline_buf, mask, src_top, src_bottom, width, \
weight_top, weight_bottom, vx, unit_x, max_vx, zero_src) \
do { \
if (op_func != NULL) \
{ \
fetch_func ((void *)scanline_buf, (mask), (src_top), (src_bottom), (width), \
(weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) op_func)\
((dst), (mask), (src_type_t *)scanline_buf, (width)); \
} \
else \
{ \
fetch_func ((void*)(dst), (mask), (src_top), (src_bottom), (width), (weight_top), \
(weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
} \
} while (0)
-#define SCANLINE_BUFFER_LENGTH 3072
-
-#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t,          \
-                                   mask_type_t, dst_type_t, repeat_mode, flags)               \
+#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,   \
+                                   dst_type_t, repeat_mode, flags)                            \
static void \
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
@ -909,9 +876,6 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
pixman_fixed_t src_width_fixed; \
int max_x; \
pixman_bool_t need_src_extension; \
\
uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH]; \
uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
if (flags & FLAG_HAVE_SOLID_MASK) \
@ -984,14 +948,6 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
\
src_width_fixed = pixman_int_to_fixed (src_width); \
} \
\
if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer)) \
{ \
scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t)); \
\
if (!scanline_buffer) \
return; \
} \
\
while (--height >= 0) \
{ \
@ -1034,18 +990,16 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
{ \
buf1[0] = buf1[1] = src1[0]; \
buf2[0] = buf2[1] = src2[0]; \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,     \
-                           0, 0, 0, FALSE);                                                   \
+            scanline_func (dst, mask,                                                         \
+                           buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);           \
dst += left_pad; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
} \
if (width > 0) \
{ \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask, src1, src2, width, weight1, weight2,        \
-                           vx, unit_x, 0, FALSE);                                             \
+            scanline_func (dst, mask,                                                         \
+                           src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);        \
dst += width; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
@ -1054,9 +1008,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
{ \
buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,    \
-                           0, 0, 0, FALSE);                                                   \
+            scanline_func (dst, mask,                                                         \
+                           buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);          \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
@ -1092,9 +1045,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
{ \
buf1[0] = buf1[1] = 0; \
buf2[0] = buf2[1] = 0; \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,     \
-                           0, 0, 0, TRUE);                                                    \
+            scanline_func (dst, mask,                                                         \
+                           buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);            \
dst += left_pad; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
@ -1105,8 +1057,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
buf1[1] = src1[0]; \
buf2[0] = 0; \
buf2[1] = src2[0]; \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2,      \
+            scanline_func (dst, mask,                                                         \
+                           buf1, buf2, left_tz, weight1, weight2,                             \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += left_tz; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
@ -1115,9 +1067,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
} \
if (width > 0) \
{ \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask, src1, src2, width, weight1, weight2,        \
-                           vx, unit_x, 0, FALSE);                                             \
+            scanline_func (dst, mask,                                                         \
+                           src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);        \
dst += width; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
@ -1129,8 +1080,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
buf1[1] = 0; \
buf2[0] = src2[src_image->bits.width - 1]; \
buf2[1] = 0; \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2,     \
+            scanline_func (dst, mask,                                                         \
+                           buf1, buf2, right_tz, weight1, weight2,                            \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += right_tz; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
@ -1140,9 +1091,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
{ \
buf1[0] = buf1[1] = 0; \
buf2[0] = buf2[1] = 0; \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,    \
-                           0, 0, 0, TRUE);                                                    \
+            scanline_func (dst, mask,                                                         \
+                           buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);           \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
@ -1204,8 +1154,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
-                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,      \
-                               dst, scanline_buffer, mask, buf1, buf2, num_pixels,            \
+                scanline_func (dst, mask, buf1, buf2, num_pixels,                             \
weight1, weight2, pixman_fixed_frac(vx), \
unit_x, src_width_fixed, FALSE); \
\
@ -1234,10 +1183,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
-                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,      \
-                               dst, scanline_buffer, mask, src_line_top, src_line_bottom,     \
-                               num_pixels, weight1, weight2, vx, unit_x, src_width_fixed,     \
-                               FALSE);                                                        \
+                scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,          \
+                               weight1, weight2, vx, unit_x, src_width_fixed, FALSE);         \
\
width_remain -= num_pixels; \
vx += num_pixels * unit_x; \
@ -1250,21 +1197,17 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
} \
else \
{ \
-            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,     \
-                           scanline_buffer, mask,                                             \
-                           src_first_line + src_stride * y1,                                  \
+            scanline_func (dst, mask, src_first_line + src_stride * y1,                       \
src_first_line + src_stride * y2, width, \
weight1, weight2, vx, unit_x, max_vx, FALSE); \
} \
} \
-    if (scanline_buffer != (uint8_t *) stack_scanline_buffer)                                 \
-        free (scanline_buffer);                                                               \
}
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
+#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,      \
                                       dst_type_t, repeat_mode, flags)                               \
-    FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,  \
+    FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,        \
dst_type_t, repeat_mode, flags)
#define SCALED_BILINEAR_FLAGS \


@ -31,8 +31,6 @@
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-dither.h"
static pixman_bool_t
linear_gradient_is_horizontal (pixman_image_t *image,
int x,
@ -91,8 +89,11 @@ linear_gradient_is_horizontal (pixman_image_t *image,
}
static uint32_t *
-linear_get_scanline_narrow (pixman_iter_t *iter,
-                            const uint32_t *mask)
+linear_get_scanline (pixman_iter_t                 *iter,
+                     const uint32_t                *mask,
+                     int                            Bpp,
+                     pixman_gradient_walker_write_t write_pixel,
+                     pixman_gradient_walker_fill_t  fill_pixel)
{
pixman_image_t *image = iter->image;
int x = iter->x;
@ -105,7 +106,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
pixman_fixed_48_16_t dx, dy;
gradient_t *gradient = (gradient_t *)image;
linear_gradient_t *linear = (linear_gradient_t *)image;
-    uint32_t *end = buffer + width;
+    uint32_t *end = buffer + width * (Bpp / 4);
pixman_gradient_walker_t walker;
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
@ -139,7 +140,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
if (l == 0 || unit.vector[2] == 0)
{
/* affine transformation only */
        pixman_fixed_32_32_t t, next_inc;
double inc;
if (l == 0 || v.vector[2] == 0)
@ -154,7 +155,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
invden = pixman_fixed_1 * (double) pixman_fixed_1 /
(l * (double) v.vector[2]);
v2 = v.vector[2] * (1. / pixman_fixed_1);
            t = ((dx * v.vector[0] + dy * v.vector[1]) -
(dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
}
@ -162,11 +163,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
if (((pixman_fixed_32_32_t )(inc * width)) == 0)
{
-            register uint32_t color;
-
-            color = _pixman_gradient_walker_pixel (&walker, t);
-            while (buffer < end)
-                *buffer++ = color;
+            fill_pixel (&walker, t, buffer, end);
}
else
{
@ -177,12 +174,11 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
{
if (!mask || *mask++)
{
-                    *buffer = _pixman_gradient_walker_pixel (&walker,
-                                                             t + next_inc);
+                    write_pixel (&walker, t + next_inc, buffer);
}
i++;
next_inc = inc * i;
-                buffer++;
+                buffer += (Bpp / 4);
}
}
}
@ -204,14 +200,14 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
invden = pixman_fixed_1 * (double) pixman_fixed_1 /
(l * (double) v.vector[2]);
v2 = v.vector[2] * (1. / pixman_fixed_1);
                    t = ((dx * v.vector[0] + dy * v.vector[1]) -
(dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
}
-                *buffer = _pixman_gradient_walker_pixel (&walker, t);
+                write_pixel (&walker, t, buffer);
}
-            ++buffer;
+            buffer += (Bpp / 4);
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
@ -225,176 +221,30 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
}
static uint32_t *
-linear_get_scanline_16 (pixman_iter_t *iter,
-                        const uint32_t *mask)
+linear_get_scanline_narrow (pixman_iter_t  *iter,
+                            const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint16_t * buffer = (uint16_t*)iter->buffer;
pixman_bool_t toggle = ((x ^ y) & 1);
pixman_vector_t v, unit;
pixman_fixed_32_32_t l;
pixman_fixed_48_16_t dx, dy;
gradient_t *gradient = (gradient_t *)image;
linear_gradient_t *linear = (linear_gradient_t *)image;
uint16_t *end = buffer + width;
pixman_gradient_walker_t walker;
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
if (image->common.transform)
{
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
unit.vector[0] = image->common.transform->matrix[0][0];
unit.vector[1] = image->common.transform->matrix[1][0];
unit.vector[2] = image->common.transform->matrix[2][0];
}
else
{
unit.vector[0] = pixman_fixed_1;
unit.vector[1] = 0;
unit.vector[2] = 0;
}
dx = linear->p2.x - linear->p1.x;
dy = linear->p2.y - linear->p1.y;
l = dx * dx + dy * dy;
if (l == 0 || unit.vector[2] == 0)
{
/* affine transformation only */
pixman_fixed_32_32_t t, next_inc;
double inc;
if (l == 0 || v.vector[2] == 0)
{
t = 0;
inc = 0;
}
else
{
double invden, v2;
invden = pixman_fixed_1 * (double) pixman_fixed_1 /
(l * (double) v.vector[2]);
v2 = v.vector[2] * (1. / pixman_fixed_1);
t = ((dx * v.vector[0] + dy * v.vector[1]) -
(dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
}
next_inc = 0;
if (((pixman_fixed_32_32_t )(inc * width)) == 0)
{
register uint32_t color;
uint16_t dither_diff;
uint16_t color16;
uint16_t color16b;
color = _pixman_gradient_walker_pixel (&walker, t);
color16 = dither_8888_to_0565(color, toggle);
color16b = dither_8888_to_0565(color, toggle^1);
// compute the difference
dither_diff = color16 ^ color16b;
while (buffer < end) {
*buffer++ = color16;
// use dither_diff to toggle between color16 and color16b
color16 ^= dither_diff;
toggle ^= 1;
}
}
else
{
int i;
i = 0;
while (buffer < end)
{
if (!mask || *mask++)
{
*buffer = dither_8888_to_0565(_pixman_gradient_walker_pixel (&walker,
t + next_inc),
toggle);
}
toggle ^= 1;
i++;
next_inc = inc * i;
buffer++;
}
}
}
else
{
/* projective transformation */
double t;
t = 0;
while (buffer < end)
{
if (!mask || *mask++)
{
if (v.vector[2] != 0)
{
double invden, v2;
invden = pixman_fixed_1 * (double) pixman_fixed_1 /
(l * (double) v.vector[2]);
v2 = v.vector[2] * (1. / pixman_fixed_1);
t = ((dx * v.vector[0] + dy * v.vector[1]) -
(dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
}
*buffer = dither_8888_to_0565(_pixman_gradient_walker_pixel (&walker, t),
toggle);
}
toggle ^= 1;
++buffer;
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
v.vector[2] += unit.vector[2];
}
}
iter->y++;
-    return iter->buffer;
+    return linear_get_scanline (iter, mask, 4,
+                                _pixman_gradient_walker_write_narrow,
+                                _pixman_gradient_walker_fill_narrow);
}
static uint32_t *
linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
-    uint32_t *buffer = linear_get_scanline_narrow (iter, NULL);
-
-    pixman_expand_to_float (
-        (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
-
-    return buffer;
+    return linear_get_scanline (iter, NULL, 16,
+                                _pixman_gradient_walker_write_wide,
+                                _pixman_gradient_walker_fill_wide);
}
void
_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
-    // XXX: we can't use this optimization when dithering
-    if (0 && linear_gradient_is_horizontal (
+    if (linear_gradient_is_horizontal (
iter->image, iter->x, iter->y, iter->width, iter->height))
{
-        if (iter->iter_flags & ITER_16)
-            linear_get_scanline_16 (iter, NULL);
-        else if (iter->iter_flags & ITER_NARROW)
+        if (iter->iter_flags & ITER_NARROW)
linear_get_scanline_narrow (iter, NULL);
else
linear_get_scanline_wide (iter, NULL);
@ -403,9 +253,7 @@ _pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
}
else
{
-        if (iter->iter_flags & ITER_16)
-            iter->get_scanline = linear_get_scanline_16;
-        else if (iter->iter_flags & ITER_NARROW)
+        if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = linear_get_scanline_narrow;
else
iter->get_scanline = linear_get_scanline_wide;


@ -37,7 +37,7 @@
static force_inline int
count_leading_zeros (uint32_t x)
{
-#ifdef __GNUC__
+#ifdef HAVE_BUILTIN_CLZ
return __builtin_clz (x);
#else
int n = 0;
@ -273,7 +273,7 @@ pixman_transform_point_31_16 (const pixman_transform_t *t,
{
/* the divisor is small, we can actually keep all the bits */
int64_t hi, rhi, lo, rlo;
-        int64_t div = (divint << 16) + divfrac;
+        int64_t div = ((uint64_t)divint << 16) + divfrac;
fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);

(Diff between files not shown because of its large size.)


@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Author: Nemanja Lukic (nlukic@mips.com)
* Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
*/
#ifndef PIXMAN_MIPS_DSPR2_ASM_H
@ -72,6 +72,7 @@
#define LEAF_MIPS32R2(symbol) \
.globl symbol; \
.align 2; \
.hidden symbol; \
.type symbol, @function; \
.ent symbol, 0; \
symbol: .frame sp, 0, ra; \
@ -354,17 +355,16 @@ LEAF_MIPS32R2(symbol) \
out1_565, out2_565, \
maskR, maskG, maskB, \
scratch1, scratch2
precrq.ph.w \scratch1, \in2_8888, \in1_8888
precr_sra.ph.w \in2_8888, \in1_8888, 0
shll.ph \scratch1, \scratch1, 8
srl \in2_8888, \in2_8888, 3
and \scratch2, \in2_8888, \maskB
and \scratch1, \scratch1, \maskR
srl \in2_8888, \in2_8888, 2
and \out2_565, \in2_8888, \maskG
or \out2_565, \out2_565, \scratch2
or \out1_565, \out2_565, \scratch1
srl \out2_565, \out1_565, 16
precr.qb.ph \scratch1, \in2_8888, \in1_8888
precrq.qb.ph \in2_8888, \in2_8888, \in1_8888
and \out1_565, \scratch1, \maskR
shrl.ph \scratch1, \scratch1, 3
shll.ph \in2_8888, \in2_8888, 3
and \scratch1, \scratch1, \maskB
or \out1_565, \out1_565, \scratch1
and \in2_8888, \in2_8888, \maskG
or \out1_565, \out1_565, \in2_8888
srl \out2_565, \out1_565, 16
.endm
/*
@ -587,6 +587,36 @@ LEAF_MIPS32R2(symbol) \
addu_s.qb \out_8888, \out_8888, \s_8888
.endm
/*
* OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
* a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR
* needed for rounding process. maskLSR must have following value:
* li maskLSR, 0x00ff00ff
*/
.macro OVER_2x8888_2x8888 s1_8888, \
s2_8888, \
d1_8888, \
d2_8888, \
out1_8888, \
out2_8888, \
maskLSR, \
scratch1, scratch2, scratch3, \
scratch4, scratch5, scratch6
not \scratch1, \s1_8888
srl \scratch1, \scratch1, 24
not \scratch2, \s2_8888
srl \scratch2, \scratch2, 24
MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \
\scratch1, \scratch2, \
\out1_8888, \out2_8888, \
\maskLSR, \
\scratch3, \scratch4, \scratch5, \
\scratch6, \d1_8888, \d2_8888
addu_s.qb \out1_8888, \out1_8888, \s1_8888
addu_s.qb \out2_8888, \out2_8888, \s2_8888
.endm
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \
m_8, \
d_8888, \


@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Author: Nemanja Lukic (nlukic@mips.com)
* Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
*/
#ifdef HAVE_CONFIG_H
@ -48,8 +48,20 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
uint8_t, 3, uint8_t, 3)
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev,
uint8_t, 3, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev,
uint8_t, 3, uint16_t, 1)
#endif
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
uint32_t, 1, uint16_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
@ -67,6 +79,8 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
uint32_t, 1, uint16_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
uint8_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
@ -111,6 +125,13 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER,
uint32_t, uint32_t)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER,
uint32_t, uint16_t)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC,
uint16_t, uint32_t)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
uint32_t, uint32_t)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
@ -278,6 +299,14 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888),
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev),
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev),
#endif
PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888),
PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888),
PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888),
PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888),
@ -290,6 +319,7 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, mips_composite_over_n_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888),
@ -318,6 +348,8 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mips_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mips_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mips_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mips_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mips_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mips_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, mips_composite_add_n_8_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, mips_composite_add_n_8_8888),
@ -340,11 +372,27 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8),
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888),
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
/* Note: NONE repeat is not supported yet */
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),


@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Author: Nemanja Lukic (nlukic@mips.com)
* Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
*/
#ifndef PIXMAN_MIPS_DSPR2_H
@ -246,6 +246,48 @@ mips_composite_##name (pixman_implementation_t *imp, \
} \
}
/****************************************************************************/
#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \
src_type, dst_type) \
void \
pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \
dst_type * dst, \
const src_type * src, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x); \
\
static force_inline void \
scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \
const src_type * ps, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
pixman_fixed_t max_vx, \
pixman_bool_t zero_src) \
{ \
pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \
vx, unit_x); \
} \
\
FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \
scaled_nearest_scanline_mips_##name##_##op, \
src_type, dst_type, COVER) \
FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \
scaled_nearest_scanline_mips_##name##_##op, \
src_type, dst_type, NONE) \
FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \
scaled_nearest_scanline_mips_##name##_##op, \
src_type, dst_type, PAD)
/* Provide entries for the fast path table */
#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
/*****************************************************************************/
#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \
@ -286,12 +328,6 @@ FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op, \
scaled_nearest_scanline_mips_##name##_##op, \
src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
/* Provide entries for the fast path table */
#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
/****************************************************************************/
#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \


@ -44,8 +44,6 @@
#include "pixman-combine32.h"
#include "pixman-inlines.h"
#define no_vERBOSE
#ifdef VERBOSE
#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
#else
@ -91,21 +89,7 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
return __A;
}
# ifdef __OPTIMIZE__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
{
__m64 ret;
asm ("pshufw %2, %1, %0\n\t"
: "=y" (ret)
: "y" (__A), "K" (__N)
);
return ret;
}
# else
# define _mm_shuffle_pi16(A, N) \
# define _mm_shuffle_pi16(A, N) \
({ \
__m64 ret; \
\
@ -116,7 +100,6 @@ _mm_shuffle_pi16 (__m64 __A, int8_t const __N)
\
ret; \
})
# endif
# endif
#endif
@ -303,6 +286,29 @@ negate (__m64 mask)
return _mm_xor_si64 (mask, MC (4x00ff));
}
/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1
* and maps its result to the same range.
*
* Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner:
* Notation, Notation, Notation", the first of which is
*
* prod(a, b) = (a * b + 128) / 255.
*
* By approximating the division by 255 as 257/65536 it can be replaced by a
* multiply and a right shift. This is the implementation that we use in
* pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended
* 3DNow!, and unavailable at the time of the book's publication) to perform
* the multiplication and right shift in a single operation.
*
* prod(a, b) = ((a * b + 128) * 257) >> 16.
*
* A third way (how pix_multiply() was implemented prior to 14208344) exists
* also that performs the multiplication by 257 with adds and shifts.
*
* Where temp = a * b + 128
*
* prod(a, b) = (temp + (temp >> 8)) >> 8.
*/
static force_inline __m64
pix_multiply (__m64 a, __m64 b)
{
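The comment's formulations can be cross-checked exhaustively outside of pixman. Below is a standalone harness (illustrative only, not part of this patch): the two shift-based forms agree for every 8-bit input pair, and deviate from the plain biased division by exactly one only where a*b + 128 lands on an exact multiple of 255.

    /* Standalone cross-check of the prod(a, b) formulations described
     * in the comment above; illustrative, not pixman code. */
    #include <stdio.h>
    #include <stdint.h>
    #include <assert.h>

    int main (void)
    {
        unsigned a, b, boundary = 0;

        for (a = 0; a < 256; a++)
        {
            for (b = 0; b < 256; b++)
            {
                uint32_t t     = a * b + 128;
                uint32_t exact = t / 255;             /* Blinn's definition */
                uint32_t mul   = (t * 257) >> 16;     /* multiply + shift   */
                uint32_t adds  = (t + (t >> 8)) >> 8; /* adds + shifts      */

                assert (mul == adds);  /* the two shift forms always agree */
                if (mul != exact)
                {
                    /* off by one only at exact multiples of 255 */
                    assert (t % 255 == 0 && mul + 1 == exact);
                    boundary++;
                }
            }
        }
        printf ("boundary cases: %u of 65536\n", boundary);
        return 0;
    }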
@ -381,8 +387,10 @@ in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
static force_inline __m64 ldq_u(__m64 *p)
{
#ifdef USE_X86_MMX
/* x86's alignment restrictions are very relaxed. */
return *(__m64 *)p;
/* x86's alignment restrictions are very relaxed, but that's no excuse */
__m64 r;
memcpy(&r, p, sizeof(__m64));
return r;
#elif defined USE_ARM_IWMMXT
int align = (uintptr_t)p & 7;
__m64 *aligned_p;
@ -401,7 +409,9 @@ static force_inline uint32_t ldl_u(const uint32_t *p)
{
#ifdef USE_X86_MMX
/* x86's alignment restrictions are very relaxed. */
return *p;
uint32_t r;
memcpy(&r, p, sizeof(uint32_t));
return r;
#else
struct __una_u32 { uint32_t x __attribute__((packed)); };
const struct __una_u32 *ptr = (const struct __una_u32 *) p;
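Both x86 branches now go through memcpy: dereferencing a misaligned or type-punned pointer is undefined behaviour even where the hardware tolerates it, and a fixed-size memcpy costs nothing because compilers lower it to a single load. The idiom in isolation (generic C, illustrative):

    #include <stdint.h>
    #include <string.h>

    /* Load a 32-bit value in native byte order from a possibly misaligned
     * address without undefined behaviour; gcc and clang compile this to
     * a single mov on x86. */
    static inline uint32_t
    load32_unaligned (const void *p)
    {
        uint32_t v;
        memcpy (&v, p, sizeof v);
        return v;
    }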
@ -3534,13 +3544,111 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
_mm_empty ();
}
static force_inline void
scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t* pd,
const uint32_t* ps,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t src_width_fixed,
pixman_bool_t fully_transparent_src)
{
if (fully_transparent_src)
return;
while (w)
{
__m64 d = load (pd);
__m64 s = load (ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
store8888 (pd, core_combine_over_u_pixel_mmx (s, d));
pd++;
w--;
}
_mm_empty ();
}
FAST_NEAREST_MAINLOOP (mmx_8888_8888_cover_OVER,
scaled_nearest_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, COVER)
FAST_NEAREST_MAINLOOP (mmx_8888_8888_none_OVER,
scaled_nearest_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, NONE)
FAST_NEAREST_MAINLOOP (mmx_8888_8888_pad_OVER,
scaled_nearest_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, PAD)
FAST_NEAREST_MAINLOOP (mmx_8888_8888_normal_OVER,
scaled_nearest_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, NORMAL)
static force_inline void
scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask,
uint32_t * dst,
const uint32_t * src,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t src_width_fixed,
pixman_bool_t zero_src)
{
__m64 mm_mask;
if (zero_src || (*mask >> 24) == 0)
{
/* A workaround for https://gcc.gnu.org/PR47759 */
_mm_empty ();
return;
}
mm_mask = expand_alpha (load8888 (mask));
while (w)
{
uint32_t s = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
if (s)
{
__m64 ms = load8888 (&s);
__m64 alpha = expand_alpha (ms);
__m64 dest = load8888 (dst);
store8888 (dst, (in_over (ms, alpha, mm_mask, dest)));
}
dst++;
w--;
}
_mm_empty ();
}
FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_cover_OVER,
scaled_nearest_scanline_mmx_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_pad_OVER,
scaled_nearest_scanline_mmx_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_none_OVER,
scaled_nearest_scanline_mmx_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_normal_OVER,
scaled_nearest_scanline_mmx_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
#define BMSK (BSHIFT - 1)
#define BILINEAR_DECLARE_VARIABLES \
const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \
const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \
const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \
const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \
const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \
const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \
@ -3559,36 +3667,16 @@ do { \
__m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \
__m64 hi = _mm_add_pi16 (t_hi, b_hi); \
__m64 lo = _mm_add_pi16 (t_lo, b_lo); \
vx += unit_x; \
if (BILINEAR_INTERPOLATION_BITS < 8) \
{ \
/* calculate horizontal weights */ \
__m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \
/* calculate horizontal weights */ \
__m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \
_mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS))); \
/* horizontal interpolation */ \
__m64 p = _mm_unpacklo_pi16 (lo, hi); \
__m64 q = _mm_unpackhi_pi16 (lo, hi); \
lo = _mm_madd_pi16 (p, mm_wh); \
hi = _mm_madd_pi16 (q, mm_wh); \
} \
else \
{ \
/* calculate horizontal weights */ \
__m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
__m64 mm_wh_hi = _mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS); \
/* horizontal interpolation */ \
__m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo); \
__m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi); \
__m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo); \
__m64 mm_hi_hi = _mm_mulhi_pu16 (hi, mm_wh_hi); \
lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo), \
_mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi)); \
hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo), \
_mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \
} \
/* horizontal interpolation */ \
__m64 p = _mm_unpacklo_pi16 (lo, hi); \
__m64 q = _mm_unpackhi_pi16 (lo, hi); \
vx += unit_x; \
lo = _mm_madd_pi16 (p, mm_wh); \
hi = _mm_madd_pi16 (q, mm_wh); \
mm_x = _mm_add_pi16 (mm_x, mm_ux); \
/* shift and pack the result */ \
hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \
@ -3866,7 +3954,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
while (w && (((uintptr_t)dst) & 15))
{
*dst++ = *(src++) << 24;
*dst++ = (uint32_t)*(src++) << 24;
w--;
}
@ -3893,7 +3981,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
while (w)
{
*dst++ = *(src++) << 24;
*dst++ = (uint32_t)*(src++) << 24;
w--;
}
@ -3901,52 +3989,23 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
typedef struct
{
pixman_format_code_t format;
pixman_iter_get_scanline_t get_scanline;
} fetcher_info_t;
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 },
{ PIXMAN_r5g6b5, mmx_fetch_r5g6b5 },
{ PIXMAN_a8, mmx_fetch_a8 },
{ PIXMAN_null }
};
static pixman_bool_t
mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
#define FLAGS \
#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS)
{
const fetcher_info_t *f;
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
iter->get_scanline = f->get_scanline;
return TRUE;
}
}
}
return FALSE;
}
static const pixman_iter_info_t mmx_iters[] =
{
{ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
_pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL
},
{ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
_pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL
},
{ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
_pixman_iter_init_bits_stride, mmx_fetch_a8, NULL
},
{ PIXMAN_null },
};
static const pixman_fast_path_t mmx_fast_paths[] =
{
@ -4024,6 +4083,16 @@ static const pixman_fast_path_t mmx_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888 ),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888 ),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888 ),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
@ -4076,7 +4145,7 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback)
imp->blt = mmx_blt;
imp->fill = mmx_fill;
imp->src_iter_init = mmx_src_iter_init;
imp->iter_info = mmx_iters;
return imp;
}

View file

@ -37,12 +37,6 @@ noop_composite (pixman_implementation_t *imp,
return;
}
static void
dest_write_back_direct (pixman_iter_t *iter)
{
iter->buffer += iter->image->bits.rowstride;
}
static uint32_t *
noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
{
@ -53,110 +47,102 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
return result;
}
static uint32_t *
get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
{
return NULL;
static void
noop_init_solid_narrow (pixman_iter_t *iter,
const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
uint32_t *buffer = iter->buffer;
uint32_t *end = buffer + iter->width;
uint32_t color;
if (iter->image->type == SOLID)
color = image->solid.color_32;
else
color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
while (buffer < end)
*(buffer++) = color;
}
static pixman_bool_t
noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
static void
noop_init_solid_wide (pixman_iter_t *iter,
const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
argb_t *buffer = (argb_t *)iter->buffer;
argb_t *end = buffer + iter->width;
argb_t color;
if (iter->image->type == SOLID)
color = image->solid.color_float;
else
color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
while (buffer < end)
*(buffer++) = color;
}
static void
noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
if (!image)
{
iter->get_scanline = get_scanline_null;
}
else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
{
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else if (image->common.extended_format_code == PIXMAN_solid &&
(iter->image->type == SOLID ||
(iter->image_flags & FAST_PATH_NO_ALPHA_MAP)))
{
if (iter->iter_flags & ITER_NARROW)
{
uint32_t *buffer = iter->buffer;
uint32_t *end = buffer + iter->width;
uint32_t color;
if (image->type == SOLID)
color = image->solid.color_32;
else
color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
while (buffer < end)
*(buffer++) = color;
}
else
{
argb_t *buffer = (argb_t *)iter->buffer;
argb_t *end = buffer + iter->width;
argb_t color;
if (image->type == SOLID)
color = image->solid.color_float;
else
color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
while (buffer < end)
*(buffer++) = color;
}
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 &&
(iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS &&
iter->x >= 0 && iter->y >= 0 &&
iter->x + iter->width <= image->bits.width &&
iter->y + iter->height <= image->bits.height)
{
iter->buffer =
image->bits.bits + iter->y * image->bits.rowstride + iter->x;
iter->get_scanline = noop_get_scanline;
}
else
{
return FALSE;
}
return TRUE;
iter->buffer =
image->bits.bits + iter->y * image->bits.rowstride + iter->x;
}
static pixman_bool_t
noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
static void
dest_write_back_direct (pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
uint32_t image_flags = iter->image_flags;
uint32_t iter_flags = iter->iter_flags;
if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS &&
(iter_flags & ITER_NARROW) == ITER_NARROW &&
((image->common.extended_format_code == PIXMAN_a8r8g8b8) ||
(image->common.extended_format_code == PIXMAN_x8r8g8b8 &&
(iter_flags & (ITER_LOCALIZED_ALPHA)))))
{
iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x;
iter->get_scanline = _pixman_iter_get_scanline_noop;
iter->write_back = dest_write_back_direct;
return TRUE;
}
else
{
return FALSE;
}
iter->buffer += iter->image->bits.rowstride;
}
static const pixman_iter_info_t noop_iters[] =
{
/* Source iters */
{ PIXMAN_any,
0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC,
NULL,
_pixman_iter_get_scanline_noop,
NULL
},
{ PIXMAN_solid,
FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC,
noop_init_solid_narrow,
_pixman_iter_get_scanline_noop,
NULL,
},
{ PIXMAN_solid,
FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC,
noop_init_solid_wide,
_pixman_iter_get_scanline_noop,
NULL
},
{ PIXMAN_a8r8g8b8,
FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,
ITER_NARROW | ITER_SRC,
noop_init_direct_buffer,
noop_get_scanline,
NULL
},
/* Dest iters */
{ PIXMAN_a8r8g8b8,
FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST,
noop_init_direct_buffer,
_pixman_iter_get_scanline_noop,
dest_write_back_direct
},
{ PIXMAN_x8r8g8b8,
FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA,
noop_init_direct_buffer,
_pixman_iter_get_scanline_noop,
dest_write_back_direct
},
{ PIXMAN_null },
};
static const pixman_fast_path_t noop_fast_paths[] =
{
{ PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite },
@ -169,8 +155,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback)
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, noop_fast_paths);
imp->src_iter_init = noop_src_iter_init;
imp->dest_iter_init = noop_dest_iter_init;
imp->iter_info = noop_iters;
return imp;
}

View file

@ -5,17 +5,8 @@
* The defines which are shared between C and assembly code
*/
/* bilinear interpolation precision (must be <= 8) */
#ifndef MOZILLA_VERSION
#error "Need mozilla headers"
#endif
#ifdef MOZ_GFX_OPTIMIZE_MOBILE
#define LOW_QUALITY_INTERPOLATION
#define LOWER_QUALITY_INTERPOLATION
#define BILINEAR_INTERPOLATION_BITS 4
#else
/* bilinear interpolation precision (must be < 8) */
#define BILINEAR_INTERPOLATION_BITS 7
#endif
#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS)
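With 7 bits of precision, the 16-bit fraction of a 16.16 coordinate is reduced to a weight in [0, 127] against a range of 128, small enough that the SIMD paths can work with 16-bit multiplies. A minimal scalar sketch of how the define is consumed (to_bilinear_weight mirrors pixman_fixed_to_bilinear_weight () from pixman-inlines.h; lerp8 is an illustrative 1-D reduction of the 2-D interpolation, not pixman code):

    #include <stdint.h>

    typedef int32_t pixman_fixed_t;        /* stands in for pixman's 16.16 type */
    #define BILINEAR_INTERPOLATION_BITS  7
    #define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS)

    /* Reduce the 16-bit fraction of a 16.16 coordinate to the
     * interpolation precision. */
    static int
    to_bilinear_weight (pixman_fixed_t x)
    {
        return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
               (BILINEAR_INTERPOLATION_RANGE - 1);
    }

    /* One-dimensional example: blend two 8-bit samples at fractional
     * position vx. A full bilinear fetch does this along both axes and
     * shifts the product down by 2 * BILINEAR_INTERPOLATION_BITS. */
    static uint8_t
    lerp8 (uint8_t a, uint8_t b, pixman_fixed_t vx)
    {
        int w = to_bilinear_weight (vx);
        return (a * (BILINEAR_INTERPOLATION_RANGE - w) + b * w) >>
               BILINEAR_INTERPOLATION_BITS;
    }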
/*
@ -37,6 +28,7 @@
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <float.h>
#include "pixman-compiler.h"
@ -64,7 +56,7 @@ struct argb_t
float b;
};
typedef void (*fetch_scanline_t) (pixman_image_t *image,
typedef void (*fetch_scanline_t) (bits_image_t *image,
int x,
int y,
int width,
@ -188,7 +180,9 @@ struct bits_image
uint32_t * free_me;
int rowstride; /* in number of uint32_t's */
fetch_scanline_t fetch_scanline_16;
pixman_dither_t dither;
uint32_t dither_offset_y;
uint32_t dither_offset_x;
fetch_scanline_t fetch_scanline_32;
fetch_pixel_32_t fetch_pixel_32;
@ -198,8 +192,6 @@ struct bits_image
fetch_pixel_float_t fetch_pixel_float;
store_scanline_t store_scanline_float;
store_scanline_t store_scanline_16;
/* Used for indirect access to the bits */
pixman_read_memory_func_t read_func;
pixman_write_memory_func_t write_func;
@ -220,10 +212,12 @@ union pixman_image
typedef struct pixman_iter_t pixman_iter_t;
typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter);
typedef void (* pixman_iter_fini_t) (pixman_iter_t *iter);
typedef enum
{
ITER_NARROW = (1 << 0),
ITER_NARROW = (1 << 0),
ITER_WIDE = (1 << 1),
/* "Localized alpha" is when the alpha channel is used only to compute
* the alpha value of the destination. This means that the computation
@ -240,16 +234,15 @@ typedef enum
* we can treat it as if it were ARGB, which means in some cases we can
* avoid copying it to a temporary buffer.
*/
ITER_LOCALIZED_ALPHA = (1 << 1),
ITER_IGNORE_ALPHA = (1 << 2),
ITER_IGNORE_RGB = (1 << 3),
ITER_LOCALIZED_ALPHA = (1 << 2),
ITER_IGNORE_ALPHA = (1 << 3),
ITER_IGNORE_RGB = (1 << 4),
/* With the addition of ITER_16 we now have two flags that to represent
* 3 pipelines. This means that there can be an invalid state when
* both ITER_NARROW and ITER_16 are set. In this case
* ITER_16 overrides NARROW and we should use the 16 bit pipeline.
* Note: ITER_16 still has a 32 bit mask, which is a bit weird. */
ITER_16 = (1 << 4)
/* These indicate whether the iterator is for a source
* or a destination image
*/
ITER_SRC = (1 << 5),
ITER_DEST = (1 << 6)
} iter_flags_t;
struct pixman_iter_t
@ -266,6 +259,7 @@ struct pixman_iter_t
/* These function pointers are initialized by the implementation */
pixman_iter_get_scanline_t get_scanline;
pixman_iter_write_back_t write_back;
pixman_iter_fini_t fini;
/* These fields are scratch data that implementations can use */
void * data;
@ -273,6 +267,19 @@ struct pixman_iter_t
int stride;
};
typedef struct pixman_iter_info_t pixman_iter_info_t;
typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter,
const pixman_iter_info_t *info);
struct pixman_iter_info_t
{
pixman_format_code_t format;
uint32_t image_flags;
iter_flags_t iter_flags;
pixman_iter_initializer_t initializer;
pixman_iter_get_scanline_t get_scanline;
pixman_iter_write_back_t write_back;
};
void
_pixman_bits_image_setup_accessors (bits_image_t *image);
@ -337,13 +344,12 @@ _pixman_image_validate (pixman_image_t *image);
*/
typedef struct
{
uint32_t left_ag;
uint32_t left_rb;
uint32_t right_ag;
uint32_t right_rb;
pixman_fixed_t left_x;
pixman_fixed_t right_x;
pixman_fixed_t stepper;
float a_s, a_b;
float r_s, r_b;
float g_s, g_b;
float b_s, b_b;
pixman_fixed_48_16_t left_x;
pixman_fixed_48_16_t right_x;
pixman_gradient_stop_t *stops;
int num_stops;
@ -361,9 +367,38 @@ void
_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t pos);
uint32_t
_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x);
typedef void (*pixman_gradient_walker_write_t) (
pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer);
void
_pixman_gradient_walker_write_narrow(pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer);
void
_pixman_gradient_walker_write_wide(pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer);
typedef void (*pixman_gradient_walker_fill_t) (
pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer,
uint32_t *end);
void
_pixman_gradient_walker_fill_narrow(pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer,
uint32_t *end);
void
_pixman_gradient_walker_fill_wide(pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x,
uint32_t *buffer,
uint32_t *end);
/*
* Edges
@ -473,10 +508,7 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
int width,
int height,
uint32_t filler);
typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
pixman_iter_t *iter);
void _pixman_setup_combiner_functions_16 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp);
@ -497,14 +529,11 @@ struct pixman_implementation_t
pixman_implementation_t * toplevel;
pixman_implementation_t * fallback;
const pixman_fast_path_t * fast_paths;
const pixman_iter_info_t * iter_info;
pixman_blt_func_t blt;
pixman_fill_func_t fill;
pixman_iter_init_func_t src_iter_init;
pixman_iter_init_func_t dest_iter_init;
pixman_combine_32_func_t combine_16[PIXMAN_N_OPERATORS];
pixman_combine_32_func_t combine_16_ca[PIXMAN_N_OPERATORS];
pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS];
pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS];
pixman_combine_float_func_t combine_float[PIXMAN_N_OPERATORS];
@ -536,8 +565,7 @@ pixman_combine_32_func_t
_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
pixman_op_t op,
pixman_bool_t component_alpha,
pixman_bool_t wide,
pixman_bool_t rgb16);
pixman_bool_t wide);
pixman_bool_t
_pixman_implementation_blt (pixman_implementation_t *imp,
@ -565,29 +593,17 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
int height,
uint32_t filler);
pixman_bool_t
_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t flags,
uint32_t image_flags);
pixman_bool_t
_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t flags,
uint32_t image_flags);
void
_pixman_implementation_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t flags,
uint32_t image_flags);
/* Specific implementations */
pixman_implementation_t *
@ -609,6 +625,11 @@ pixman_implementation_t *
_pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
#endif
#ifdef USE_SSSE3
pixman_implementation_t *
_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
#endif
#ifdef USE_ARM_SIMD
pixman_implementation_t *
_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
@ -670,6 +691,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * region,
uint32_t *
_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
void
_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info);
/* These "formats" all have depth 0, so they
* will never clash with any real ones
*/
@ -709,7 +733,6 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24)
#define FAST_PATH_BITS_IMAGE (1 << 25)
#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26)
#define FAST_PATH_16_FORMAT (1 << 27)
#define FAST_PATH_PAD_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
@ -801,6 +824,9 @@ pixman_malloc_ab (unsigned int n, unsigned int b);
void *
pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
void *
pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c);
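Judging from its neighbours pixman_malloc_ab () and pixman_malloc_abc (), the new helper is an overflow-checked allocator for a*b + c bytes (presumably added for the dither state). A plausible reconstruction, assuming the same INT32_MAX guard style as pixman_malloc_ab; the actual body lives in pixman-utils.c and is not shown in this hunk:

    #include <stdint.h>
    #include <stdlib.h>

    void *
    pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c)
    {
        /* Refuse the allocation if a * b or a * b + c could overflow. */
        if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c)
            return NULL;

        return malloc (a * b + c);
    }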
pixman_bool_t
_pixman_multiply_overflows_size (size_t a, size_t b);
@ -904,6 +930,8 @@ pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link)
#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN)
/* Conversion between 8888 and 0565 */
static force_inline uint16_t
@ -1039,15 +1067,13 @@ float pixman_unorm_to_float (uint16_t u, int n_bits);
#endif
#ifdef DEBUG
void
_pixman_log_error (const char *function, const char *message);
#define return_if_fail(expr) \
do \
{ \
if (!(expr)) \
if (unlikely (!(expr))) \
{ \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
return; \
@ -1058,7 +1084,7 @@ _pixman_log_error (const char *function, const char *message);
#define return_val_if_fail(expr, retval) \
do \
{ \
if (!(expr)) \
if (unlikely (!(expr))) \
{ \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
return (retval); \
@ -1069,55 +1095,30 @@ _pixman_log_error (const char *function, const char *message);
#define critical_if_fail(expr) \
do \
{ \
if (!(expr)) \
if (unlikely (!(expr))) \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
} \
while (0)
#else
#define _pixman_log_error(f,m) do { } while (0)
#define return_if_fail(expr) \
do \
{ \
if (!(expr)) \
return; \
} \
while (0)
#define return_val_if_fail(expr, retval) \
do \
{ \
if (!(expr)) \
return (retval); \
} \
while (0)
#define critical_if_fail(expr) \
do \
{ \
} \
while (0)
#endif
/*
* Matrix
*/
typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
PIXMAN_EXPORT
pixman_bool_t
pixman_transform_point_31_16 (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
PIXMAN_EXPORT
void
pixman_transform_point_31_16_3d (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
PIXMAN_EXPORT
void
pixman_transform_point_31_16_affine (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,

View file

@ -34,8 +34,6 @@
#include <math.h>
#include "pixman-private.h"
#include "pixman-dither.h"
static inline pixman_fixed_32_32_t
dot (pixman_fixed_48_16_t x1,
pixman_fixed_48_16_t y1,
@ -68,15 +66,18 @@ fdot (double x1,
return x1 * x2 + y1 * y2 + z1 * z2;
}
static uint32_t
radial_compute_color (double a,
double b,
double c,
double inva,
double dr,
double mindr,
pixman_gradient_walker_t *walker,
pixman_repeat_t repeat)
static void
radial_write_color (double a,
double b,
double c,
double inva,
double dr,
double mindr,
pixman_gradient_walker_t *walker,
pixman_repeat_t repeat,
int Bpp,
pixman_gradient_walker_write_t write_pixel,
uint32_t *buffer)
{
/*
* In this function error propagation can lead to bad results:
@ -101,21 +102,31 @@ radial_compute_color (double a,
double t;
if (b == 0)
return 0;
{
memset (buffer, 0, Bpp);
return;
}
t = pixman_fixed_1 / 2 * c / b;
if (repeat == PIXMAN_REPEAT_NONE)
{
if (0 <= t && t <= pixman_fixed_1)
return _pixman_gradient_walker_pixel (walker, t);
{
write_pixel (walker, t, buffer);
return;
}
}
else
{
if (t * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t);
{
write_pixel (walker, t, buffer);
return;
}
}
return 0;
memset (buffer, 0, Bpp);
return;
}
discr = fdot (b, a, 0, b, -c, 0);
@ -141,24 +152,40 @@ radial_compute_color (double a,
if (repeat == PIXMAN_REPEAT_NONE)
{
if (0 <= t0 && t0 <= pixman_fixed_1)
return _pixman_gradient_walker_pixel (walker, t0);
{
write_pixel (walker, t0, buffer);
return;
}
else if (0 <= t1 && t1 <= pixman_fixed_1)
return _pixman_gradient_walker_pixel (walker, t1);
{
write_pixel (walker, t1, buffer);
return;
}
}
else
{
if (t0 * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t0);
{
write_pixel (walker, t0, buffer);
return;
}
else if (t1 * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t1);
{
write_pixel (walker, t1, buffer);
return;
}
}
}
return 0;
memset (buffer, 0, Bpp);
return;
}
static uint32_t *
radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
radial_get_scanline (pixman_iter_t *iter,
const uint32_t *mask,
int Bpp,
pixman_gradient_walker_write_t write_pixel)
{
/*
* Implementation of radial gradients following the PDF specification.
@ -249,7 +276,7 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
gradient_t *gradient = (gradient_t *)image;
radial_gradient_t *radial = (radial_gradient_t *)image;
uint32_t *end = buffer + width;
uint32_t *end = buffer + width * (Bpp / 4);
pixman_gradient_walker_t walker;
pixman_vector_t v, unit;
@ -332,18 +359,21 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
if (!mask || *mask++)
{
*buffer = radial_compute_color (radial->a, b, c,
radial->inva,
radial->delta.radius,
radial->mindr,
&walker,
image->common.repeat);
radial_write_color (radial->a, b, c,
radial->inva,
radial->delta.radius,
radial->mindr,
&walker,
image->common.repeat,
Bpp,
write_pixel,
buffer);
}
b += db;
c += dc;
dc += ddc;
++buffer;
buffer += (Bpp / 4);
}
}
else
@ -377,20 +407,23 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
pdx, pdy, radial->c1.radius);
/* / pixman_fixed_1 / pixman_fixed_1 */
*buffer = radial_compute_color (radial->a, b, c,
radial->inva,
radial->delta.radius,
radial->mindr,
&walker,
image->common.repeat);
radial_write_color (radial->a, b, c,
radial->inva,
radial->delta.radius,
radial->mindr,
&walker,
image->common.repeat,
Bpp,
write_pixel,
buffer);
}
else
{
*buffer = 0;
memset (buffer, 0, Bpp);
}
}
++buffer;
buffer += (Bpp / 4);
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
@ -403,286 +436,35 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
}
static uint32_t *
radial_get_scanline_16 (pixman_iter_t *iter, const uint32_t *mask)
radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
/*
* Implementation of radial gradients following the PDF specification.
* See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference
* Manual (PDF 32000-1:2008 at the time of this writing).
*
* In the radial gradient problem we are given two circles (c₁,r₁) and
* (c₂,r₂) that define the gradient itself.
*
* Mathematically the gradient can be defined as the family of circles
*
* ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂)
*
* excluding those circles whose radius would be < 0. When a point
* belongs to more than one circle, the one with a bigger t is the only
* one that contributes to its color. When a point does not belong
* to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0).
* Further limitations on the range of values for t are imposed when
* the gradient is not repeated, namely t must belong to [0,1].
*
* The graphical result is the same as drawing the valid (radius > 0)
* circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient
* is not repeated) using SOURCE operator composition.
*
* It looks like a cone pointing towards the viewer if the ending circle
* is smaller than the starting one, a cone pointing inside the page if
* the starting circle is the smaller one and like a cylinder if they
* have the same radius.
*
* What we actually do is, given the point whose color we are interested
* in, compute the t values for that point, solving for t in:
*
* length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂
*
* Let's rewrite it in a simpler way, by defining some auxiliary
* variables:
*
* cd = c₂ - c₁
* pd = p - c₁
* dr = r₂ - r₁
* length(t·cd - pd) = r₁ + t·dr
*
* which actually means
*
* hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr
*
* or
*
* √((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr.
*
* If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes:
*
* (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)²
*
* where we can actually expand the squares and solve for t:
*
* t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² =
* = r₁² + 2·r₁·t·dr + t²·dr²
*
* (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t +
* (pdx² + pdy² - r₁²) = 0
*
* A = cdx² + cdy² - dr²
* B = pdx·cdx + pdy·cdy + r₁·dr
* C = pdx² + pdy² - r₁²
* At² - 2Bt + C = 0
*
* The solutions (unless the equation degenerates because of A = 0) are:
*
* t = (B ± √(B² - A·C)) / A
*
* The solution we are going to prefer is the bigger one, unless the
* radius associated to it is negative (or it falls outside the valid t
* range).
*
* Additional observations (useful for optimizations):
* A does not depend on p
*
* A < 0 <=> one of the two circles completely contains the other one
* <=> for every p, the radiuses associated with the two t solutions
* have opposite sign
*/
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint16_t *buffer = iter->buffer;
pixman_bool_t toggle = ((x ^ y) & 1);
gradient_t *gradient = (gradient_t *)image;
radial_gradient_t *radial = (radial_gradient_t *)image;
uint16_t *end = buffer + width;
pixman_gradient_walker_t walker;
pixman_vector_t v, unit;
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
if (image->common.transform)
{
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
unit.vector[0] = image->common.transform->matrix[0][0];
unit.vector[1] = image->common.transform->matrix[1][0];
unit.vector[2] = image->common.transform->matrix[2][0];
}
else
{
unit.vector[0] = pixman_fixed_1;
unit.vector[1] = 0;
unit.vector[2] = 0;
}
if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)
{
/*
* Given:
*
* t = (B ± (B² - A·C)) / A
*
* where
*
* A = cdx² + cdy² - dr²
* B = pdx·cdx + pdy·cdy + r·dr
* C = pdx² + pdy² - r²
* det = B² - A·C
*
* Since we have an affine transformation, we know that (pdx, pdy)
* increase linearly with each pixel,
*
* pdx = pdx + n·ux,
* pdy = pdy + n·uy,
*
* we can then express B, C and det through multiple differentiation.
*/
pixman_fixed_32_32_t b, db, c, dc, ddc;
/* warning: this computation may overflow */
v.vector[0] -= radial->c1.x;
v.vector[1] -= radial->c1.y;
/*
* B and C are computed and updated exactly.
* If fdot was used instead of dot, in the worst case it would
* lose 11 bits of precision in each of the multiplication and
* summing up would zero out all the bit that were preserved,
* thus making the result 0 instead of the correct one.
* This would mean a worst case of unbound relative error or
* about 2^10 absolute error
*/
b = dot (v.vector[0], v.vector[1], radial->c1.radius,
radial->delta.x, radial->delta.y, radial->delta.radius);
db = dot (unit.vector[0], unit.vector[1], 0,
radial->delta.x, radial->delta.y, 0);
c = dot (v.vector[0], v.vector[1],
-((pixman_fixed_48_16_t) radial->c1.radius),
v.vector[0], v.vector[1], radial->c1.radius);
dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0],
2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1],
0,
unit.vector[0], unit.vector[1], 0);
ddc = 2 * dot (unit.vector[0], unit.vector[1], 0,
unit.vector[0], unit.vector[1], 0);
while (buffer < end)
{
if (!mask || *mask++)
{
*buffer = dither_8888_to_0565(
radial_compute_color (radial->a, b, c,
radial->inva,
radial->delta.radius,
radial->mindr,
&walker,
image->common.repeat),
toggle);
}
toggle ^= 1;
b += db;
c += dc;
dc += ddc;
++buffer;
}
}
else
{
/* projective */
/* Warning:
* error propagation guarantees are much looser than in the affine case
*/
while (buffer < end)
{
if (!mask || *mask++)
{
if (v.vector[2] != 0)
{
double pdx, pdy, invv2, b, c;
invv2 = 1. * pixman_fixed_1 / v.vector[2];
pdx = v.vector[0] * invv2 - radial->c1.x;
/* / pixman_fixed_1 */
pdy = v.vector[1] * invv2 - radial->c1.y;
/* / pixman_fixed_1 */
b = fdot (pdx, pdy, radial->c1.radius,
radial->delta.x, radial->delta.y,
radial->delta.radius);
/* / pixman_fixed_1 / pixman_fixed_1 */
c = fdot (pdx, pdy, -radial->c1.radius,
pdx, pdy, radial->c1.radius);
/* / pixman_fixed_1 / pixman_fixed_1 */
*buffer = dither_8888_to_0565 (
radial_compute_color (radial->a, b, c,
radial->inva,
radial->delta.radius,
radial->mindr,
&walker,
image->common.repeat),
toggle);
}
else
{
*buffer = 0;
}
}
++buffer;
toggle ^= 1;
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
v.vector[2] += unit.vector[2];
}
}
iter->y++;
return iter->buffer;
return radial_get_scanline (iter, mask, 4,
_pixman_gradient_walker_write_narrow);
}
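For reference, the derivation in the long comment above reduces to a short scalar routine. The sketch below uses plain doubles with no incremental updates, fixed-point scaling, or repeat handling; it is illustrative only, the real logic being the incremental form in radial_write_color () earlier in this file:

    #include <math.h>

    /* Solve At² - 2Bt + C = 0 for the gradient parameter t, preferring
     * t0 = (B + √det)/A and falling back to t1, as the code above does. */
    static double
    radial_solve_t (double cdx, double cdy, double dr,  /* c₂ - c₁, r₂ - r₁ */
                    double pdx, double pdy, double r1,  /* p - c₁ */
                    int *valid)
    {
        double a = cdx * cdx + cdy * cdy - dr * dr;
        double b = pdx * cdx + pdy * cdy + r1 * dr;
        double c = pdx * pdx + pdy * pdy - r1 * r1;
        double det, t0, t1;

        if (a == 0.0)                    /* degenerate: -2Bt + C = 0 */
        {
            if (b == 0.0) { *valid = 0; return 0.0; }
            t0 = 0.5 * c / b;
            *valid = (r1 + t0 * dr >= 0.0);
            return t0;
        }

        det = b * b - a * c;
        if (det < 0.0) { *valid = 0; return 0.0; }

        t0 = (b + sqrt (det)) / a;       /* preferred solution */
        t1 = (b - sqrt (det)) / a;
        if (r1 + t0 * dr >= 0.0) { *valid = 1; return t0; }
        if (r1 + t1 * dr >= 0.0) { *valid = 1; return t1; }
        *valid = 0;
        return 0.0;
    }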
static uint32_t *
radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
uint32_t *buffer = radial_get_scanline_narrow (iter, NULL);
pixman_expand_to_float (
(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
return buffer;
return radial_get_scanline (iter, NULL, 16,
_pixman_gradient_walker_write_wide);
}
void
_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (iter->iter_flags & ITER_16)
iter->get_scanline = radial_get_scanline_16;
else if (iter->iter_flags & ITER_NARROW)
if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = radial_get_scanline_narrow;
else
iter->get_scanline = radial_get_scanline_wide;
}
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_radial_gradient (const pixman_point_fixed_t * inner,
const pixman_point_fixed_t * outer,
pixman_fixed_t inner_radius,
pixman_fixed_t outer_radius,
const pixman_gradient_stop_t *stops,
int n_stops)
const pixman_point_fixed_t * outer,
pixman_fixed_t inner_radius,
pixman_fixed_t outer_radius,
const pixman_gradient_stop_t *stops,
int n_stops)
{
pixman_image_t *image;
radial_gradient_t *radial;

View file

@ -298,13 +298,6 @@ PREFIX (_equal) (region_type_t *reg1, region_type_t *reg2)
box_type_t *rects1;
box_type_t *rects2;
/*
* If the region is empty the extents are undefined so we need to check
* for empty before comparing the extents.
*/
if (PIXREGION_NIL (reg1) && PIXREGION_NIL(reg2))
return TRUE;
if (reg1->extents.x1 != reg2->extents.x1)
return FALSE;
@ -1341,15 +1334,6 @@ PREFIX(_intersect_rect) (region_type_t *dest,
region.extents.x2 = x + width;
region.extents.y2 = y + height;
if (!GOOD_RECT (&region.extents))
{
if (BAD_RECT (&region.extents))
_pixman_log_error (FUNC, "Invalid rectangle passed");
FREE_DATA (dest);
PREFIX (_init) (dest);
return TRUE;
}
return PREFIX(_intersect) (dest, source, &region);
}
@ -1874,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region,
else if (r2->x1 <= x1)
{
/*
* Subtrahend preceeds minuend: nuke left edge of minuend.
* Subtrahend precedes minuend: nuke left edge of minuend.
*/
x1 = r2->x2;
if (x1 >= r1->x2)
@ -1998,7 +1982,7 @@ PREFIX (_subtract) (region_type_t *reg_d,
}
/* Add those rectangles in region 1 that aren't in region 2,
do yucky substraction for overlaps, and
do yucky subtraction for overlaps, and
just throw away rectangles in region 2 that aren't in region 1 */
if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE))
return FALSE;
@ -2058,7 +2042,7 @@ PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */
}
/* Add those rectangles in region 1 that aren't in region 2,
* do yucky substraction for overlaps, and
* do yucky subtraction for overlaps, and
* just throw away rectangles in region 2 that aren't in region 1
*/
inv_reg.extents = *inv_rect;

View file

@ -30,10 +30,10 @@ static uint32_t
color_to_uint32 (const pixman_color_t *color)
{
return
(color->alpha >> 8 << 24) |
(color->red >> 8 << 16) |
(color->green & 0xff00) |
(color->blue >> 8);
((unsigned int) color->alpha >> 8 << 24) |
((unsigned int) color->red >> 8 << 16) |
((unsigned int) color->green & 0xff00) |
((unsigned int) color->blue >> 8);
}
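The widening casts matter because the uint16_t channels promote to signed int before the shifts: for alpha >= 0x8000, `alpha >> 8 << 24` shifts a 1 into the sign bit of an int, which is undefined behaviour. A minimal reproduction of the hazard (illustrative, not part of the patch):

    #include <stdint.h>

    uint32_t pack_alpha (uint16_t alpha)
    {
        /* BAD: alpha promotes to (signed) int, so when alpha >= 0x8000
         * the final shift overflows int -- undefined behaviour. */
        /* return alpha >> 8 << 24; */

        /* OK: do the shifts in unsigned arithmetic, as the patch does. */
        return (unsigned int) alpha >> 8 << 24;
    }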
static argb_t

View file

@ -30,6 +30,9 @@
#include <config.h>
#endif
/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */
#define PSHUFD_IS_FAST 0
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
@ -515,7 +518,8 @@ core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
static force_inline uint32_t
combine1 (const uint32_t *ps, const uint32_t *pm)
{
uint32_t s = *ps;
uint32_t s;
memcpy(&s, ps, sizeof(uint32_t));
if (pm)
{
@ -3253,7 +3257,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
m = *((uint32_t*)mask);
memcpy(&m, mask, sizeof(uint32_t));
if (srca == 0xff && m == 0xffffffff)
{
@ -3330,8 +3334,8 @@ sse2_fill (pixman_implementation_t *imp,
if (bpp == 8)
{
uint8_t b;
uint16_t w;
uint32_t b;
uint32_t w;
stride = stride * (int) sizeof (uint32_t) / 1;
byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
@ -3525,7 +3529,7 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
m = *((uint32_t*)mask);
memcpy(&m, mask, sizeof(uint32_t));
if (srca == 0xff && m == 0xffffffff)
{
@ -3647,7 +3651,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
m = *((uint32_t*)mask);
memcpy(&m, mask, sizeof(uint32_t));
mask += 4;
if (m)
@ -3667,7 +3671,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
&xmm_dst0, &xmm_dst1);
}
m = *((uint32_t*)mask);
memcpy(&m, mask, sizeof(uint32_t));
mask += 4;
if (m)
@ -4558,7 +4562,7 @@ sse2_composite_add_n_8888 (pixman_implementation_t *imp,
dst = dst_line;
dst_line += dst_stride;
while (w && (unsigned long)dst & 15)
while (w && (uintptr_t)dst & 15)
{
d = *dst;
*dst++ =
@ -4617,7 +4621,7 @@ sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
mask_line += mask_stride;
w = width;
while (w && ((unsigned long)dst & 15))
while (w && ((uintptr_t)dst & 15))
{
uint8_t m = *mask++;
if (m)
@ -4633,7 +4637,9 @@ sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
uint32_t m = *(uint32_t*)mask;
uint32_t m;
memcpy(&m, mask, sizeof(uint32_t));
if (m)
{
__m128i xmm_mask_lo, xmm_mask_hi;
@ -4740,7 +4746,7 @@ sse2_blt (pixman_implementation_t *imp,
while (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = *(uint16_t *)s;
memmove(d, s, 2);
w -= 2;
s += 2;
d += 2;
@ -4748,7 +4754,7 @@ sse2_blt (pixman_implementation_t *imp,
while (w >= 4 && ((uintptr_t)d & 15))
{
*(uint32_t *)d = *(uint32_t *)s;
memmove(d, s, 4);
w -= 4;
s += 4;
@ -4785,7 +4791,7 @@ sse2_blt (pixman_implementation_t *imp,
while (w >= 4)
{
*(uint32_t *)d = *(uint32_t *)s;
memmove(d, s, 4);
w -= 4;
s += 4;
@ -4794,7 +4800,7 @@ sse2_blt (pixman_implementation_t *imp,
if (w >= 2)
{
*(uint16_t *)d = *(uint16_t *)s;
memmove(d, s, 2);
w -= 2;
s += 2;
d += 2;
@ -4856,7 +4862,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
while (w && (uintptr_t)dst & 15)
{
s = 0xff000000 | *src++;
m = (uint32_t) *mask++;
memcpy(&m, mask++, sizeof(uint32_t));
d = *dst;
ms = unpack_32_1x128 (s);
@ -4874,7 +4880,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
m = *(uint32_t*) mask;
memcpy(&m, mask, sizeof(uint32_t));
xmm_src = _mm_or_si128 (
load_128_unaligned ((__m128i*)src), mask_ff000000);
@ -4910,7 +4916,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
while (w)
{
m = (uint32_t) *mask++;
memcpy(&m, mask++, sizeof(uint32_t));
if (m)
{
@ -5013,7 +5019,7 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
m = *(uint32_t *) mask;
memcpy(&m, mask, sizeof(uint32_t));
if (m)
{
@ -5554,69 +5560,134 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1)
#if PSHUFD_IS_FAST
#define BILINEAR_DECLARE_VARIABLES \
/***********************************************************************************/
# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\
const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\
const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \
unit_x, unit_x, unit_x, unit_x); \
const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
unit_x, -unit_x, unit_x, -unit_x); \
const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \
unit_x * 4, -unit_x * 4, \
unit_x * 4, -unit_x * 4, \
unit_x * 4, -unit_x * 4); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
__m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx)
__m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \
vx + unit_x * 2, -(vx + 1) - unit_x * 2, \
vx + unit_x * 1, -(vx + 1) - unit_x * 1, \
vx + unit_x * 0, -(vx + 1) - unit_x * 0); \
__m128i xmm_wh_state;
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_) \
do { \
__m128i xmm_wh, xmm_lo, xmm_hi, a; \
int phase = phase_; \
__m128i xmm_wh, xmm_a, xmm_b; \
/* fetch 2x2 pixel block into sse2 registers */ \
__m128i tltr = _mm_loadl_epi64 ( \
(__m128i *)&src_top[pixman_fixed_to_int (vx)]); \
__m128i blbr = _mm_loadl_epi64 ( \
(__m128i *)&src_bottom[pixman_fixed_to_int (vx)]); \
__m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \
__m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \
vx += unit_x; \
/* vertical interpolation */ \
a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), \
xmm_wt), \
_mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), \
xmm_wb)); \
if (BILINEAR_INTERPOLATION_BITS < 8) \
xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \
xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \
xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \
/* calculate horizontal weights */ \
if (phase <= 0) \
{ \
/* calculate horizontal weights */ \
xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \
_mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh); \
xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \
phase = 0; \
} \
else \
{ \
/* calculate horizontal weights */ \
xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \
_mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
_mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
} \
/* shift and pack the result */ \
a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \
a = _mm_packs_epi32 (a, a); \
a = _mm_packus_epi16 (a, a); \
pix = _mm_cvtsi128_si32 (a); \
xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \
phase, phase)); \
/* horizontal interpolation */ \
xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh); \
/* shift the result */ \
pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \
} while (0)
#else /************************************************************************/
# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
unit_x, -unit_x, unit_x, -unit_x); \
const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \
unit_x * 4, -unit_x * 4, \
unit_x * 4, -unit_x * 4, \
unit_x * 4, -unit_x * 4); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
__m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \
vx, -(vx + 1), vx, -(vx + 1))
#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase) \
do { \
__m128i xmm_wh, xmm_a, xmm_b; \
/* fetch 2x2 pixel block into sse2 registers */ \
__m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \
__m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \
(void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \
vx += unit_x; \
/* vertical interpolation */ \
xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \
xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \
xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \
/* calculate horizontal weights */ \
xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \
/* horizontal interpolation */ \
xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \
xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \
/* shift the result */ \
pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \
} while (0)
/***********************************************************************************/
#endif
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix); \
do { \
__m128i xmm_pix; \
BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1); \
xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \
xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \
pix = _mm_cvtsi128_si32 (xmm_pix); \
} while(0)
#define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix); \
do { \
__m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \
BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0); \
BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1); \
BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2); \
BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3); \
xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \
xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \
pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); \
} while(0)
#define BILINEAR_SKIP_ONE_PIXEL() \
do { \
vx += unit_x; \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \
} while(0)
#define BILINEAR_SKIP_FOUR_PIXELS() \
do { \
vx += unit_x * 4; \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4); \
} while(0)
/***********************************************************************************/
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
@ -5625,24 +5696,28 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t vx_,
pixman_fixed_t unit_x_,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
intptr_t vx = vx_;
intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
uint32_t pix1, pix2;
while ((w -= 4) >= 0)
while (w && ((uintptr_t)dst & 15))
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
*dst++ = pix1;
*dst++ = pix2;
*dst++ = pix3;
*dst++ = pix4;
w--;
}
while ((w -= 4) >= 0) {
__m128i xmm_src;
BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
_mm_store_si128 ((__m128i *)dst, xmm_src);
dst += 4;
}
if (w & 2)
@ -5661,23 +5736,20 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
}
/* Add extra NULL argument to the existing bilinear fast paths to indicate
* that we don't need two-pass processing */
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
@ -5697,7 +5769,7 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst,
intptr_t vx = vx_;
intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
uint32_t pix1, pix2;
while (w && ((uintptr_t)dst & 15))
{
@ -5708,14 +5780,9 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst,
while ((w -= 4) >= 0) {
__m128i xmm_src;
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
_mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000));
dst += 4;
BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
_mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000));
dst += 4;
}
if (w & 2)
@ -5734,23 +5801,18 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst,
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC,
scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL,
scaled_bilinear_scanline_sse2_x888_8888_SRC,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC,
scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL,
scaled_bilinear_scanline_sse2_x888_8888_SRC,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC,
scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL,
scaled_bilinear_scanline_sse2_x888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
#if 0
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
#endif
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
const uint32_t * mask,
@ -5759,13 +5821,15 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t vx_,
pixman_fixed_t unit_x_,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
intptr_t vx = vx_;
intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
uint32_t pix1, pix2;
while (w && ((uintptr_t)dst & 15))
{
@ -5787,12 +5851,7 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
__m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
__m128i xmm_alpha_hi, xmm_alpha_lo;
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
if (!is_zero (xmm_src))
{
@ -5835,56 +5894,22 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
/* An example of SSE2 two-stage bilinear_over_8888_0565 fast path, which is implemented
as scaled_bilinear_scanline_sse2_8888_8888_SRC + op_bilinear_over_8888_0565 */
void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
{
/* Note: this is not really fast and should be based on 8 pixel loop from sse2_composite_over_8888_0565 */
while (--width >= 0)
{
*dst = composite_over_8888_0565pixel (*src, *dst);
src++;
dst++;
}
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_cover_OVER,
scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
uint32_t, uint32_t, uint16_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_pad_OVER,
scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
uint32_t, uint32_t, uint16_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_none_OVER,
scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
uint32_t, uint32_t, uint16_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_normal_OVER,
scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
uint32_t, uint32_t, uint16_t,
NORMAL, FLAG_NONE)
/*****************************/
static force_inline void
scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
const uint8_t * mask,
@ -5893,13 +5918,15 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t vx_,
pixman_fixed_t unit_x_,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
intptr_t vx = vx_;
intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
uint32_t pix1, pix2;
uint32_t m;
while (w && ((uintptr_t)dst & 15))
@ -5946,16 +5973,11 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
m = *(uint32_t*)mask;
memcpy(&m, mask, sizeof(uint32_t));
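/* The memcpy avoids the unaligned, strict-aliasing-violating load above;
 * for a fixed 4-byte size, compilers lower it to a single 32-bit move.
 */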
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
if (m == 0xffffffff && is_opaque (xmm_src))
{
@ -5982,10 +6004,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_FOUR_PIXELS ();
}
w -= 4;
@ -6033,19 +6052,19 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
COVER, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
PAD, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NONE, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NORMAL, FLAG_HAVE_NON_SOLID_MASK)
@ -6057,13 +6076,15 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t vx_,
pixman_fixed_t unit_x_,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
intptr_t vx = vx_;
intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
uint32_t pix1;
__m128i xmm_mask;
if (zero_src || (*mask >> 24) == 0)
@ -6093,19 +6114,15 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
while (w >= 4)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
__m128i xmm_src;
BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
if (pix1 | pix2 | pix3 | pix4)
if (!is_zero (xmm_src))
{
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
xmm_dst = load_128_aligned ((__m128i*)dst);
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
@ -6148,19 +6165,19 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_HAVE_SOLID_MASK)
@ -6260,31 +6277,15 @@ static const pixman_fast_path_t sse2_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
@ -6315,11 +6316,6 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
/* and here the needed entries are added to the fast path table */
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, sse2_8888_0565),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, b5g6r5, sse2_8888_0565),
{ PIXMAN_OP_NONE },
};
@ -6451,52 +6447,23 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
typedef struct
{
pixman_format_code_t format;
pixman_iter_get_scanline_t get_scanline;
} fetcher_info_t;
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
{ PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
{ PIXMAN_a8, sse2_fetch_a8 },
{ PIXMAN_null }
};
static pixman_bool_t
sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
#define FLAGS \
#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS)
{
const fetcher_info_t *f;
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
iter->get_scanline = f->get_scanline;
return TRUE;
}
}
}
return FALSE;
}
static const pixman_iter_info_t sse2_iters[] =
{
{ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
_pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL
},
{ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
_pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL
},
{ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
_pixman_iter_init_bits_stride, sse2_fetch_a8, NULL
},
{ PIXMAN_null },
};
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
@ -6554,7 +6521,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
imp->blt = sse2_blt;
imp->fill = sse2_fill;
imp->src_iter_init = sse2_src_iter_init;
imp->iter_info = sse2_iters;
return imp;
}

View file

@ -0,0 +1,351 @@
/*
* Copyright © 2013 Soren Sandmann Pedersen
* Copyright © 2013 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Soren Sandmann (soren.sandmann@gmail.com)
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include "pixman-private.h"
#include "pixman-inlines.h"
typedef struct
{
int y;
uint64_t * buffer;
} line_t;
typedef struct
{
line_t lines[2];
pixman_fixed_t y;
pixman_fixed_t x;
uint64_t data[1];
} bilinear_info_t;
static void
ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
{
uint32_t *bits = image->bits + y * image->rowstride;
__m128i vx = _mm_set_epi16 (
- (x + 1), x, - (x + 1), x,
- (x + ux + 1), x + ux, - (x + ux + 1), x + ux);
__m128i vux = _mm_set_epi16 (
- 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
- 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
__m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
__m128i *b = (__m128i *)line->buffer;
__m128i vrl0, vrl1;
while ((n -= 2) >= 0)
{
__m128i vw, vr, s;
vrl1 = _mm_loadl_epi64 (
(__m128i *)(bits + pixman_fixed_to_int (x + ux)));
/* vrl1: R1, L1 */
final_pixel:
vrl0 = _mm_loadl_epi64 (
(__m128i *)(bits + pixman_fixed_to_int (x)));
/* vrl0: R0, L0 */
/* The weights are based on vx which is a vector of
*
* - (x + 1), x, - (x + 1), x,
* - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
*
* so the 16 bit weights end up like this:
*
* iw0, w0, iw0, w0, iw1, w1, iw1, w1
*
* and after shifting and packing, we get these bytes:
*
* iw0, w0, iw0, w0, iw1, w1, iw1, w1,
* iw0, w0, iw0, w0, iw1, w1, iw1, w1,
*
* which means the first and the second input pixel
* have to be interleaved like this:
*
* la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
* lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
*
* before maddubsw can be used.
*/
vw = _mm_add_epi16 (
vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
/* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
*/
vw = _mm_packus_epi16 (vw, vw);
/* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
* iw0, w0, iw0, w0, iw1, w1, iw1, w1
*/
vx = _mm_add_epi16 (vx, vux);
x += 2 * ux;
vr = _mm_unpacklo_epi16 (vrl1, vrl0);
/* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
/* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
vr = _mm_unpackhi_epi8 (vr, s);
/* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
* lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
*/
vr = _mm_maddubs_epi16 (vr, vw);
/* When the weight is 0, the inverse weight is
* 128 which can't be represented in a signed byte.
* As a result maddubsw computes the following:
*
* r = l * -128 + r * 0
*
* rather than the desired
*
* r = l * 128 + r * 0
*
* We fix this by taking the absolute value of the
* result.
*/
vr = _mm_abs_epi16 (vr);
/* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
_mm_store_si128 (b++, vr);
}
if (n == -1)
{
vrl1 = _mm_setzero_si128();
goto final_pixel;
}
line->y = y;
}
static uint32_t *
ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
{
pixman_fixed_t fx, ux;
bilinear_info_t *info = iter->data;
line_t *line0, *line1;
int y0, y1;
int32_t dist_y;
__m128i vw;
int i;
fx = info->x;
ux = iter->image->common.transform->matrix[0][0];
y0 = pixman_fixed_to_int (info->y);
y1 = y0 + 1;
line0 = &info->lines[y0 & 0x01];
line1 = &info->lines[y1 & 0x01];
if (line0->y != y0)
{
ssse3_fetch_horizontal (
&iter->image->bits, line0, y0, fx, ux, iter->width);
}
if (line1->y != y1)
{
ssse3_fetch_horizontal (
&iter->image->bits, line1, y1, fx, ux, iter->width);
}
dist_y = pixman_fixed_to_bilinear_weight (info->y);
dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);
vw = _mm_set_epi16 (
dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
for (i = 0; i + 3 < iter->width; i += 4)
{
__m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
__m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
__m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
__m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
__m128i r0, r1, tmp, p;
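/* Vertical interpolation computes top + (((bot - top) * w) >> 16).
 * _mm_mulhi_epu16 is an unsigned multiply, but (bot - top) may be
 * negative; in that case the unsigned product comes out too large by
 * exactly w, so lanes where bot < top get w subtracted again via the
 * cmplt/and mask below.
 */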
r0 = _mm_mulhi_epu16 (
_mm_sub_epi16 (bot0, top0), vw);
tmp = _mm_cmplt_epi16 (bot0, top0);
tmp = _mm_and_si128 (tmp, vw);
r0 = _mm_sub_epi16 (r0, tmp);
r0 = _mm_add_epi16 (r0, top0);
r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
/* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
/* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
r1 = _mm_mulhi_epu16 (
_mm_sub_epi16 (bot1, top1), vw);
tmp = _mm_cmplt_epi16 (bot1, top1);
tmp = _mm_and_si128 (tmp, vw);
r1 = _mm_sub_epi16 (r1, tmp);
r1 = _mm_add_epi16 (r1, top1);
r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
/* r1: A3 R3 G3 B3 A2 R2 G2 B2 */
p = _mm_packus_epi16 (r0, r1);
_mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
}
while (i < iter->width)
{
__m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
__m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
__m128i r0, tmp, p;
r0 = _mm_mulhi_epu16 (
_mm_sub_epi16 (bot0, top0), vw);
tmp = _mm_cmplt_epi16 (bot0, top0);
tmp = _mm_and_si128 (tmp, vw);
r0 = _mm_sub_epi16 (r0, tmp);
r0 = _mm_add_epi16 (r0, top0);
r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
/* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
/* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
p = _mm_packus_epi16 (r0, r0);
if (iter->width - i == 1)
{
*(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
i++;
}
else
{
_mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
i += 2;
}
}
info->y += iter->image->common.transform->matrix[1][1];
return iter->buffer;
}
static void
ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter)
{
free (iter->data);
}
static void
ssse3_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
{
int width = iter->width;
bilinear_info_t *info;
pixman_vector_t v;
/* Reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
if (!pixman_transform_point_3d (iter->image->common.transform, &v))
goto fail;
info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64);
if (!info)
goto fail;
info->x = v.vector[0] - pixman_fixed_1 / 2;
info->y = v.vector[1] - pixman_fixed_1 / 2;
#define ALIGN(addr) \
((void *)((((uintptr_t)(addr)) + 15) & (~15)))
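/* The allocation above provides 2 * width uint64_t entries for the two
 * horizontal line buffers (data[1] already supplies one entry, hence
 * the "- 1"), plus 64 spare bytes so both buffers can be rounded up to
 * 16-byte boundaries by ALIGN.
 */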
/* It is safe to set the y coordinates to -1 initially
* because COVER_CLIP_BILINEAR ensures that we will only
* be asked to fetch lines in the [0, height) interval
*/
info->lines[0].y = -1;
info->lines[0].buffer = ALIGN (&(info->data[0]));
info->lines[1].y = -1;
info->lines[1].buffer = ALIGN (info->lines[0].buffer + width);
iter->get_scanline = ssse3_fetch_bilinear_cover;
iter->fini = ssse3_bilinear_cover_iter_fini;
iter->data = info;
return;
fail:
/* Something went wrong, either a bad matrix or OOM; in such cases,
* we don't guarantee any particular rendering.
*/
_pixman_log_error (
FUNC, "Allocation failure or bad matrix, skipping rendering\n");
iter->get_scanline = _pixman_iter_get_scanline_noop;
iter->fini = NULL;
}
static const pixman_iter_info_t ssse3_iters[] =
{
{ PIXMAN_a8r8g8b8,
(FAST_PATH_STANDARD_FLAGS |
FAST_PATH_SCALE_TRANSFORM |
FAST_PATH_BILINEAR_FILTER |
FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
ITER_NARROW | ITER_SRC,
ssse3_bilinear_cover_iter_init,
NULL, NULL
},
{ PIXMAN_null },
};
static const pixman_fast_path_t ssse3_fast_paths[] =
{
{ PIXMAN_OP_NONE },
};
pixman_implementation_t *
_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, ssse3_fast_paths);
imp->iter_info = ssse3_iters;
return imp;
}

View file

@ -27,7 +27,6 @@
#endif
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include "pixman-private.h"
@ -49,6 +48,15 @@ _pixman_addition_overflows_int (unsigned int a, unsigned int b)
return a > INT32_MAX - b;
}
void *
pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c)
{
if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c)
return NULL;
return malloc (a * b + c);
}
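/* The guard above rejects b == 0 (avoiding the division) and any
 * combination for which a * b + c could exceed INT32_MAX, so the
 * multiply cannot wrap.  A sketch of a typical call, with hypothetical
 * height/stride values:
 *
 *     bits = pixman_malloc_ab_plus_c (height, stride_bytes, 64);
 *
 * which returns NULL instead of a short buffer on overflow.
 */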
void *
pixman_malloc_ab (unsigned int a,
unsigned int b)
@ -198,7 +206,7 @@ pixman_contract_from_float (uint32_t *dst,
for (i = 0; i < width; ++i)
{
uint8_t a, r, g, b;
uint32_t a, r, g, b;
a = float_to_unorm (src[i].a, 8);
r = float_to_unorm (src[i].r, 8);
@ -215,6 +223,17 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
void
_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
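/* image->bits.rowstride counts uint32_t units, so multiply by 4 to get
 * the stride in bytes before doing byte-granularity pointer arithmetic.
 */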
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8;
iter->stride = s;
}
#define N_TMP_BOXES (16)
pixman_bool_t
@ -293,8 +312,6 @@ _pixman_internal_only_get_implementation (void)
return get_implementation ();
}
#ifdef DEBUG
void
_pixman_log_error (const char *function, const char *message)
{
@ -311,5 +328,3 @@ _pixman_log_error (const char *function, const char *message)
n_messages++;
}
}
#endif

View file

@ -32,10 +32,10 @@
#endif
#define PIXMAN_VERSION_MAJOR 0
#define PIXMAN_VERSION_MINOR 27
#define PIXMAN_VERSION_MICRO 1
#define PIXMAN_VERSION_MINOR 40
#define PIXMAN_VERSION_MICRO 0
#define PIXMAN_VERSION_STRING "0.27.1"
#define PIXMAN_VERSION_STRING "0.40.0"
#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
((major) * 10000) \
@ -47,4 +47,8 @@
PIXMAN_VERSION_MINOR, \
PIXMAN_VERSION_MICRO)
#ifndef PIXMAN_API
# define PIXMAN_API
#endif
#endif /* PIXMAN_VERSION_H__ */

View file

@ -0,0 +1,54 @@
/*
* Copyright © 2008 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author: Carl D. Worth <cworth@cworth.org>
*/
#ifndef PIXMAN_VERSION_H__
#define PIXMAN_VERSION_H__
#ifndef PIXMAN_H__
# error pixman-version.h should only be included by pixman.h
#endif
#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@
#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@
#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@
#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@"
#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
((major) * 10000) \
+ ((minor) * 100) \
+ ((micro) * 1))
#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \
PIXMAN_VERSION_MAJOR, \
PIXMAN_VERSION_MINOR, \
PIXMAN_VERSION_MICRO)
#ifndef PIXMAN_API
# define PIXMAN_API
#endif
#endif /* PIXMAN_VERSION_H__ */

File diff suppressed because it is too large. Load diff

View file

@ -1,263 +0,0 @@
#ifndef MMX_X64_H_INCLUDED
#define MMX_X64_H_INCLUDED
/* Implementation of x64 MMX substitution functions, before
* pixman is reimplemented not to use __m64 type on Visual C++
*
* Copyright (C)2009 by George Yohng
* Released in public domain.
*/
#include <intrin.h>
#define M64C(a) (*(const __m64 *)(&a))
#define M64U(a) (*(const unsigned long long *)(&a))
__inline __m64
_m_from_int (int a)
{
long long i64 = a;
return M64C (i64);
}
__inline __m64
_mm_setzero_si64 ()
{
long long i64 = 0;
return M64C (i64);
}
__inline __m64
_mm_set_pi32 (int i1, int i0)
{
unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32);
return M64C (i64);
}
__inline void
_m_empty ()
{
}
__inline __m64
_mm_set1_pi16 (short w)
{
unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL;
return M64C (i64);
}
__inline int
_m_to_int (__m64 m)
{
return m.m64_i32[0];
}
__inline __m64
_mm_movepi64_pi64 (__m128i a)
{
return M64C (a.m128i_i64[0]);
}
__inline __m64
_m_pand (__m64 a, __m64 b)
{
unsigned long long i64 = M64U (a) & M64U (b);
return M64C (i64);
}
__inline __m64
_m_por (__m64 a, __m64 b)
{
unsigned long long i64 = M64U (a) | M64U (b);
return M64C (i64);
}
__inline __m64
_m_pxor (__m64 a, __m64 b)
{
unsigned long long i64 = M64U (a) ^ M64U (b);
return M64C (i64);
}
__inline __m64
_m_pmulhuw (__m64 a, __m64 b) /* unoptimized */
{
unsigned short d[4] =
{
(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16),
(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16),
(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16),
(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16)
};
return M64C (d[0]);
}
__inline __m64
_m_pmullw2 (__m64 a, __m64 b) /* unoptimized */
{
unsigned short d[4] =
{
(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])),
(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])),
(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])),
(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))
};
return M64C (d[0]);
}
__inline __m64
_m_pmullw (__m64 a, __m64 b) /* unoptimized */
{
unsigned long long x =
((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) +
(((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) +
(((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) +
(((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48);
return M64C (x);
}
__inline __m64
_m_paddusb (__m64 a, __m64 b) /* unoptimized */
{
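/* Saturating byte add without SIMD: process even and odd bytes
 * separately so each 8-bit sum has a spare ninth bit for the carry,
 * then smear any carry bit across its byte (multiplying the carry
 * mask by 0xFF) to clamp overflowing lanes to 0xFF before merging.
 */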
unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) +
(M64U (b) & 0x00FF00FF00FF00FFULL);
unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) +
((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL);
x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8);
return M64C (x);
}
__inline __m64
_m_paddusw (__m64 a, __m64 b) /* unoptimized */
{
unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) +
(M64U (b) & 0x0000FFFF0000FFFFULL);
unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) +
((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL);
x |= ((x & 0xFFFF0000FFFF0000ULL) >> 16) * 0xFFFF;
y |= ((y & 0xFFFF0000FFFF0000ULL) >> 16) * 0xFFFF;
x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16);
return M64C (x);
}
__inline __m64
_m_pshufw (__m64 a, int n) /* unoptimized */
{
unsigned short d[4] =
{
a.m64_u16[n & 3],
a.m64_u16[(n >> 2) & 3],
a.m64_u16[(n >> 4) & 3],
a.m64_u16[(n >> 6) & 3]
};
return M64C (d[0]);
}
__inline unsigned char
sat16 (unsigned short d)
{
if (d > 0xFF) return 0xFF;
else return d & 0xFF;
}
__inline __m64
_m_packuswb (__m64 m1, __m64 m2) /* unoptimized */
{
unsigned char d[8] =
{
sat16 (m1.m64_u16[0]),
sat16 (m1.m64_u16[1]),
sat16 (m1.m64_u16[2]),
sat16 (m1.m64_u16[3]),
sat16 (m2.m64_u16[0]),
sat16 (m2.m64_u16[1]),
sat16 (m2.m64_u16[2]),
sat16 (m2.m64_u16[3])
};
return M64C (d[0]);
}
__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */
{
unsigned char d[8] =
{
m1.m64_u8[0],
m2.m64_u8[0],
m1.m64_u8[1],
m2.m64_u8[1],
m1.m64_u8[2],
m2.m64_u8[2],
m1.m64_u8[3],
m2.m64_u8[3],
};
return M64C (d[0]);
}
__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */
{
unsigned char d[8] =
{
m1.m64_u8[4],
m2.m64_u8[4],
m1.m64_u8[5],
m2.m64_u8[5],
m1.m64_u8[6],
m2.m64_u8[6],
m1.m64_u8[7],
m2.m64_u8[7],
};
return M64C (d[0]);
}
__inline __m64 _m_psrlwi (__m64 a, int n) /* unoptimized */
{
unsigned short d[4] =
{
a.m64_u16[0] >> n,
a.m64_u16[1] >> n,
a.m64_u16[2] >> n,
a.m64_u16[3] >> n
};
return M64C (d[0]);
}
__inline __m64 _m_psrlqi (__m64 m, int n)
{
unsigned long long x = M64U (m) >> n;
return M64C (x);
}
__inline __m64 _m_psllqi (__m64 m, int n)
{
unsigned long long x = M64U (m) << n;
return M64C (x);
}
#endif /* MMX_X64_H_INCLUDED */

View file

@ -25,7 +25,7 @@
#include "pixman-private.h"
#if defined(USE_X86_MMX) || defined (USE_SSE2)
#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)
/* The CPU detection code needs to be in a file not compiled with
* "-mmmx -msse", as gcc would generate CMOV instructions otherwise
@ -39,7 +39,8 @@ typedef enum
X86_MMX_EXTENSIONS = (1 << 1),
X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS,
X86_SSE2 = (1 << 3),
X86_CMOV = (1 << 4)
X86_CMOV = (1 << 4),
X86_SSSE3 = (1 << 5)
} cpu_features_t;
#ifdef HAVE_GETISAX
@ -64,6 +65,8 @@ detect_cpu_features (void)
features |= X86_SSE;
if (result & AV_386_SSE2)
features |= X86_SSE2;
if (result & AV_386_SSSE3)
features |= X86_SSSE3;
}
return features;
@ -106,10 +109,6 @@ have_cpuid (void)
#endif
}
#ifdef _MSC_VER
#include <intrin.h> /* for __cpuid */
#endif
static void
pixman_cpuid (uint32_t feature,
uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
@ -171,6 +170,8 @@ detect_cpu_features (void)
features |= X86_SSE;
if (d & (1 << 26))
features |= X86_SSE2;
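/* CPUID.01H:ECX bit 9 indicates SSSE3 support */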
if (c & (1 << 9))
features |= X86_SSSE3;
/* Check for AMD specific features */
if ((features & X86_MMX) && !(features & X86_SSE))
@ -186,6 +187,7 @@ detect_cpu_features (void)
memcpy (vendor + 8, &c, 4);
if (strcmp (vendor, "AuthenticAMD") == 0 ||
strcmp (vendor, "HygonGenuine") == 0 ||
strcmp (vendor, "Geode by NSC") == 0)
{
pixman_cpuid (0x80000000, &a, &b, &c, &d);
@ -226,6 +228,7 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
{
#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS)
#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
#ifdef USE_X86_MMX
if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
@ -237,5 +240,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
imp = _pixman_implementation_create_sse2 (imp);
#endif
#ifdef USE_SSSE3
if (!_pixman_disabled ("ssse3") && have_feature (SSSE3_BITS))
imp = _pixman_implementation_create_ssse3 (imp);
#endif
return imp;
}

View file

@ -325,18 +325,20 @@ _pixman_compute_composite_region32 (pixman_region32_t * region,
return TRUE;
}
typedef struct
typedef struct box_48_16 box_48_16_t;
struct box_48_16
{
pixman_fixed_48_16_t x1;
pixman_fixed_48_16_t y1;
pixman_fixed_48_16_t x2;
pixman_fixed_48_16_t y2;
} box_48_16_t;
pixman_fixed_48_16_t x1;
pixman_fixed_48_16_t y1;
pixman_fixed_48_16_t x2;
pixman_fixed_48_16_t y2;
};
static pixman_bool_t
compute_transformed_extents (pixman_transform_t *transform,
compute_transformed_extents (pixman_transform_t *transform,
const pixman_box32_t *extents,
box_48_16_t *transformed)
box_48_16_t *transformed)
{
pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
pixman_fixed_t x1, y1, x2, y2;
@ -495,21 +497,12 @@ analyze_extent (pixman_image_t *image,
if (!compute_transformed_extents (transform, extents, &transformed))
return FALSE;
/* Expand the source area by a tiny bit to account for different rounding that
* may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
* 0.5 so this won't cause the area computed to be overly pessimistic.
*/
transformed.x1 -= 8 * pixman_fixed_e;
transformed.y1 -= 8 * pixman_fixed_e;
transformed.x2 += 8 * pixman_fixed_e;
transformed.y2 += 8 * pixman_fixed_e;
if (image->common.type == BITS)
{
if (pixman_fixed_to_int (transformed.x1) >= 0 &&
pixman_fixed_to_int (transformed.y1) >= 0 &&
pixman_fixed_to_int (transformed.x2) < image->bits.width &&
pixman_fixed_to_int (transformed.y2) < image->bits.height)
if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_e) >= 0 &&
pixman_fixed_to_int (transformed.y1 - pixman_fixed_e) >= 0 &&
pixman_fixed_to_int (transformed.x2 - pixman_fixed_e) < image->bits.width &&
pixman_fixed_to_int (transformed.y2 - pixman_fixed_e) < image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
}
@ -605,7 +598,7 @@ pixman_image_composite32 (pixman_op_t op,
else
{
mask_format = PIXMAN_null;
info.mask_flags = FAST_PATH_IS_OPAQUE;
info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP;
}
dest_format = dest->common.extended_format_code;
@ -784,6 +777,11 @@ color_to_pixel (const pixman_color_t *color,
{
uint32_t c = color_to_uint32 (color);
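/* Wide floating-point formats cannot be represented by the single
 * 32-bit pixel computed here, so refuse them.
 */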
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA_FLOAT)
{
return FALSE;
}
if (!(format == PIXMAN_a8r8g8b8 ||
format == PIXMAN_x8r8g8b8 ||
format == PIXMAN_a8b8g8r8 ||

View file

@ -69,11 +69,6 @@ SOFTWARE.
#ifndef PIXMAN_H__
#define PIXMAN_H__
#ifdef MOZILLA_VERSION
#include "cairo/pixman-rename.h"
#endif
#include <pixman-version.h>
#ifdef __cplusplus
@ -132,7 +127,7 @@ typedef pixman_fixed_16_16_t pixman_fixed_t;
#define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e)
#define pixman_fixed_minus_1 (pixman_int_to_fixed(-1))
#define pixman_fixed_to_int(f) ((int) ((f) >> 16))
#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((uint32_t)(i) << 16))
#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((uint32_t) (i) << 16))
#define pixman_fixed_to_double(f) (double) ((f) / (double) pixman_fixed_1)
#define pixman_double_to_fixed(d) ((pixman_fixed_t) ((d) * 65536.0))
#define pixman_fixed_frac(f) ((f) & pixman_fixed_1_minus_e)
@ -189,42 +184,73 @@ struct pixman_transform
struct pixman_box16;
typedef union pixman_image pixman_image_t;
PIXMAN_API
void pixman_transform_init_identity (struct pixman_transform *matrix);
PIXMAN_API
pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform,
struct pixman_vector *vector);
PIXMAN_API
pixman_bool_t pixman_transform_point (const struct pixman_transform *transform,
struct pixman_vector *vector);
PIXMAN_API
pixman_bool_t pixman_transform_multiply (struct pixman_transform *dst,
const struct pixman_transform *l,
const struct pixman_transform *r);
PIXMAN_API
void pixman_transform_init_scale (struct pixman_transform *t,
pixman_fixed_t sx,
pixman_fixed_t sy);
PIXMAN_API
pixman_bool_t pixman_transform_scale (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t sx,
pixman_fixed_t sy);
PIXMAN_API
void pixman_transform_init_rotate (struct pixman_transform *t,
pixman_fixed_t cos,
pixman_fixed_t sin);
PIXMAN_API
pixman_bool_t pixman_transform_rotate (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t c,
pixman_fixed_t s);
PIXMAN_API
void pixman_transform_init_translate (struct pixman_transform *t,
pixman_fixed_t tx,
pixman_fixed_t ty);
PIXMAN_API
pixman_bool_t pixman_transform_translate (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t tx,
pixman_fixed_t ty);
PIXMAN_API
pixman_bool_t pixman_transform_bounds (const struct pixman_transform *matrix,
struct pixman_box16 *b);
PIXMAN_API
pixman_bool_t pixman_transform_invert (struct pixman_transform *dst,
const struct pixman_transform *src);
PIXMAN_API
pixman_bool_t pixman_transform_is_identity (const struct pixman_transform *t);
PIXMAN_API
pixman_bool_t pixman_transform_is_scale (const struct pixman_transform *t);
PIXMAN_API
pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t);
PIXMAN_API
pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform *a,
const struct pixman_transform *b);
@ -244,42 +270,70 @@ struct pixman_f_transform
double m[3][3];
};
PIXMAN_API
pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform *t,
const struct pixman_f_transform *ft);
PIXMAN_API
void pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft,
const struct pixman_transform *t);
PIXMAN_API
pixman_bool_t pixman_f_transform_invert (struct pixman_f_transform *dst,
const struct pixman_f_transform *src);
PIXMAN_API
pixman_bool_t pixman_f_transform_point (const struct pixman_f_transform *t,
struct pixman_f_vector *v);
PIXMAN_API
void pixman_f_transform_point_3d (const struct pixman_f_transform *t,
struct pixman_f_vector *v);
PIXMAN_API
void pixman_f_transform_multiply (struct pixman_f_transform *dst,
const struct pixman_f_transform *l,
const struct pixman_f_transform *r);
PIXMAN_API
void pixman_f_transform_init_scale (struct pixman_f_transform *t,
double sx,
double sy);
PIXMAN_API
pixman_bool_t pixman_f_transform_scale (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double sx,
double sy);
PIXMAN_API
void pixman_f_transform_init_rotate (struct pixman_f_transform *t,
double cos,
double sin);
PIXMAN_API
pixman_bool_t pixman_f_transform_rotate (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double c,
double s);
PIXMAN_API
void pixman_f_transform_init_translate (struct pixman_f_transform *t,
double tx,
double ty);
PIXMAN_API
pixman_bool_t pixman_f_transform_translate (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double tx,
double ty);
PIXMAN_API
pixman_bool_t pixman_f_transform_bounds (const struct pixman_f_transform *t,
struct pixman_box16 *b);
PIXMAN_API
void pixman_f_transform_init_identity (struct pixman_f_transform *t);
typedef enum
@ -290,6 +344,16 @@ typedef enum
PIXMAN_REPEAT_REFLECT
} pixman_repeat_t;
typedef enum
{
PIXMAN_DITHER_NONE,
PIXMAN_DITHER_FAST,
PIXMAN_DITHER_GOOD,
PIXMAN_DITHER_BEST,
PIXMAN_DITHER_ORDERED_BAYER_8,
PIXMAN_DITHER_ORDERED_BLUE_NOISE_64,
} pixman_dither_t;
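/* A sketch of typical use (dx/dy are hypothetical tile offsets): select
 * a dither method on the destination image before compositing down to a
 * low-depth format, e.g.
 *
 *     pixman_image_set_dither (dest, PIXMAN_DITHER_ORDERED_BLUE_NOISE_64);
 *     pixman_image_set_dither_offset (dest, dx, dy);
 *
 * where the offset shifts the dither matrix so that tiled renderings
 * line up.
 */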
typedef enum
{
PIXMAN_FILTER_FAST,
@ -428,73 +492,120 @@ typedef enum
/* This function exists only to make it possible to preserve
* the X ABI - it should go away at first opportunity.
*/
PIXMAN_API
void pixman_region_set_static_pointers (pixman_box16_t *empty_box,
pixman_region16_data_t *empty_data,
pixman_region16_data_t *broken_data);
/* creation/destruction */
PIXMAN_API
void pixman_region_init (pixman_region16_t *region);
PIXMAN_API
void pixman_region_init_rect (pixman_region16_t *region,
int x,
int y,
unsigned int width,
unsigned int height);
PIXMAN_API
pixman_bool_t pixman_region_init_rects (pixman_region16_t *region,
const pixman_box16_t *boxes,
int count);
PIXMAN_API
void pixman_region_init_with_extents (pixman_region16_t *region,
pixman_box16_t *extents);
PIXMAN_API
void pixman_region_init_from_image (pixman_region16_t *region,
pixman_image_t *image);
PIXMAN_API
void pixman_region_fini (pixman_region16_t *region);
/* manipulation */
PIXMAN_API
void pixman_region_translate (pixman_region16_t *region,
int x,
int y);
PIXMAN_API
pixman_bool_t pixman_region_copy (pixman_region16_t *dest,
pixman_region16_t *source);
PIXMAN_API
pixman_bool_t pixman_region_intersect (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_region16_t *reg2);
PIXMAN_API
pixman_bool_t pixman_region_union (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_region16_t *reg2);
PIXMAN_API
pixman_bool_t pixman_region_union_rect (pixman_region16_t *dest,
pixman_region16_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
PIXMAN_API
pixman_bool_t pixman_region_intersect_rect (pixman_region16_t *dest,
pixman_region16_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
PIXMAN_API
pixman_bool_t pixman_region_subtract (pixman_region16_t *reg_d,
pixman_region16_t *reg_m,
pixman_region16_t *reg_s);
PIXMAN_API
pixman_bool_t pixman_region_inverse (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_box16_t *inv_rect);
PIXMAN_API
pixman_bool_t pixman_region_contains_point (pixman_region16_t *region,
int x,
int y,
pixman_box16_t *box);
PIXMAN_API
pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *region,
pixman_box16_t *prect);
PIXMAN_API
pixman_bool_t pixman_region_not_empty (pixman_region16_t *region);
PIXMAN_API
pixman_box16_t * pixman_region_extents (pixman_region16_t *region);
PIXMAN_API
int pixman_region_n_rects (pixman_region16_t *region);
PIXMAN_API
pixman_box16_t * pixman_region_rectangles (pixman_region16_t *region,
int *n_rects);
PIXMAN_API
pixman_bool_t pixman_region_equal (pixman_region16_t *region1,
pixman_region16_t *region2);
PIXMAN_API
pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region);
PIXMAN_API
void pixman_region_reset (pixman_region16_t *region,
pixman_box16_t *box);
PIXMAN_API
void pixman_region_clear (pixman_region16_t *region);
/*
* 32 bit regions
@ -528,72 +639,119 @@ struct pixman_region32
};
/* creation/destruction */
PIXMAN_API
void pixman_region32_init (pixman_region32_t *region);
PIXMAN_API
void pixman_region32_init_rect (pixman_region32_t *region,
int x,
int y,
unsigned int width,
unsigned int height);
PIXMAN_API
pixman_bool_t pixman_region32_init_rects (pixman_region32_t *region,
const pixman_box32_t *boxes,
int count);
PIXMAN_API
void pixman_region32_init_with_extents (pixman_region32_t *region,
pixman_box32_t *extents);
PIXMAN_API
void pixman_region32_init_from_image (pixman_region32_t *region,
pixman_image_t *image);
PIXMAN_API
void pixman_region32_fini (pixman_region32_t *region);
/* manipulation */
PIXMAN_API
void pixman_region32_translate (pixman_region32_t *region,
int x,
int y);
PIXMAN_API
pixman_bool_t pixman_region32_copy (pixman_region32_t *dest,
pixman_region32_t *source);
PIXMAN_API
pixman_bool_t pixman_region32_intersect (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_region32_t *reg2);
PIXMAN_API
pixman_bool_t pixman_region32_union (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_region32_t *reg2);
PIXMAN_API
pixman_bool_t pixman_region32_intersect_rect (pixman_region32_t *dest,
pixman_region32_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
PIXMAN_API
pixman_bool_t pixman_region32_union_rect (pixman_region32_t *dest,
pixman_region32_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
PIXMAN_API
pixman_bool_t pixman_region32_subtract (pixman_region32_t *reg_d,
pixman_region32_t *reg_m,
pixman_region32_t *reg_s);
PIXMAN_API
pixman_bool_t pixman_region32_inverse (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_box32_t *inv_rect);
PIXMAN_API
pixman_bool_t pixman_region32_contains_point (pixman_region32_t *region,
int x,
int y,
pixman_box32_t *box);
PIXMAN_API
pixman_region_overlap_t pixman_region32_contains_rectangle (pixman_region32_t *region,
pixman_box32_t *prect);
PIXMAN_API
pixman_bool_t pixman_region32_not_empty (pixman_region32_t *region);
PIXMAN_API
pixman_box32_t * pixman_region32_extents (pixman_region32_t *region);
PIXMAN_API
int pixman_region32_n_rects (pixman_region32_t *region);
PIXMAN_API
pixman_box32_t * pixman_region32_rectangles (pixman_region32_t *region,
int *n_rects);
PIXMAN_API
pixman_bool_t pixman_region32_equal (pixman_region32_t *region1,
pixman_region32_t *region2);
PIXMAN_API
pixman_bool_t pixman_region32_selfcheck (pixman_region32_t *region);
PIXMAN_API
void pixman_region32_reset (pixman_region32_t *region,
pixman_box32_t *box);
PIXMAN_API
void pixman_region32_clear (pixman_region32_t *region);
/* Copy / Fill / Misc */
PIXMAN_API
pixman_bool_t pixman_blt (uint32_t *src_bits,
uint32_t *dst_bits,
int src_stride,
@ -606,6 +764,8 @@ pixman_bool_t pixman_blt (uint32_t *src_bits,
int dest_y,
int width,
int height);
PIXMAN_API
pixman_bool_t pixman_fill (uint32_t *bits,
int stride,
int bpp,
@ -615,7 +775,11 @@ pixman_bool_t pixman_fill (uint32_t *bits,
int height,
uint32_t _xor);
PIXMAN_API
int pixman_version (void);
PIXMAN_API
const char* pixman_version_string (void);
/*
@ -659,12 +823,24 @@ struct pixman_indexed
((g) << 4) | \
((b)))
#define PIXMAN_FORMAT_BPP(f) (((f) >> 24) )
#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0xff)
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
#define PIXMAN_FORMAT_R(f) (((f) >> 8) & 0x0f)
#define PIXMAN_FORMAT_G(f) (((f) >> 4) & 0x0f)
#define PIXMAN_FORMAT_B(f) (((f) ) & 0x0f)
#define PIXMAN_FORMAT_BYTE(bpp,type,a,r,g,b) \
(((bpp >> 3) << 24) | \
(3 << 22) | ((type) << 16) | \
((a >> 3) << 12) | \
((r >> 3) << 8) | \
((g >> 3) << 4) | \
((b >> 3)))
#define PIXMAN_FORMAT_RESHIFT(val, ofs, num) \
(((val >> (ofs)) & ((1 << (num)) - 1)) << ((val >> 22) & 3))
#define PIXMAN_FORMAT_BPP(f) PIXMAN_FORMAT_RESHIFT(f, 24, 8)
#define PIXMAN_FORMAT_SHIFT(f) ((uint32_t)((f >> 22) & 3))
#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0x3f)
#define PIXMAN_FORMAT_A(f) PIXMAN_FORMAT_RESHIFT(f, 12, 4)
#define PIXMAN_FORMAT_R(f) PIXMAN_FORMAT_RESHIFT(f, 8, 4)
#define PIXMAN_FORMAT_G(f) PIXMAN_FORMAT_RESHIFT(f, 4, 4)
#define PIXMAN_FORMAT_B(f) PIXMAN_FORMAT_RESHIFT(f, 0, 4)
#define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff)
#define PIXMAN_FORMAT_VIS(f) (((f) ) & 0xffff)
#define PIXMAN_FORMAT_DEPTH(f) (PIXMAN_FORMAT_A(f) + \
@ -683,15 +859,22 @@ struct pixman_indexed
#define PIXMAN_TYPE_BGRA 8
#define PIXMAN_TYPE_RGBA 9
#define PIXMAN_TYPE_ARGB_SRGB 10
#define PIXMAN_TYPE_RGBA_FLOAT 11
#define PIXMAN_FORMAT_COLOR(f) \
(PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA)
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA_FLOAT)
typedef enum {
/* 128bpp formats */
PIXMAN_rgba_float = PIXMAN_FORMAT_BYTE(128,PIXMAN_TYPE_RGBA_FLOAT,32,32,32,32),
/* 96bpp formats */
PIXMAN_rgb_float = PIXMAN_FORMAT_BYTE(96,PIXMAN_TYPE_RGBA_FLOAT,0,32,32,32),
/* 32bpp formats */
typedef enum {
PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8),
@ -762,30 +945,44 @@ typedef enum {
} pixman_format_code_t;
/* Querying supported format values. */
PIXMAN_API
pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format);
PIXMAN_API
pixman_bool_t pixman_format_supported_source (pixman_format_code_t format);
/* Constructors */
PIXMAN_API
pixman_image_t *pixman_image_create_solid_fill (const pixman_color_t *color);
PIXMAN_API
pixman_image_t *pixman_image_create_linear_gradient (const pixman_point_fixed_t *p1,
const pixman_point_fixed_t *p2,
const pixman_gradient_stop_t *stops,
int n_stops);
PIXMAN_API
pixman_image_t *pixman_image_create_radial_gradient (const pixman_point_fixed_t *inner,
const pixman_point_fixed_t *outer,
pixman_fixed_t inner_radius,
pixman_fixed_t outer_radius,
const pixman_gradient_stop_t *stops,
int n_stops);
PIXMAN_API
pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t *center,
pixman_fixed_t angle,
const pixman_gradient_stop_t *stops,
int n_stops);
PIXMAN_API
pixman_image_t *pixman_image_create_bits (pixman_format_code_t format,
int width,
int height,
uint32_t *bits,
int rowstride_bytes);
PIXMAN_API
pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t format,
int width,
int height,
@ -793,48 +990,99 @@ pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t forma
int rowstride_bytes);
/* Destructor */
PIXMAN_API
pixman_image_t *pixman_image_ref (pixman_image_t *image);
PIXMAN_API
pixman_bool_t pixman_image_unref (pixman_image_t *image);
PIXMAN_API
void pixman_image_set_destroy_function (pixman_image_t *image,
pixman_image_destroy_func_t function,
void *data);
PIXMAN_API
void * pixman_image_get_destroy_data (pixman_image_t *image);
/* Set properties */
PIXMAN_API
pixman_bool_t pixman_image_set_clip_region (pixman_image_t *image,
pixman_region16_t *region);
PIXMAN_API
pixman_bool_t pixman_image_set_clip_region32 (pixman_image_t *image,
pixman_region32_t *region);
PIXMAN_API
void pixman_image_set_has_client_clip (pixman_image_t *image,
pixman_bool_t client_clip);
PIXMAN_API
pixman_bool_t pixman_image_set_transform (pixman_image_t *image,
const pixman_transform_t *transform);
PIXMAN_API
void pixman_image_set_repeat (pixman_image_t *image,
pixman_repeat_t repeat);
PIXMAN_API
void pixman_image_set_dither (pixman_image_t *image,
pixman_dither_t dither);
PIXMAN_API
void pixman_image_set_dither_offset (pixman_image_t *image,
int offset_x,
int offset_y);
PIXMAN_API
pixman_bool_t pixman_image_set_filter (pixman_image_t *image,
pixman_filter_t filter,
const pixman_fixed_t *filter_params,
int n_filter_params);
PIXMAN_API
void pixman_image_set_source_clipping (pixman_image_t *image,
pixman_bool_t source_clipping);
PIXMAN_API
void pixman_image_set_alpha_map (pixman_image_t *image,
pixman_image_t *alpha_map,
int16_t x,
int16_t y);
PIXMAN_API
void pixman_image_set_component_alpha (pixman_image_t *image,
pixman_bool_t component_alpha);
PIXMAN_API
pixman_bool_t pixman_image_get_component_alpha (pixman_image_t *image);
PIXMAN_API
void pixman_image_set_accessors (pixman_image_t *image,
pixman_read_memory_func_t read_func,
pixman_write_memory_func_t write_func);
PIXMAN_API
void pixman_image_set_indexed (pixman_image_t *image,
const pixman_indexed_t *indexed);
PIXMAN_API
uint32_t *pixman_image_get_data (pixman_image_t *image);
PIXMAN_API
int pixman_image_get_width (pixman_image_t *image);
PIXMAN_API
int pixman_image_get_height (pixman_image_t *image);
PIXMAN_API
int pixman_image_get_stride (pixman_image_t *image); /* in bytes */
PIXMAN_API
int pixman_image_get_depth (pixman_image_t *image);
PIXMAN_API
pixman_format_code_t pixman_image_get_format (pixman_image_t *image);
typedef enum
@ -852,6 +1100,7 @@ typedef enum
/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
* with the given kernels and scale parameters.
*/
PIXMAN_API
pixman_fixed_t *
pixman_filter_create_separable_convolution (int *n_values,
pixman_fixed_t scale_x,
@ -863,11 +1112,15 @@ pixman_filter_create_separable_convolution (int *n_values,
int subsample_bits_x,
int subsample_bits_y);
PIXMAN_API
pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op,
pixman_image_t *image,
const pixman_color_t *color,
int n_rects,
const pixman_rectangle16_t *rects);
PIXMAN_API
pixman_bool_t pixman_image_fill_boxes (pixman_op_t op,
pixman_image_t *dest,
const pixman_color_t *color,
@ -875,6 +1128,7 @@ pixman_bool_t pixman_image_fill_boxes (pixman_op_t
const pixman_box32_t *boxes);
/* Composite */
PIXMAN_API
pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
pixman_image_t *src_image,
pixman_image_t *mask_image,
@ -887,6 +1141,8 @@ pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
int16_t dest_y,
uint16_t width,
uint16_t height);
PIXMAN_API
void pixman_image_composite (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *mask,
@ -899,6 +1155,8 @@ void pixman_image_composite (pixman_op_t op,
int16_t dest_y,
uint16_t width,
uint16_t height);
PIXMAN_API
void pixman_image_composite32 (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *mask,
@ -930,6 +1188,7 @@ void pixman_image_composite32 (pixman_op_t op,
* Since 0.21.2, pixman doesn't do these workarounds anymore, so now this
* function is a no-op.
*/
PIXMAN_API
void pixman_disable_out_of_bounds_workaround (void);
/*
@ -942,29 +1201,48 @@ typedef struct
const void *glyph;
} pixman_glyph_t;
PIXMAN_API
pixman_glyph_cache_t *pixman_glyph_cache_create (void);
PIXMAN_API
void pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache);
PIXMAN_API
void pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache);
PIXMAN_API
void pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache);
PIXMAN_API
const void * pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key);
PIXMAN_API
const void * pixman_glyph_cache_insert (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key,
int origin_x,
int origin_y,
pixman_image_t *glyph_image);
PIXMAN_API
void pixman_glyph_cache_remove (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key);
PIXMAN_API
void pixman_glyph_get_extents (pixman_glyph_cache_t *cache,
int n_glyphs,
pixman_glyph_t *glyphs,
pixman_box32_t *extents);
PIXMAN_API
pixman_format_code_t pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
PIXMAN_API
void pixman_composite_glyphs (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
@ -980,6 +1258,8 @@ void pixman_composite_glyphs (pixman_op_t op,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
PIXMAN_API
void pixman_composite_glyphs_no_mask (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
@ -1035,7 +1315,7 @@ struct pixman_triangle
#define pixman_trapezoid_valid(t) \
((t)->left.p1.y != (t)->left.p2.y && \
(t)->right.p1.y != (t)->right.p2.y && \
(int) ((t)->bottom - (t)->top) > 0)
((t)->bottom > (t)->top))
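/* Comparing the fixed-point values directly, instead of casting their
 * difference to int, avoids signed overflow when bottom - top does not
 * fit in 32 bits.
 */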
struct pixman_span_fix
{
@ -1047,12 +1327,19 @@ struct pixman_trap
pixman_span_fix_t top, bot;
};
PIXMAN_API
pixman_fixed_t pixman_sample_ceil_y (pixman_fixed_t y,
int bpp);
PIXMAN_API
pixman_fixed_t pixman_sample_floor_y (pixman_fixed_t y,
int bpp);
PIXMAN_API
void pixman_edge_step (pixman_edge_t *e,
int n);
PIXMAN_API
void pixman_edge_init (pixman_edge_t *e,
int bpp,
pixman_fixed_t y_start,
@ -1060,31 +1347,43 @@ void pixman_edge_init (pixman_edge_t *e,
pixman_fixed_t y_top,
pixman_fixed_t x_bot,
pixman_fixed_t y_bot);
PIXMAN_API
void pixman_line_fixed_edge_init (pixman_edge_t *e,
int bpp,
pixman_fixed_t y,
const pixman_line_fixed_t *line,
int x_off,
int y_off);
PIXMAN_API
void pixman_rasterize_edges (pixman_image_t *image,
pixman_edge_t *l,
pixman_edge_t *r,
pixman_fixed_t t,
pixman_fixed_t b);
PIXMAN_API
void pixman_add_traps (pixman_image_t *image,
int16_t x_off,
int16_t y_off,
int ntrap,
const pixman_trap_t *traps);
PIXMAN_API
void pixman_add_trapezoids (pixman_image_t *image,
int16_t x_off,
int y_off,
int ntraps,
const pixman_trapezoid_t *traps);
PIXMAN_API
void pixman_rasterize_trapezoid (pixman_image_t *image,
const pixman_trapezoid_t *trap,
int x_off,
int y_off);
PIXMAN_API
void pixman_composite_trapezoids (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
@ -1095,6 +1394,8 @@ void pixman_composite_trapezoids (pixman_op_t op,
int y_dst,
int n_traps,
const pixman_trapezoid_t * traps);
PIXMAN_API
void pixman_composite_triangles (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
@ -1105,6 +1406,8 @@ void pixman_composite_triangles (pixman_op_t op,
int y_dst,
int n_tris,
const pixman_triangle_t * tris);
PIXMAN_API
void pixman_add_triangles (pixman_image_t *image,
int32_t x_off,
int32_t y_off,

View file

@ -1,478 +0,0 @@
Roadmap
- Move all the fetchers etc. into pixman-image to make pixman-compose.c
less intimidating.
DONE
- Make combiners for unified alpha take a mask argument. That way
we won't need two separate paths for unified vs component in the
general compositing code.
DONE, except that the Altivec code needs to be updated. Luca is
looking into that.
- Delete separate 'unified alpha' path
DONE
- Split images into their own files
DONE
- Split the gradient walker code out into its own file
DONE
- Add scanline getters per image
DONE
- Generic 64 bit fetcher
DONE
- Split fast path tables into their respective architecture dependent
files.
See "Render Algorithm" below for rationale
Images will eventually have these virtual functions:
get_scanline()
get_scanline_wide()
get_pixel()
get_pixel_wide()
get_untransformed_pixel()
get_untransformed_pixel_wide()
get_unfiltered_pixel()
get_unfiltered_pixel_wide()
store_scanline()
store_scanline_wide()
1.
Initially we will just have get_scanline() and get_scanline_wide();
these will be based on the ones in pixman-compose. Hopefully this will
reduce the complexity in pixman_composite_rect_general().
Note that there are access considerations - the compose function is
being compiled twice.
2.
Split image types into their own source files. Export noop virtual
reinit() call. Call this whenever a property of the image changes.
3.
Split the get_scanline() call into smaller functions that are
initialized by the reinit() call.
The Render Algorithm:
(first repeat, then filter, then transform, then clip)
Starting from a destination pixel (x, y), do
1 x = x - xDst + xSrc
y = y - yDst + ySrc
2 reject pixel that is outside the clip
This treats clipping as something that happens after
transformation, which I think is correct for client clips. For
hierarchy clips it is wrong, but who really cares? Without
GraphicsExposes hierarchy clips are basically irrelevant. Yes,
you could imagine cases where the pixels of a subwindow of a
redirected, transformed window should be treated as
transparent. I don't really care.
Basically, I think the render spec should say that pixels that
are unavailable due to the hierarchy have undefined content,
and that GraphicsExposes are not generated. Ie., basically
that using non-redirected windows as sources is fail. This is
at least consistent with the current implementation and we can
update the spec later if someone makes it work.
The implication for render is that it should stop passing the
hierarchy clip to pixman. In pixman, if a source image has a
clip it should be used in computing the composite region and
nowhere else, regardless of what "has_client_clip" says. The
default should be for there to not be any clip.
I would really like to get rid of the client clip as well for
source images, but unfortunately there is at least one
application in the wild that uses them.
3 Transform pixel: (x, y) = T(x, y)
4 Call p = GetUntransformedPixel (x, y)
5 If the image has an alpha map, then
Call GetUntransformedPixel (x, y) on the alpha map
add resulting alpha channel to p
return p
Where GetUnTransformedPixel is:
6 switch (filter)
{
case NEAREST:
return GetUnfilteredPixel (x, y);
break;
case BILINEAR:
return GetUnfilteredPixel (...) // 4 times
break;
case CONVOLUTION:
return GetUnfilteredPixel (...) // as many times as necessary.
break;
}
Where GetUnfilteredPixel (x, y) is
7 switch (repeat)
{
case REPEAT_NORMAL:
case REPEAT_PAD:
case REPEAT_REFLECT:
// adjust x, y as appropriate
break;
case REPEAT_NONE:
if (x, y) is outside image bounds
return 0;
break;
}
return GetRawPixel(x, y)
Where GetRawPixel (x, y) is
8 Compute the pixel in question, depending on image type.
For gradients, repeat has a totally different meaning, so
UnfilteredPixel() and RawPixel() must be the same function so that
gradients can do their own repeat algorithm.
So, the GetRawPixel
for bits must deal with repeats
for gradients must deal with repeats (differently)
for solids, should ignore repeats.
for polygons, when we add them, either ignore repeats or do
something similar to bits (in which case, we may want an extra
layer of indirection to modify the coordinates).
It is then possible to build things like "get scanline" or "get tile" on
top of this. In the simplest case, just repeatedly calling GetPixel()
would work, but specialized get_scanline()s or get_tile()s could be
plugged in for common cases.
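In the simplest case the generic scanline fetcher is just a loop over
the pixel fetcher; a sketch, reusing the image_t layout from earlier:

    /* Always-correct fallback: one virtual call per pixel.  Faster,
     * specialized get_scanline()s can replace this per image type.
     */
    static void
    generic_get_scanline (image_t *image, int x, int y,
                          int width, uint32_t *buffer)
    {
        int i;

        for (i = 0; i < width; ++i)
            buffer[i] = image->get_pixel (image, x + i, y);
    }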
By not plugging anything in for images with access functions, we only
have to compile the pixel functions twice, not the scanline functions.
And we can get rid of fetchers for the bizarre formats that no one
uses, such as b2g3r3. r1g2b1? Seriously? It is also worth
considering a generic format-based pixel fetcher for these edge cases.
Since the actual routines depend on the image attributes, the images
must be notified when those change and update their function pointers
appropriately. So there should probably be a virtual function called
(* reinit) or something like that.
There will also be wide fetchers for both pixels and lines. The line
fetcher will just call the wide pixel fetcher. The wide pixel fetcher
will just call expand, except for 10 bit formats.
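Here "expand" widens each 8-bit channel to 16 bits by bit
replication, so that 0xff maps exactly to 0xffff; a sketch for
a8r8g8b8:

    #include <stdint.h>

    /* Multiplying a byte by 0x0101 replicates it into both bytes
     * of a 16-bit channel.
     */
    static uint64_t
    expand_8888 (uint32_t p)
    {
        uint64_t a = ((p >> 24) & 0xff) * 0x0101u;
        uint64_t r = ((p >> 16) & 0xff) * 0x0101u;
        uint64_t g = ((p >>  8) & 0xff) * 0x0101u;
        uint64_t b = ( p        & 0xff) * 0x0101u;

        return (a << 48) | (r << 32) | (g << 16) | b;
    }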
Rendering pipeline:
Drawable:
0. if (picture has alpha map)
0.1. Position alpha map according to the alpha_x/alpha_y
0.2. Replace the alpha channel of the source with the one
from the alpha map. Replacement only takes place
in the intersection of the two drawables' geometries.
1. Repeat the drawable according to the repeat attribute
2. Reconstruct a continuous image according to the filter
3. Transform according to the transform attribute
4. Position image such that src_x, src_y is over dst_x, dst_y
5. Sample once per destination pixel
6. Clip. If a pixel is not within the source clip, then no
compositing takes place at that pixel. (Ie., it's *not*
treated as 0).
Sampling a drawable:
- If the drawable does not have an alpha channel, the pixels in it
are treated as opaque.
Note on reconstruction:
- The top left pixel has coordinates (0.5, 0.5) and pixels are
spaced 1 apart.
Gradient:
1. Unless gradient type is conical, repeat the underlying (0, 1)
gradient according to the repeat attribute
2. Integrate the gradient across the plane according to type.
3. Transform according to transform attribute
4. Position gradient
5. Sample once per destination pixel.
6. Clip
Solid Fill:
1. Repeat has no effect
2. Image is already continuous and defined for the entire plane
3. Transform has no effect
4. Positioning has no effect
5. Sample once per destination pixel.
6. Clip
Polygon:
1. Repeat has no effect
2. Image is already continuous and defined on the whole plane
3. Transform according to transform attribute
4. Position image
5. Supersample 15x17 per destination pixel.
6. Clip
Possibly interesting additions:
- More general transformations, such as warping, or general
shading.
- Shader image where a function is called to generate the
pixel (ie., uploading assembly code).
- Resampling kernels
In principle the polygon image uses a 15x17 box filter for
resampling. If we allow general resampling filters, then we
get all the various antialiasing types for free.
Bilinear downsampling looks terrible and could be much
improved by a resampling filter. NEAREST reconstruction
combined with a box resampling filter is what GdkPixbuf
does, I believe.
Useful for high frequency gradients as well.
(Note that the difference between a reconstruction and a
resampling filter is mainly where in the pipeline they
occur. High quality resampling should use a correctly
oriented kernel so it should happen after transformation.
An implementation can transform the resampling kernel and
convolve it with the reconstruction if it so desires, but it
will need to deal with the fact that the resampling kernel
will not necessarily be pixel aligned.)
"Output kernels"
One could imagine doing the resampling after compositing,
ie., for each destination pixel sample each source image 16
times, then composite those subpixels individually, then
finally apply a kernel.
However, this is effectively the same as full screen
antialiasing, which is a simpler way to think about it. So
resampling kernels may make sense for individual images, but
not as a post-compositing step.
Fullscreen AA is inefficient without chained compositing
though. Consider an (image scaled up to oversample size IN
some polygon) scaled down to screen size. With the current
implementation, there will be a huge temporary. With chained
compositing, the whole thing ends up being equivalent to the
output kernel from above.
- Color space conversion
The complete model here is that each surface has a color
space associated with it and that the compositing operation
also has one associated with it. Note also that gradients
should have associated colorspaces.
- Dithering
If people dither something that is already dithered, it will
look terrible, but don't do that, then. (Dithering happens
after resampling if at all - what is the relationship
with color spaces? Presumably dithering should happen in linear
intensity space).
- Floating point surfaces, 16, 32 and possibly 64 bit per
channel.
Maybe crack:
- Glyph polygons
If glyphs could be given as polygons, they could be
positioned and rasterized more accurately. The glyph
structure would need subpixel positioning though.
- Luminance vs. coverage for the alpha channel
Whether the alpha channel should be interpreted as luminance
modulation or as coverage (intensity modulation). This is a
bit of a departure from the rendering model though. It could
also be considered whether it should be possible to have
both channels in the same drawable.
- Alternative for component alpha
- Set component-alpha on the output image.
- This means each of the components is sampled
independently and composited in the corresponding
channel only.
- Have a 3x oversampled mask
- Scale it down by 3 horizontally with a [ 1/3, 1/3, 1/3 ]
resampling filter.
Is this equivalent to just using a component alpha mask?
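The 3x downscale in the second alternative might look like this
(a sketch for an a8 mask; assumes src holds 3*width samples):

    #include <stdint.h>

    /* Collapse a 3x horizontally oversampled a8 mask with a
     * [1/3, 1/3, 1/3] filter.
     */
    static void
    downscale_mask_3x (const uint8_t *src, uint8_t *dest, int width)
    {
        int i;

        for (i = 0; i < width; ++i)
            dest[i] = (src[3 * i] + src[3 * i + 1] + src[3 * i + 2]) / 3;
    }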
Incompatible changes:
- Gradients could be specified with premultiplied colors. (You
can use a mask to get things like gradients from solid red to
transparent red.)
Refactoring pixman
The pixman code is not particularly nice, to put it mildly. Among the
issues are
- inconsistent naming style (fb vs Fb, camelCase vs
underscore_naming). Sometimes there is even inconsistency *within*
one name.
fetchProc32 ACCESS(pixman_fetchProcForPicture32)
may be one of the ugliest names ever created.
- coding style:
use the one from cairo, except that pixman uses this brace style:
while (blah)
{
}
Format do/while like this:
do
{
}
while (...);
- PIXMAN_COMPOSITE_RECT_GENERAL() is horribly complex
- switch case logic in pixman-access.c
Instead it would be better to just store function pointers in the
image objects themselves:
get_pixel()
get_scanline()
- Much of the scanline fetching code is for formats that no one
ever uses. a2r2g2b2 anyone?
It would probably be worthwhile having a generic fetcher for any
pixman format whatsoever.
- Code related to particular image types should be split into individual
files.
pixman-bits-image.c
pixman-linear-gradient-image.c
pixman-radial-gradient-image.c
pixman-solid-image.c
- Fast path code should be split into files based on architecture:
pixman-mmx-fastpath.c
pixman-sse2-fastpath.c
pixman-c-fastpath.c
etc.
Each of these files should then export a fastpath table, which would
be declared in pixman-private.h. This should allow us to get rid
of the pixman-mmx.h files.
The fast path table should describe each fast path. Ie., there should
be bitfields indicating what things the fast path can handle, rather than
only allowing one format per src/mask/dest as now. Ie.,
{
FAST_a8r8g8b8 | FAST_x8r8g8b8,
FAST_null,
FAST_x8r8g8b8,
FAST_repeat_normal | FAST_repeat_none,
the_fast_path
}
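In C the table entries might be declared along these lines (a sketch;
the FAST_* flags, composite_func_t and the dummy entry are all
assumptions):

    #include <stdint.h>

    /* Hypothetical bitfield flags; one bit per supported format
     * or repeat mode.
     */
    #define FAST_null           (1u << 0)
    #define FAST_a8r8g8b8       (1u << 1)
    #define FAST_x8r8g8b8       (1u << 2)
    #define FAST_repeat_none    (1u << 3)
    #define FAST_repeat_normal  (1u << 4)

    typedef void (* composite_func_t) (void *composite_info);

    typedef struct
    {
        uint32_t          src_flags;    /* acceptable source formats */
        uint32_t          mask_flags;   /* FAST_null == "no mask"    */
        uint32_t          dest_flags;   /* acceptable dest formats   */
        uint32_t          repeat_flags; /* acceptable repeat modes   */
        composite_func_t  func;
    } fast_path_t;

    /* Dummy implementation so the table below compiles. */
    static void the_fast_path (void *info) { (void) info; }

    static const fast_path_t sse2_fast_paths[] =
    {
        { FAST_a8r8g8b8 | FAST_x8r8g8b8,
          FAST_null,
          FAST_x8r8g8b8,
          FAST_repeat_normal | FAST_repeat_none,
          the_fast_path },
    };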
There should then be *one* file that implements pixman_image_composite().
It should do the following:
optimize_operator();
convert 1x1 repeat to solid (actually this should be done at
image creation time).
is there a useful fastpath?
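Sketched as code (every helper name here is an assumption; this
builds on the image_t and fast path sketches above):

    /* Assumed helpers (declarations only). */
    int              optimize_operator (int op, image_t *src,
                                        image_t *mask, image_t *dest);
    composite_func_t lookup_fast_path  (int op, image_t *src,
                                        image_t *mask, image_t *dest);
    void             general_composite (int op, image_t *src,
                                        image_t *mask, image_t *dest,
                                        int src_x, int src_y,
                                        int mask_x, int mask_y,
                                        int dest_x, int dest_y,
                                        int width, int height);

    void
    image_composite (int op,
                     image_t *src, image_t *mask, image_t *dest,
                     int src_x, int src_y, int mask_x, int mask_y,
                     int dest_x, int dest_y, int width, int height)
    {
        composite_func_t func;

        /* Strength-reduce the operator, e.g. OVER with an opaque
         * source becomes SRC. */
        op = optimize_operator (op, src, mask, dest);

        /* (1x1 repeat -> solid is assumed to have been converted
         * at image creation time.) */

        if ((func = lookup_fast_path (op, src, mask, dest)) != NULL)
            func (/* composite info */ NULL);
        else
            general_composite (op, src, mask, dest, src_x, src_y,
                               mask_x, mask_y, dest_x, dest_y,
                               width, height);
    }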
There should be a file called pixman-cpu.c that contains all the
architecture specific stuff to detect what CPU features we have.
Issues that must be kept in mind:
- we need accessor code to be preserved
- maybe there should be a "store_scanline" too?
Is this sufficient?
We should preserve the optimization where the
compositing happens directly in the destination
whenever possible.
- It should be possible to create GPU samplers from the
images.
The "horizontal" classification should be a bit in the image, the
"vertical" classification should just happen inside the gradient
file. Note though that
(a) these will change if the transformation/repeat changes.
(b) at the moment the optimization for linear gradients
takes the source rectangle into account. Presumably
this is to also optimize the case where the gradient
is close enough to horizontal?
Who is responsible for repeats? In principle it should be the scanline
fetch. Right now NORMAL repeats are handled by walk_composite_region()
while other repeats are handled by the scanline code.
(Random note on filtering: do you filter before or after
transformation? Hardware is going to filter after transformation;
this is also what pixman does currently). It's not completely clear
what filtering *after* transformation means. One thing that might look
good would be to do *supersampling*, ie., compute multiple subpixels
per destination pixel, then average them together.
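For instance, supersampling a single destination pixel could be
sketched as follows (sample_transformed() is an assumed helper that
samples the transformed image at a subpixel position; image_t is the
layout sketched earlier):

    #include <stdint.h>

    /* Assumed helper (declaration only). */
    uint32_t sample_transformed (image_t *image, double x, double y);

    /* Average an n x n grid of subpixel samples for the destination
     * pixel whose center is (x, y).
     */
    static uint32_t
    supersample_pixel (image_t *image, double x, double y, int n)
    {
        int i, j, total = n * n;
        uint32_t a = 0, r = 0, g = 0, b = 0;

        for (i = 0; i < n; ++i)
        {
            for (j = 0; j < n; ++j)
            {
                double sx = x + (i + 0.5) / n - 0.5;
                double sy = y + (j + 0.5) / n - 0.5;
                uint32_t p = sample_transformed (image, sx, sy);

                a +=  p >> 24;
                r += (p >> 16) & 0xff;
                g += (p >>  8) & 0xff;
                b +=  p        & 0xff;
            }
        }

        return ((a / total) << 24) | ((r / total) << 16) |
               ((g / total) <<  8) |  (b / total);
    }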

Просмотреть файл

@ -0,0 +1,30 @@
###############################################################################
#
# Copyright 2009, Oracle and/or its affiliates. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
###############################################################################
#
# Override the linker's detection of CMOV/MMX/SSE instructions so this
# library isn't flagged as only usable on CPUs with those ISAs, since it
# checks at runtime for availability before calling them
hwcap_1 = V0x0 FPU OVERRIDE;