|
| 1 | +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project |
| 2 | +// SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | + |
| 4 | +#version 450 |
| 5 | + |
// 64 invocations per workgroup along X; Y and Z keep their default size of 1.
layout(local_size_x = 64) in;

// Source image bytes, addressed by main() as packed 32-bit words.
layout(binding = 0, std430) buffer input_buf {
    uint in_data[];
};

// Destination bytes; main() stores one packed 32-bit word per group of
// four invocations.
layout(binding = 1, std430) buffer output_buf {
    uint out_data[];
};
| 14 | + |
// Per-dispatch parameters supplied through push constants.
layout(push_constant) uniform image_info {
    uint num_levels; // not read by main() in this shader
    uint pitch;      // row length used to split the flat invocation index into x/y
    uint height;     // row count used to split the flat invocation index into y/z
    uint c0;         // multiplier applied to the tile row in the tile offset
    uint c1;         // multiplier applied to (z >> 2) in the slice offset
} info;
| 22 | + |
// Source byte index inside a 256-byte tile for every destination texel.
//
// First index : z & 3 (slice within a group of four slices).
// Second index: (col + row * 8) >> 2, i.e. two dwords per 8-texel row.
// Each dword packs four 8-bit indices, lowest byte first (col & 3 == 0).
//
// Reading the values, the index bits are laid out as
//   bit0 = col[0], bit1 = row[0], bit2 = col[1], bit3 = row[1],
//   bit4..5 = z & 3, bit6 = col[2], bit7 = row[2].
const uint lut_8bpp[4][16] = {
    { // z & 3 == 0
        0x05040100u, 0x45444140u, // row 0
        0x07060302u, 0x47464342u, // row 1
        0x0d0c0908u, 0x4d4c4948u, // row 2
        0x0f0e0b0au, 0x4f4e4b4au, // row 3
        0x85848180u, 0xc5c4c1c0u, // row 4
        0x87868382u, 0xc7c6c3c2u, // row 5
        0x8d8c8988u, 0xcdccc9c8u, // row 6
        0x8f8e8b8au, 0xcfcecbcau, // row 7
    },
    { // z & 3 == 1
        0x15141110u, 0x55545150u, // row 0
        0x17161312u, 0x57565352u, // row 1
        0x1d1c1918u, 0x5d5c5958u, // row 2
        0x1f1e1b1au, 0x5f5e5b5au, // row 3
        0x95949190u, 0xd5d4d1d0u, // row 4
        0x97969392u, 0xd7d6d3d2u, // row 5
        0x9d9c9998u, 0xdddcd9d8u, // row 6
        0x9f9e9b9au, 0xdfdedbdau, // row 7
    },
    { // z & 3 == 2
        0x25242120u, 0x65646160u, // row 0
        0x27262322u, 0x67666362u, // row 1
        0x2d2c2928u, 0x6d6c6968u, // row 2
        0x2f2e2b2au, 0x6f6e6b6au, // row 3
        0xa5a4a1a0u, 0xe5e4e1e0u, // row 4
        0xa7a6a3a2u, 0xe7e6e3e2u, // row 5
        0xadaca9a8u, 0xedece9e8u, // row 6
        0xafaeabaau, 0xefeeebeau, // row 7
    },
    { // z & 3 == 3
        0x35343130u, 0x75747170u, // row 0
        0x37363332u, 0x77767372u, // row 1
        0x3d3c3938u, 0x7d7c7978u, // row 2
        0x3f3e3b3au, 0x7f7e7b7au, // row 3
        0xb5b4b1b0u, 0xf5f4f1f0u, // row 4
        0xb7b6b3b2u, 0xf7f6f3f2u, // row 5
        0xbdbcb9b8u, 0xfdfcf9f8u, // row 6
        0xbfbebbbau, 0xfffefbfau, // row 7
    },
};
| 65 | + |
// Tile geometry constants for the 8-bits-per-texel path.
#define MICRO_TILE_DIM     8   // tile edge length in texels (main() uses the low 3 bits of x/y)
#define MICRO_TILE_SZ      256 // byte stride between tiles in main()'s offset computation
#define TEXELS_PER_ELEMENT 1   // not referenced in the visible part of this shader
#define BPP                8   // bits per texel; main() converts indices to bytes via BPP / 8

// One staging dword per group of four invocations (64 invocations / 4 = 16).
shared uint scratch[16];
| 72 | + |
// Detiles an 8bpp surface, one texel (byte) per invocation.
//
// The flat global invocation index is split into (x, y, z) using info.pitch
// and info.height. For that texel the source byte is located through lut_8bpp
// and read from in_data. Four neighbouring invocations then merge their bytes
// into one shared dword, and the first of the four stores it to out_data.
//
// NOTE(review): info.pitch and info.height are used as divisors and are
// assumed to be non-zero; confirm the host never dispatches with either at 0.
void main() {
    // Invocations 4k..4k+3 of the workgroup share staging dword k.
    uint slot = gl_LocalInvocationID.x >> 2u;
    // Bit position of this invocation's byte inside the packed output dword.
    uint byte_ofs = (gl_LocalInvocationID.x & 3u) * 8u;

    // Shared memory starts with undefined contents, so clear the staging dword.
    atomicAnd(scratch[slot], 0u);

    // Every clear must have landed before any invocation ORs its byte in,
    // otherwise a late clear could wipe a neighbour's contribution.
    memoryBarrierShared();
    barrier();

    uint x = gl_GlobalInvocationID.x % info.pitch;
    uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height;
    uint z = gl_GlobalInvocationID.x / (info.pitch * info.height);

    // Position inside the 8x8 tile and the slice within its group of four.
    uint col = bitfieldExtract(x, 0, 3);
    uint row = bitfieldExtract(y, 0, 3);
    uint lut = bitfieldExtract(z, 0, 2);

    // The LUT packs four byte indices per dword: the dword is selected by the
    // texel's linear position in the tile divided by four, and the byte by its
    // remainder, which is col & 3 because row * MICRO_TILE_DIM is a multiple
    // of four.
    uint idx_dw = lut_8bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u];
    uint idx = bitfieldExtract(idx_dw, int(col & 3u) * 8, 8);

    // Byte offset of the source texel: slice group + tile + index within tile.
    uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ;
    uint tile_row = y / MICRO_TILE_DIM;
    uint tile_column = x / MICRO_TILE_DIM;
    uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ;
    uint offs = (slice_offs + tile_offs) + (idx * BPP / 8);

    // Fetch the containing dword and pick the byte out of it. Only the low two
    // bits of the byte offset select the lane; shifting by the full offset
    // would exceed 31 bits, which GLSL leaves undefined.
    uint p0 = in_data[offs >> 2u];
    uint texel = bitfieldExtract(p0, int(offs & 3u) * 8, 8);
    atomicOr(scratch[slot], texel << byte_ofs);

    // All four bytes must be merged before the dword is read back.
    memoryBarrierShared();
    barrier();

    // One invocation per group of four writes the packed result.
    if (byte_ofs == 0u) {
        out_data[gl_GlobalInvocationID.x >> 2u] = scratch[slot];
    }
}
0 commit comments