-
Notifications
You must be signed in to change notification settings - Fork 254
/
Copy pathastcenc_internal.h
2227 lines (1953 loc) · 79.1 KB
/
astcenc_internal.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Functions and data declarations.
*/
#ifndef ASTCENC_INTERNAL_INCLUDED
#define ASTCENC_INTERNAL_INCLUDED
#include <algorithm>
#include <cstddef>
#include <cstdint>
#if defined(ASTCENC_DIAGNOSTICS)
#include <cstdio>
#endif
#include <cstdlib>
#include <limits>
#include "astcenc.h"
#include "astcenc_mathlib.h"
#include "astcenc_vecmathlib.h"
/**
 * @brief Make a promise to the compiler's optimizer.
 *
 * A promise is an expression that the optimizer can assume is true, to help it generate faster
 * code. Common use cases for this are to promise that a for loop will iterate more than once, or
 * that the loop iteration count is a multiple of a vector length, which avoids pre-loop checks
 * and can avoid loop tails if loops are unrolled by the auto-vectorizer.
 *
 * When NDEBUG is not defined the promise is checked with assert(). When NDEBUG is defined it
 * becomes a compiler-specific optimizer hint, or a no-op if no suitable builtin is available.
 *
 * Every expansion is a single expression or a do/while(0) statement that needs a trailing
 * semicolon, so promise() can be used safely as the unbraced body of an if/else.
 */
#if defined(NDEBUG)
    #if !defined(__clang__) && defined(_MSC_VER)
        #define promise(cond) __assume(cond)
    #elif defined(__clang__)
        #if __has_builtin(__builtin_assume)
            #define promise(cond) __builtin_assume(cond)
        #elif __has_builtin(__builtin_unreachable)
            // Wrapped in do/while(0) so a following "else" cannot bind to the macro's "if"
            #define promise(cond) do { if (!(cond)) { __builtin_unreachable(); } } while (0)
        #else
            // No usable builtin; expand to a harmless expression statement
            #define promise(cond) ((void)0)
        #endif
    #else // Assume GCC
        // Wrapped in do/while(0) so a following "else" cannot bind to the macro's "if"
        #define promise(cond) do { if (!(cond)) { __builtin_unreachable(); } } while (0)
    #endif
#else
    #define promise(cond) assert(cond)
#endif
/* ============================================================================
Constants
============================================================================ */
// Allow the build to override the maximum block size; the default covers every ASTC block size.
#if !defined(ASTCENC_BLOCK_MAX_TEXELS)
#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
#endif
/**
 * @brief The maximum number of texels a block can support.
 *
 * Defaults to 216 (a 3D 6x6x6 block). A build may predefine ASTCENC_BLOCK_MAX_TEXELS to another
 * value; the static_asserts at the end of this section require it to be no greater than 216 and a
 * multiple of ASTCENC_SIMD_WIDTH.
 */
static constexpr unsigned int BLOCK_MAX_TEXELS { ASTCENC_BLOCK_MAX_TEXELS };
/** @brief The maximum number of components a block can support. */
static constexpr unsigned int BLOCK_MAX_COMPONENTS { 4 };
/** @brief The maximum number of partitions a block can support. */
static constexpr unsigned int BLOCK_MAX_PARTITIONS { 4 };
/** @brief The number of partitionings, per partition count, supported by the ASTC format. */
static constexpr unsigned int BLOCK_MAX_PARTITIONINGS { 1024 };
/** @brief The maximum number of texels used during partition selection for texel clustering. */
static constexpr uint8_t BLOCK_MAX_KMEANS_TEXELS { 64 };
/** @brief The maximum number of weights a block can support. */
static constexpr unsigned int BLOCK_MAX_WEIGHTS { 64 };
/** @brief The maximum number of weights a block can support per plane in 2 plane mode. */
static constexpr unsigned int BLOCK_MAX_WEIGHTS_2PLANE { BLOCK_MAX_WEIGHTS / 2 };
/** @brief The minimum number of weight bits a candidate encoding must encode. */
static constexpr unsigned int BLOCK_MIN_WEIGHT_BITS { 24 };
/** @brief The maximum number of weight bits a candidate encoding can encode. */
static constexpr unsigned int BLOCK_MAX_WEIGHT_BITS { 96 };
/** @brief The index indicating a bad (unused) block mode in the remap array. */
static constexpr uint16_t BLOCK_BAD_BLOCK_MODE { 0xFFFFu };
/** @brief The index indicating a bad (unused) partitioning in the remap array. */
static constexpr uint16_t BLOCK_BAD_PARTITIONING { 0xFFFFu };
/** @brief The number of partition index bits supported by the ASTC format. */
static constexpr unsigned int PARTITION_INDEX_BITS { 10 };
/** @brief The offset of the plane 2 weights in shared weight arrays. */
static constexpr unsigned int WEIGHTS_PLANE2_OFFSET { BLOCK_MAX_WEIGHTS_2PLANE };
/** @brief The sum of quantized weights for one texel. */
static constexpr float WEIGHTS_TEXEL_SUM { 16.0f };
/** @brief The number of block modes supported by the ASTC format. */
static constexpr unsigned int WEIGHTS_MAX_BLOCK_MODES { 2048 };
/** @brief The number of weight grid decimation modes supported by the ASTC format. */
static constexpr unsigned int WEIGHTS_MAX_DECIMATION_MODES { 87 };
/** @brief The high default error used to initialize error trackers. */
static constexpr float ERROR_CALC_DEFAULT { 1e30f };
/**
 * @brief The minimum tuning setting threshold for the one partition fast path.
 */
static constexpr float TUNE_MIN_SEARCH_MODE0 { 0.85f };
/**
 * @brief The maximum number of candidate encodings tested for each encoding mode.
 *
 * This can be dynamically reduced by the compression quality preset.
 */
static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 8 };
/**
 * @brief The maximum number of candidate partitionings tested for each encoding mode.
 *
 * This can be dynamically reduced by the compression quality preset.
 */
static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 8 };
/**
 * @brief The maximum quant level using the full angular endpoint search method.
 *
 * The angular endpoint search is used to find the min/max weight that should
 * be used for a given quantization level. It is effective but expensive, so
 * we only use it where it has the most value - low quant levels with wide
 * spacing. It is used up to and including TUNE_MAX_ANGULAR_QUANT. Above this
 * we assume the min weight is 0.0f, and the max weight is 1.0f.
 *
 * Note the angular algorithm is vectorized, and using QUANT_12 exactly fills
 * one 8-wide vector. Decreasing by one doesn't buy much performance, and
 * increasing by one is disproportionately expensive.
 */
static constexpr unsigned int TUNE_MAX_ANGULAR_QUANT { 7 }; /* QUANT_12 */
// Compile-time checks that the array sizes above divide evenly into SIMD vectors, and that an
// overridden ASTCENC_BLOCK_MAX_TEXELS does not exceed the largest block size (216 texels).
static_assert((BLOCK_MAX_TEXELS % ASTCENC_SIMD_WIDTH) == 0,
"BLOCK_MAX_TEXELS must be multiple of ASTCENC_SIMD_WIDTH");
static_assert(BLOCK_MAX_TEXELS <= 216,
"BLOCK_MAX_TEXELS must not be greater than 216");
static_assert((BLOCK_MAX_WEIGHTS % ASTCENC_SIMD_WIDTH) == 0,
"BLOCK_MAX_WEIGHTS must be multiple of ASTCENC_SIMD_WIDTH");
static_assert((WEIGHTS_MAX_BLOCK_MODES % ASTCENC_SIMD_WIDTH) == 0,
"WEIGHTS_MAX_BLOCK_MODES must be multiple of ASTCENC_SIMD_WIDTH");
/* ============================================================================
Commonly used data structures
============================================================================ */
/**
 * @brief The ASTC endpoint formats.
 *
 * The numeric values (0 to 15) are used directly in the encoded format, so they must not be
 * changed or reordered.
 */
enum endpoint_formats
{
FMT_LUMINANCE = 0,
FMT_LUMINANCE_DELTA = 1,
FMT_HDR_LUMINANCE_LARGE_RANGE = 2,
FMT_HDR_LUMINANCE_SMALL_RANGE = 3,
FMT_LUMINANCE_ALPHA = 4,
FMT_LUMINANCE_ALPHA_DELTA = 5,
FMT_RGB_SCALE = 6,
FMT_HDR_RGB_SCALE = 7,
FMT_RGB = 8,
FMT_RGB_DELTA = 9,
FMT_RGB_SCALE_ALPHA = 10,
FMT_HDR_RGB = 11,
FMT_RGBA = 12,
FMT_RGBA_DELTA = 13,
FMT_HDR_RGB_LDR_ALPHA = 14,
FMT_HDR_RGBA = 15
};
/**
 * @brief The ASTC quantization methods.
 *
 * Each enumerator is named after the number of levels it provides. The numeric values are used
 * directly in the encoded format, so they must not be changed or reordered.
 */
enum quant_method
{
    QUANT_2 = 0,
    QUANT_3 = 1,
    QUANT_4 = 2,
    QUANT_5 = 3,
    QUANT_6 = 4,
    QUANT_8 = 5,
    QUANT_10 = 6,
    QUANT_12 = 7,
    QUANT_16 = 8,
    QUANT_20 = 9,
    QUANT_24 = 10,
    QUANT_32 = 11,
    QUANT_40 = 12,
    QUANT_48 = 13,
    QUANT_64 = 14,
    QUANT_80 = 15,
    QUANT_96 = 16,
    QUANT_128 = 17,
    QUANT_160 = 18,
    QUANT_192 = 19,
    QUANT_256 = 20
};

/**
 * @brief Get the number of levels used by an ASTC quantization method.
 *
 * @param method   The quantization method.
 *
 * @return The number of levels used by @c method, or 0 if @c method is not a valid enumerator.
 */
static inline unsigned int get_quant_level(quant_method method)
{
    // Level counts, indexed by the numeric value of the quant_method enumerator
    static constexpr unsigned int level_counts[] {
        2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24,
        32, 40, 48, 64, 80, 96, 128, 160, 192, 256
    };

    constexpr unsigned int method_count = sizeof(level_counts) / sizeof(level_counts[0]);
    static_assert(method_count == static_cast<unsigned int>(QUANT_256) + 1u,
                  "level_counts must have one entry per quant_method");

    // Values outside the enum (including negative ones, which wrap to large unsigned values)
    // fall through to the zero result
    const unsigned int slot = static_cast<unsigned int>(method);
    return (slot < method_count) ? level_counts[slot] : 0;
}
/**
 * @brief Computed metrics about a partition in a block.
 *
 * Both metrics are stored as four-lane float vectors.
 */
struct partition_metrics
{
/** @brief The error-weighted average color in the partition. */
vfloat4 avg;
/** @brief The dominant error-weighted direction in the partition. */
vfloat4 dir;
};
/**
 * @brief Computed lines for a three component analysis.
 */
struct partition_lines3
{
/** @brief Line for uncorrelated chroma. */
line3 uncor_line;
/** @brief Line for correlated chroma, passing through the origin. */
line3 samec_line;
/** @brief Post-processed line for uncorrelated chroma. */
processed_line3 uncor_pline;
/** @brief Post-processed line for correlated chroma, passing through the origin. */
processed_line3 samec_pline;
/**
 * @brief The length of the line for uncorrelated chroma.
 *
 * This is used for both the uncorrelated and same chroma lines - they are normally very similar
 * and only used for the relative ranking of partitionings against one another.
 */
float line_length;
};
/**
 * @brief The partition information for a single partitioning.
 *
 * ASTC has a total of 1024 candidate partitionings for each of 2/3/4 partition counts, although
 * this 1024 includes seeds that generate duplicates of other seeds and seeds that generate
 * completely empty partitions. These are both valid encodings, but astcenc will skip both during
 * compression as they are not useful.
 */
struct partition_info
{
/** @brief The number of partitions in this partitioning. */
uint16_t partition_count;
/** @brief The index (seed) of this partitioning. */
uint16_t partition_index;
/**
 * @brief The number of texels in each partition.
 *
 * Note that some seeds result in zero texels assigned to a partition. These are valid, but are
 * skipped by this compressor as there is no point spending bits encoding unused endpoints.
 */
uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS];
/** @brief The partition of each texel in the block, indexed by texel. */
ASTCENC_ALIGNAS uint8_t partition_of_texel[BLOCK_MAX_TEXELS];
/** @brief The list of texels in each partition, indexed by partition. */
ASTCENC_ALIGNAS uint8_t texels_of_partition[BLOCK_MAX_PARTITIONS][BLOCK_MAX_TEXELS];
};
/**
 * @brief The weight grid information for a single decimation pattern.
 *
 * ASTC can store one weight per texel, but is also capable of storing lower resolution weight
 * grids that are interpolated during decompression to assign a weight to each texel. Storing
 * fewer weights can free up a substantial amount of bits that we can then spend on more useful
 * things, such as more accurate endpoints and weights, or additional partitions.
 *
 * This data structure is used to store information about a single weight grid decimation pattern,
 * for a single block size.
 */
struct decimation_info
{
/** @brief The total number of texels in the block. */
uint8_t texel_count;
/** @brief The maximum number of stored weights that contribute to each texel, between 1 and 4. */
uint8_t max_texel_weight_count;
/** @brief The total number of weights stored. */
uint8_t weight_count;
/** @brief The number of stored weights in the X dimension. */
uint8_t weight_x;
/** @brief The number of stored weights in the Y dimension. */
uint8_t weight_y;
/** @brief The number of stored weights in the Z dimension. */
uint8_t weight_z;
/**
 * @brief The number of weights that contribute to each texel.
 * Value is between 1 and 4.
 */
ASTCENC_ALIGNAS uint8_t texel_weight_count[BLOCK_MAX_TEXELS];
/**
 * @brief The weight index of the N weights that are interpolated for each texel.
 * Stored transposed (weight slot first, then texel) to improve vectorization.
 */
ASTCENC_ALIGNAS uint8_t texel_weights_tr[4][BLOCK_MAX_TEXELS];
/**
 * @brief The bilinear contribution of the N weights that are interpolated for each texel.
 * Value is between 0 and 16, stored transposed to improve vectorization.
 */
ASTCENC_ALIGNAS uint8_t texel_weight_contribs_int_tr[4][BLOCK_MAX_TEXELS];
/**
 * @brief The bilinear contribution of the N weights that are interpolated for each texel.
 * Value is between 0 and 1, stored transposed to improve vectorization.
 */
ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
/** @brief The number of texels that each stored weight contributes to. */
ASTCENC_ALIGNAS uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
/**
 * @brief The list of texels that use a specific weight index.
 * Stored transposed to improve vectorization.
 */
ASTCENC_ALIGNAS uint8_t weight_texels_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
/**
 * @brief The bilinear contribution to the N texels that use each weight.
 * Value is between 0 and 1, stored transposed to improve vectorization.
 */
ASTCENC_ALIGNAS float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
/**
 * @brief The bilinear contribution to the Nth texel that uses each weight.
 * Value is between 0 and 1, stored transposed to improve vectorization.
 */
float texel_contrib_for_weight[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
};
/**
 * @brief Metadata for a single block mode for a specific block size.
 */
struct block_mode
{
    /** @brief The block mode index in the ASTC encoded form. */
    uint16_t mode_index;

    /** @brief The decimation mode index in the compressor reindexed list. */
    uint8_t decimation_mode;

    /** @brief The weight quantization used by this block mode, stored as a raw @c quant_method value. */
    uint8_t quant_mode;

    /**
     * @brief The weight bit count for this block mode.
     *
     * NOTE(review): presumably the total number of bits used to store the weights; confirm
     * against the code that populates this table.
     */
    uint8_t weight_bits;

    /** @brief Is a dual weight plane used by this block mode? */
    uint8_t is_dual_plane : 1;

    /**
     * @brief Get the weight quantization used by this block mode as a typed enum.
     *
     * @return The stored @c quant_mode converted to @c quant_method.
     */
    quant_method get_weight_quant_mode() const
    {
        return static_cast<quant_method>(quant_mode);
    }
};
/**
 * @brief Metadata for a single decimation mode for a specific block size.
 *
 * The @c refprec_* bitvectors are 16 bits wide, so only quant methods with a numeric value below
 * 16 can be recorded in them; a bit for a higher value is discarded by the narrowing cast.
 */
struct decimation_mode
{
    /** @brief The max weight precision for 1 plane, or -1 if not supported. */
    int8_t maxprec_1plane;

    /** @brief The max weight precision for 2 planes, or -1 if not supported. */
    int8_t maxprec_2planes;

    /**
     * @brief Bitvector indicating weight quant methods used by active 1 plane block modes.
     *
     * Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
     */
    uint16_t refprec_1plane;

    /**
     * @brief Bitvector indicating weight quant methods used by active 2 plane block modes.
     *
     * Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
     */
    uint16_t refprec_2planes;

    /**
     * @brief Set a 1 plane weight quant as active.
     *
     * @param weight_quant   The quant method to set.
     */
    void set_ref_1plane(quant_method weight_quant)
    {
        // Explicit narrowing, matching set_ref_2plane(), keeps the int -> uint16_t
        // conversion visible and consistent between the two setters
        refprec_1plane |= static_cast<uint16_t>(1 << weight_quant);
    }

    /**
     * @brief Test if this mode is active below a given 1 plane weight quant (inclusive).
     *
     * @param max_weight_quant   The max quant method to test.
     *
     * @return @c true if any quant method up to and including @c max_weight_quant is set.
     */
    bool is_ref_1plane(quant_method max_weight_quant) const
    {
        // Mask with one bit set for every quant method from 0 to max_weight_quant
        uint16_t mask = static_cast<uint16_t>((1 << (max_weight_quant + 1)) - 1);
        return (refprec_1plane & mask) != 0;
    }

    /**
     * @brief Set a 2 plane weight quant as active.
     *
     * @param weight_quant   The quant method to set.
     */
    void set_ref_2plane(quant_method weight_quant)
    {
        refprec_2planes |= static_cast<uint16_t>(1 << weight_quant);
    }

    /**
     * @brief Test if this mode is active below a given 2 plane weight quant (inclusive).
     *
     * @param max_weight_quant   The max quant method to test.
     *
     * @return @c true if any quant method up to and including @c max_weight_quant is set.
     */
    bool is_ref_2plane(quant_method max_weight_quant) const
    {
        // Mask with one bit set for every quant method from 0 to max_weight_quant
        uint16_t mask = static_cast<uint16_t>((1 << (max_weight_quant + 1)) - 1);
        return (refprec_2planes & mask) != 0;
    }
};
/**
 * @brief Data tables for a single block size.
 *
 * The decimation tables hold the information needed to apply weight grid dimension reductions.
 * Only the decimation modes needed by the current context are stored; many possible modes are
 * unused (too many weights for the block size, or disabled by heuristics). The number of stored
 * modes is given by the @c decimation_mode_count_* fields, and the @c decimation_modes and
 * @c decimation_tables arrays hold the active modes contiguously from index zero, in no
 * particular order.
 *
 * The block mode tables hold the unpacked block mode settings. A block mode is an 11 bit field in
 * the compressed block, but for a given block size and set of compressor heuristics only a subset
 * is used. The number of stored block modes is given by the @c block_mode_count_* fields, and the
 * @c block_modes array holds the active modes contiguously from index zero, in incrementing
 * "packed" value order. The @c block_mode_packed_index array maps the physical block mode ID to
 * the remapped array index so that decompressors can look up packed data efficiently.
 */
struct block_size_descriptor
{
    /** @brief Block width, in texels. */
    uint8_t xdim;

    /** @brief Block height, in texels. */
    uint8_t ydim;

    /** @brief Block depth, in texels. */
    uint8_t zdim;

    /** @brief Total number of texels in the block. */
    uint8_t texel_count;

    /**
     * @brief Count of stored decimation modes that are "always" modes.
     *
     * Always modes occupy the start of the decimation_modes list.
     */
    unsigned int decimation_mode_count_always;

    /** @brief Count of stored decimation modes for selected encodings. */
    unsigned int decimation_mode_count_selected;

    /** @brief Count of stored decimation modes for any encoding. */
    unsigned int decimation_mode_count_all;

    /**
     * @brief Count of stored block modes that are "always" modes.
     *
     * Always modes occupy the start of the block_modes list.
     */
    unsigned int block_mode_count_1plane_always;

    /** @brief Count of stored block modes for active 1 plane encodings. */
    unsigned int block_mode_count_1plane_selected;

    /** @brief Count of stored block modes for active 1 and 2 plane encodings. */
    unsigned int block_mode_count_1plane_2plane_selected;

    /** @brief Count of stored block modes for any encoding. */
    unsigned int block_mode_count_all;

    /** @brief Count of selected partitionings, indexed by partition count - 1. */
    unsigned int partitioning_count_selected[BLOCK_MAX_PARTITIONS];

    /** @brief Count of all partitionings, indexed by partition count - 1. */
    unsigned int partitioning_count_all[BLOCK_MAX_PARTITIONS];

    /** @brief Active decimation modes, packed into the low indices. */
    decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES];

    /** @brief Active decimation tables, packed into the low indices. */
    ASTCENC_ALIGNAS decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];

    /** @brief Packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. */
    uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES];

    /** @brief Active block modes, packed into the low indices. */
    block_mode block_modes[WEIGHTS_MAX_BLOCK_MODES];

    /**
     * @brief Active partition tables, packed into the low indices per partition count.
     *
     * Three tables of BLOCK_MAX_PARTITIONINGS entries for 2, 3 and 4 partitions, followed by a
     * single entry for 1 partition.
     */
    partition_info partitionings[(3 * BLOCK_MAX_PARTITIONINGS) + 1];

    /**
     * @brief Packed partition table array index, or @c BLOCK_BAD_PARTITIONING if not active.
     *
     * Indexed by partition_count - 2, covering 2, 3 and 4 partitions.
     */
    uint16_t partitioning_packed_index[3][BLOCK_MAX_PARTITIONINGS];

    /** @brief Active texels for k-means partition selection. */
    uint8_t kmeans_texels[BLOCK_MAX_KMEANS_TEXELS];

    /**
     * @brief Canonical 2-partition coverage pattern used during block partition search.
     *
     * Indexed by remapped index, not physical index.
     */
    uint64_t coverage_bitmaps_2[BLOCK_MAX_PARTITIONINGS][2];

    /**
     * @brief Canonical 3-partition coverage pattern used during block partition search.
     *
     * Indexed by remapped index, not physical index.
     */
    uint64_t coverage_bitmaps_3[BLOCK_MAX_PARTITIONINGS][3];

    /**
     * @brief Canonical 4-partition coverage pattern used during block partition search.
     *
     * Indexed by remapped index, not physical index.
     */
    uint64_t coverage_bitmaps_4[BLOCK_MAX_PARTITIONINGS][4];

    /**
     * @brief Look up the block mode structure for physical index @c block_mode.
     *
     * Only block modes enabled by the current compressor config can be returned. Decompression
     * from an arbitrary source must first check that the packed block mode index is not
     * @c BLOCK_BAD_BLOCK_MODE before calling this.
     *
     * @param block_mode   The packed block mode index.
     *
     * @return The block mode structure.
     */
    const block_mode& get_block_mode(unsigned int block_mode) const
    {
        // Translate the physical mode ID into a slot in the packed array
        const unsigned int packed_index = block_mode_packed_index[block_mode];
        assert(packed_index != BLOCK_BAD_BLOCK_MODE && packed_index < this->block_mode_count_all);
        return block_modes[packed_index];
    }

    /**
     * @brief Look up the decimation mode structure for index @c decimation_mode.
     *
     * Only decimation modes enabled by the current compressor config can be returned. The array
     * is stored packed and is only ever indexed by the packed index held in a @c block_mode; an
     * unpacked form never exists.
     *
     * @param decimation_mode   The packed decimation mode index.
     *
     * @return The decimation mode structure.
     */
    const decimation_mode& get_decimation_mode(unsigned int decimation_mode) const
    {
        return decimation_modes[decimation_mode];
    }

    /**
     * @brief Look up the decimation info structure for index @c decimation_mode.
     *
     * Only decimation modes enabled by the current compressor config can be returned. The array
     * is stored packed and is only ever indexed by the packed index held in a @c block_mode; an
     * unpacked form never exists.
     *
     * @param decimation_mode   The packed decimation mode index.
     *
     * @return The decimation info structure.
     */
    const decimation_info& get_decimation_info(unsigned int decimation_mode) const
    {
        return decimation_tables[decimation_mode];
    }

    /**
     * @brief Get the partition info table for a given partition count.
     *
     * @param partition_count   The number of partitions we want the table for.
     *
     * @return The pointer to the table of 1024 entries (for 2/3/4 parts) or 1 entry (for 1 part).
     */
    const partition_info* get_partition_table(unsigned int partition_count) const
    {
        // Tables for 2/3/4 partitions come first; the single 1 partition entry sits after them
        const unsigned int table_slot = (partition_count == 1) ? 3 : (partition_count - 2);
        return partitionings + (table_slot * BLOCK_MAX_PARTITIONINGS);
    }

    /**
     * @brief Get the partition info structure for a given partition count and seed.
     *
     * @param partition_count   The number of partitions we want the info for.
     * @param index             The partition seed (between 0 and 1023).
     *
     * @return The partition info structure.
     */
    const partition_info& get_partition_info(unsigned int partition_count, unsigned int index) const
    {
        // A single partition has exactly one table entry, so the seed is not remapped
        const unsigned int packed_index = (partition_count >= 2)
                                        ? partitioning_packed_index[partition_count - 2][index]
                                        : 0u;

        assert(packed_index != BLOCK_BAD_PARTITIONING && packed_index < this->partitioning_count_all[partition_count - 1]);
        const partition_info& result = get_partition_table(partition_count)[packed_index];
        assert(index == result.partition_index);
        return result;
    }

    /**
     * @brief Get the partition info structure for a given partition count and packed array offset.
     *
     * @param partition_count   The number of partitions we want the info for.
     * @param packed_index      The raw array offset.
     *
     * @return The partition info structure.
     */
    const partition_info& get_raw_partition_info(unsigned int partition_count, unsigned int packed_index) const
    {
        assert(packed_index != BLOCK_BAD_PARTITIONING && packed_index < this->partitioning_count_all[partition_count - 1]);
        return get_partition_table(partition_count)[packed_index];
    }
};
/**
* @brief The image data for a single block.
*
* The @c data_[rgba] fields store the image data in an encoded SoA float form designed for easy
* vectorization. Input data is converted to float and stored as values between 0 and 65535. LDR
* data is stored as direct UNORM data, HDR data is stored as LNS data. They are allocated SIMD
* elements over-size to allow vectorized stores of unaligned and partial SIMD lanes (e.g. in a
* 6x6x6 block the final row write will read elements 210-217 (vec8) or 214-217 (vec4), which is
* two elements above the last real data element). The overspill values are never written to memory,
* and would be benign, but the padding avoids hitting undefined behavior.
*
* The @c rgb_lns and @c alpha_lns fields that assigned a per-texel use of HDR are only used during
* decompression. The current compressor will always use HDR endpoint formats when in HDR mode.
*/
struct image_block
{
/** @brief The input (compress) or output (decompress) data for the red color component. */
ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
/** @brief The input (compress) or output (decompress) data for the green color component. */
ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
/** @brief The input (compress) or output (decompress) data for the blue color component. */
ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
/** @brief The input (compress) or output (decompress) data for the alpha color component. */
ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
/** @brief The number of texels in the block. */
uint8_t texel_count;
/** @brief The original data for texel 0 for constant color block encoding. */
vfloat4 origin_texel;
/** @brief The min component value of all texels in the block. */
vfloat4 data_min;
/** @brief The mean component value of all texels in the block. */
vfloat4 data_mean;
/** @brief The max component value of all texels in the block. */
vfloat4 data_max;
/** @brief The relative error significance of the color channels. */
vfloat4 channel_weight;
/** @brief Is this grayscale block where R == G == B for all texels? */
bool grayscale;
/** @brief Is the eventual decode using decode_unorm8 rounding? */
bool decode_unorm8;
/** @brief Set to 1 if a texel is using HDR RGB endpoints (decompression only). */
uint8_t rgb_lns[BLOCK_MAX_TEXELS];
/** @brief Set to 1 if a texel is using HDR alpha endpoints (decompression only). */
uint8_t alpha_lns[BLOCK_MAX_TEXELS];
/** @brief The X position of this block in the input or output image. */
unsigned int xpos;
/** @brief The Y position of this block in the input or output image. */
unsigned int ypos;
/** @brief The Z position of this block in the input or output image. */
unsigned int zpos;
/**
* @brief Get an RGBA texel value from the data.
*
* @param index The texel index.
*
* @return The texel in RGBA component ordering.
*/
inline vfloat4 texel(unsigned int index) const
{
return vfloat4(data_r[index],
data_g[index],
data_b[index],
data_a[index]);
}
/**
* @brief Get an RGB texel value from the data.
*
* @param index The texel index.
*
* @return The texel in RGB0 component ordering.
*/
inline vfloat4 texel3(unsigned int index) const
{
return vfloat3(data_r[index],
data_g[index],
data_b[index]);
}
/**
* @brief Get the default alpha value for endpoints that don't store it.
*
* The default depends on whether the alpha endpoint is LDR or HDR.
*
* @return The alpha value in the scaled range used by the compressor.
*/
inline float get_default_alpha() const
{
return this->alpha_lns[0] ? static_cast<float>(0x7800) : static_cast<float>(0xFFFF);
}
/**
* @brief Test if a single color channel is constant across the block.
*
* Constant color channels are easier to compress as interpolating between two identical colors
* always returns the same value, irrespective of the weight used. They therefore can be ignored
* for the purposes of weight selection and use of a second weight plane.
*
* @return @c true if the channel is constant across the block, @c false otherwise.
*/
inline bool is_constant_channel(int channel) const
{
vmask4 lane_mask = vint4::lane_id() == vint4(channel);
vmask4 color_mask = this->data_min == this->data_max;
return any(lane_mask & color_mask);
}
/**
 * @brief Test if this block is a luminance block with constant 1.0 alpha.
 *
 * The block qualifies when it is flagged as grayscale and both the minimum and maximum alpha
 * equal the value returned by get_default_alpha().
 *
 * @return @c true if the block is a luminance block, @c false otherwise.
 */
inline bool is_luminance() const
{
    // Non-grayscale blocks can never be luminance blocks
    if (!this->grayscale)
    {
        return false;
    }

    float opaque = this->get_default_alpha();
    bool min_is_opaque = this->data_min.lane<3>() == opaque;
    bool max_is_opaque = this->data_max.lane<3>() == opaque;

    return min_is_opaque && max_is_opaque;
}
/**
 * @brief Test if this block is a luminance block with variable alpha.
 *
 * The block qualifies when it is flagged as grayscale and the alpha channel is not pinned at
 * the value returned by get_default_alpha(); that is, the minimum or the maximum alpha differs
 * from that default.
 *
 * @return @c true if the block is a luminance + alpha block, @c false otherwise.
 */
inline bool is_luminancealpha() const
{
    // Non-grayscale blocks can never be luminance + alpha blocks
    if (!this->grayscale)
    {
        return false;
    }

    float opaque = this->get_default_alpha();
    bool alpha_is_default = (this->data_min.lane<3>() == opaque) &&
                            (this->data_max.lane<3>() == opaque);

    return !alpha_is_default;
}
};
/**
 * @brief Data structure storing the color endpoints for a block.
 *
 * Each partition has a pair of endpoint colors, held at the same index in
 * @c endpt0 and @c endpt1. Both arrays are sized for the maximum partition
 * count; presumably only the first @c partition_count entries are populated
 * (confirm against the code that fills this structure).
 */
struct endpoints
{
/** @brief The number of partition endpoints stored. */
unsigned int partition_count;
/** @brief The colors for endpoint 0, one entry per partition. */
vfloat4 endpt0[BLOCK_MAX_PARTITIONS];
/** @brief The colors for endpoint 1, one entry per partition. */
vfloat4 endpt1[BLOCK_MAX_PARTITIONS];
};
/**
 * @brief Data structure storing the color endpoints and weights.
 *
 * Bundles a set of color endpoints with the ideal weights and the weight
 * error scale factors that go with them. Both per-texel arrays are sized for
 * the maximum texel count and carry the alignment requested by
 * @c ASTCENC_ALIGNAS.
 */
struct endpoints_and_weights
{
/** @brief True if all active values in weight_error_scale are the same. */
bool is_constant_weight_error_scale;
/** @brief The color endpoints. */
endpoints ep;
/** @brief The ideal weight for each texel; may be undecimated or decimated. */
ASTCENC_ALIGNAS float weights[BLOCK_MAX_TEXELS];
/** @brief The ideal weight error scaling for each texel; may be undecimated or decimated. */
ASTCENC_ALIGNAS float weight_error_scale[BLOCK_MAX_TEXELS];
};
/**
 * @brief Utility storing estimated errors from choosing particular endpoint encodings.
 *
 * Each float field holds the estimated error of swapping the complete
 * endpoint representation for one specific cheaper encoding. The two boolean
 * fields record whether an encoding variant is available at all.
 */
struct encoding_choice_errors
{
/** @brief Error of using LDR RGB-scale instead of complete endpoints. */
float rgb_scale_error;
/** @brief Error of using HDR RGB-scale instead of complete endpoints. */
float rgb_luma_error;
/** @brief Error of using luminance instead of RGB. */
float luminance_error;
/** @brief Error of discarding alpha and using a constant 1.0 alpha. */
float alpha_drop_error;
/** @brief Can we use delta offset encoding? */
bool can_offset_encode;
/** @brief Can we use blue contraction encoding? */
bool can_blue_contract;
};
/**
* @brief Preallocated working buffers, allocated per thread during context creation.
*/
struct ASTCENC_ALIGNAS compression_working_buffers
{
/** @brief Ideal endpoints and weights for plane 1. */
endpoints_and_weights ei1;
/** @brief Ideal endpoints and weights for plane 2. */
endpoints_and_weights ei2;
/**
* @brief Decimated ideal weight values in the ~0-1 range.
*
* Note that values can be slightly below zero or higher than one due to
* endpoint extents being inside the ideal color representation.
*
* For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
*/
ASTCENC_ALIGNAS float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
/**
* @brief Decimated quantized weight values in the unquantized 0-64 range.
*
* For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
*/
ASTCENC_ALIGNAS uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];
/** @brief Error of the best encoding combination for each block mode. */
ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
/** @brief The best color quant for each block mode. */
uint8_t best_quant_levels[WEIGHTS_MAX_BLOCK_MODES];
/** @brief The best color quant for each block mode if modes are the same and we have spare bits. */
uint8_t best_quant_levels_mod[WEIGHTS_MAX_BLOCK_MODES];
/** @brief The best endpoint format for each partition. */
uint8_t best_ep_formats[WEIGHTS_MAX_BLOCK_MODES][BLOCK_MAX_PARTITIONS];
/** @brief The total bit storage needed for quantized weights for each block mode. */
int8_t qwt_bitcounts[WEIGHTS_MAX_BLOCK_MODES];
/** @brief The cumulative error for quantized weights for each block mode. */
float qwt_errors[WEIGHTS_MAX_BLOCK_MODES];
/** @brief The low weight value in plane 1 for each block mode. */
float weight_low_value1[WEIGHTS_MAX_BLOCK_MODES];
/** @brief The high weight value in plane 1 for each block mode. */
float weight_high_value1[WEIGHTS_MAX_BLOCK_MODES];
/** @brief The low weight value in plane 1 for each quant level and decimation mode. */
float weight_low_values1[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1];
/** @brief The high weight value in plane 1 for each quant level and decimation mode. */
float weight_high_values1[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1];
/** @brief The low weight value in plane 2 for each block mode. */
float weight_low_value2[WEIGHTS_MAX_BLOCK_MODES];
/** @brief The high weight value in plane 2 for each block mode. */
float weight_high_value2[WEIGHTS_MAX_BLOCK_MODES];