@@ -186,30 +186,38 @@ typedef struct
186186} varattrib_1b_e ;
187187
188188/*
189- * Bit layouts for varlena headers: (GPDB always stores this big-endian format)
189+ * Bit layouts for varlena headers on big-endian machines:
190190 *
191191 * 00xxxxxx 4-byte length word, aligned, uncompressed data (up to 1G)
192192 * 01xxxxxx 4-byte length word, aligned, *compressed* data (up to 1G)
193193 * 10000000 1-byte length word, unaligned, TOAST pointer
194194 * 1xxxxxxx 1-byte length word, unaligned, uncompressed data (up to 126b)
195195 *
196- * Cloudberry differs from PostgreSQL here... In Postgres, they use different
197- * macros for big-endian and little-endian machines, so the length is contiguous,
198- * while the 4 byte lengths are stored in native endian format.
196+ * Bit layouts for varlena headers on little-endian machines:
199197 *
200- * Cloudberry stored the 4 byte varlena header in network byte order, so it always
201- * look big-endian in the tuple. This is a bit ugly, but changing it would require
202- * all our customers to initdb.
198+ * xxxxxx00 4-byte length word, aligned, uncompressed data (up to 1G)
199+ * xxxxxx10 4-byte length word, aligned, *compressed* data (up to 1G)
200+ * 00000001 1-byte length word, unaligned, TOAST pointer
201+ * xxxxxxx1 1-byte length word, unaligned, uncompressed data (up to 126b)
203202 *
204203 * The "xxx" bits are the length field (which includes itself in all cases).
205- * In the big-endian case we mask to extract the length.
206- * Note that in both cases the flag bits are in the physically
204+ * In the big-endian case we mask to extract the length; in the little-endian
205+ * case we shift. Note that in both cases the flag bits are in the physically
207206 * first byte. Also, it is not possible for a 1-byte length word to be zero;
208207 * this lets us disambiguate alignment padding bytes from the start of an
209208 * unaligned datum. (We now *require* pad bytes to be filled with zero!)
210209 *
211210 * In TOAST pointers the va_tag field (see varattrib_1b_e) is used to discern
212211 * the specific type and length of the pointer datum.
212+ *
213+ * NOTE:
214+ * Greenplum differs from PostgreSQL here... PostgreSQL uses different
215+ * macros for big-endian and little-endian machines, so the length is contiguous,
216+ * while the 4-byte lengths are stored in native-endian format.
217+ *
218+ * Greenplum stores the 4-byte varlena header in network byte order, so it always
219+ * looks big-endian in the tuple.
220+ *
213221 */
214222
215223/*
0 commit comments