diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 077a75d8917..1912faf9976 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -167,8 +167,9 @@ Bitmapset **acquire_func_colLargeRowIndexes; static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, - AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, - bool inh, bool in_outer_xact, int elevel); + BlockNumber relpages, + bool inh, bool in_outer_xact, int elevel, + gp_acquire_sample_rows_context *ctx); static void compute_index_stats(Relation onerel, double totalrows, AnlIndexData *indexdata, int nindexes, HeapTuple *rows, int numrows, @@ -187,8 +188,10 @@ static void update_attstats(Oid relid, bool inh, static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); + static void analyze_rel_internal(Oid relid, VacuumStmt *vacstmt, - bool in_outer_xact, BufferAccessStrategy bstrategy); + bool in_outer_xact, BufferAccessStrategy bstrategy, + gp_acquire_sample_rows_context *ctx); static void acquire_hll_by_query(Relation onerel, int nattrs, VacAttrStats **attrstats, int elevel); bool gp_use_fastanalyze; @@ -197,8 +200,9 @@ bool gp_use_fastanalyze; * analyze_rel() -- analyze one relation */ void -analyze_rel(Oid relid, VacuumStmt *vacstmt, - bool in_outer_xact, BufferAccessStrategy bstrategy) + +analyze_rel(Oid relid, VacuumStmt *vacstmt, bool in_outer_xact, + BufferAccessStrategy bstrategy, gp_acquire_sample_rows_context *ctx) { bool optimizerBackup; @@ -212,7 +216,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, PG_TRY(); { - analyze_rel_internal(relid, vacstmt, in_outer_xact, bstrategy); + analyze_rel_internal(relid, vacstmt, + in_outer_xact, bstrategy, ctx); } /* Clean up in case of error. 
*/ PG_CATCH(); @@ -228,8 +233,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, } static void -analyze_rel_internal(Oid relid, VacuumStmt *vacstmt, - bool in_outer_xact, BufferAccessStrategy bstrategy) +analyze_rel_internal(Oid relid, VacuumStmt *vacstmt, bool in_outer_xact, + BufferAccessStrategy bstrategy, gp_acquire_sample_rows_context *ctx) { Relation onerel; int elevel; @@ -411,15 +416,16 @@ analyze_rel_internal(Oid relid, VacuumStmt *vacstmt, */ PartStatus ps = rel_part_status(relid); if (!(ps == PART_STATUS_ROOT || ps == PART_STATUS_INTERIOR)) - do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, - false, in_outer_xact, elevel); + + do_analyze_rel(onerel, vacstmt, relpages, + false, in_outer_xact, elevel, ctx); /* * If there are child tables, do recursive ANALYZE. */ if (onerel->rd_rel->relhassubclass) - do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, - true, in_outer_xact, elevel); + do_analyze_rel(onerel, vacstmt, relpages, + true, in_outer_xact, elevel, ctx); /* MPP-6929: metadata tracking */ if (!vacuumStatement_IsTemporary(onerel) && (Gp_role == GP_ROLE_DISPATCH)) @@ -464,8 +470,8 @@ analyze_rel_internal(Oid relid, VacuumStmt *vacstmt, */ static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, - AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, - bool inh, bool in_outer_xact, int elevel) + BlockNumber relpages, bool inh, bool in_outer_xact, + int elevel, gp_acquire_sample_rows_context *ctx) { int attr_cnt, tcnt, @@ -682,6 +688,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, sample_needed = needs_sample(vacattrstats, attr_cnt); if (sample_needed) { + if (ctx) + MemoryContextSwitchTo(caller_context); rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); /* @@ -696,10 +704,12 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, rows, targrows, &totalrows, &totaldeadrows,vacstmt->options); else - numrows = (*acquirefunc) (onerel, elevel, + numrows = acquire_sample_rows (onerel, elevel, rows, targrows, &totalrows, 
&totaldeadrows); acquire_func_colLargeRowIndexes = NULL; + if (ctx) + MemoryContextSwitchTo(anl_context); } else { @@ -710,6 +720,14 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, rows = NULL; } + if (ctx) + { + ctx->sample_rows = rows; + ctx->num_sample_rows = numrows; + ctx->totalrows = totalrows; + ctx->totaldeadrows = totaldeadrows; + } + /* * Compute the statistics. Temporary results during the calculations for * each column are stored in a child context. The calc routines are @@ -1784,6 +1802,8 @@ acquire_sample_rows(Relation onerel, int elevel, else if (RelationIsAppendOptimized(onerel)) return acquire_sample_rows_ao(onerel, elevel, rows, targrows, totalrows, totaldeadrows); + else if (RelationIsForeign(onerel)) + return 0; else elog(ERROR, "unsupported table type"); } diff --git a/src/backend/commands/analyzefuncs.c b/src/backend/commands/analyzefuncs.c index 12b1a94bdbf..26843c37348 100644 --- a/src/backend/commands/analyzefuncs.c +++ b/src/backend/commands/analyzefuncs.c @@ -8,6 +8,7 @@ #include "cdb/cdbaocsam.h" #include "cdb/cdbvars.h" #include "commands/vacuum.h" +#include "nodes/makefuncs.h" #include "storage/bufmgr.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -24,29 +25,6 @@ bool gp_statistics_pullup_from_child_partition = FALSE; bool gp_statistics_use_fkeys = FALSE; -typedef struct -{ - /* Table being sampled */ - Relation onerel; - - /* Sampled rows and estimated total number of rows in the table. */ - HeapTuple *sample_rows; - int num_sample_rows; - double totalrows; - double totaldeadrows; - - /* - * Result tuple descriptor. Each returned row consists of three "fixed" - * columns, plus all the columns of the sampled table (excluding dropped - * columns). - */ - TupleDesc outDesc; -#define NUM_SAMPLE_FIXED_COLS 3 - - /* SRF state, to track which rows have already been returned. 
*/ - int index; - bool summary_sent; -} gp_acquire_sample_rows_context; Datum gp_acquire_sample_rows_int(FunctionCallInfo fcinfo, Oid relOid,int32 targrows,bool inherited,int32 vacopts){ @@ -63,12 +41,10 @@ gp_acquire_sample_rows_int(FunctionCallInfo fcinfo, Oid relOid,int32 targrows,bo if (SRF_IS_FIRSTCALL()) { - double totalrows; - double totaldeadrows; Relation onerel; int attno; - int num_sample_rows; int outattno; + RangeVar *this_rangevar; funcctx = SRF_FIRSTCALL_INIT(); @@ -78,6 +54,9 @@ gp_acquire_sample_rows_int(FunctionCallInfo fcinfo, Oid relOid,int32 targrows,bo */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + /* Construct the context to keep across calls; zeroed so the sample fields are well-defined (NULL / 0) even if analyze_rel() never reaches do_analyze_rel() to fill them in. */ + ctx = (gp_acquire_sample_rows_context *) palloc0(sizeof(gp_acquire_sample_rows_context)); + if (!pg_class_ownercheck(relOid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS, get_rel_name(relOid)); @@ -85,6 +64,30 @@ gp_acquire_sample_rows_int(FunctionCallInfo fcinfo, Oid relOid,int32 targrows,bo onerel = relation_open(relOid, AccessShareLock); relDesc = RelationGetDescr(onerel); + { + VacuumStmt vacstmt; + + /* Set up command parameters --- vacstmt itself is a stack local; this_rangevar is already a RangeVar pointer, so it is stored as-is (not its address) */ + MemSet(&vacstmt, 0, sizeof(vacstmt)); + + this_rangevar = makeRangeVar(get_namespace_name(onerel->rd_rel->relnamespace), + pstrdup(RelationGetRelationName(onerel)), + -1); + vacstmt.type = T_VacuumStmt; + vacstmt.options |= VACOPT_ANALYZE; + vacstmt.freeze_min_age = -1; + vacstmt.freeze_table_age = -1; + vacstmt.multixact_freeze_min_age = -1; + vacstmt.multixact_freeze_table_age = -1; + vacstmt.relation = this_rangevar; + vacstmt.va_cols = NIL; + vacstmt.auto_stats = false; + analyze_rel(relOid, &vacstmt, + true, GetAccessStrategy(BAS_VACUUM), ctx); + + } + + /* Count the number of non-dropped cols */ live_natts = 0; for (attno = 1; attno <= relDesc->natts; attno++) @@ -144,37 +147,9 @@ gp_acquire_sample_rows_int(FunctionCallInfo fcinfo, Oid relOid,int32 targrows,bo
BlessTupleDesc(outDesc); funcctx->tuple_desc = outDesc; - /* - * Collect the actual sample. (We do this only after blessing the output - * tuple, to avoid the very expensive work of scanning the table, if we're - * going to error out because of incorrect column definition, anyway. - * ANALYZE should always get this right, but makes testing manually a bit - * more comfortable.) - */ - sample_rows = (HeapTuple *) palloc0(targrows * sizeof(HeapTuple)); - if (inherited) - { - num_sample_rows = - acquire_inherited_sample_rows(onerel, DEBUG1, - sample_rows, targrows, - &totalrows, &totaldeadrows,vacopts); - } - else - { - num_sample_rows = - acquire_sample_rows(onerel, DEBUG1, sample_rows, targrows, - &totalrows, &totaldeadrows); - } - - /* Construct the context to keep across calls. */ - ctx = (gp_acquire_sample_rows_context *) palloc(sizeof(gp_acquire_sample_rows_context)); ctx->onerel = onerel; funcctx->user_fctx = ctx; ctx->outDesc = outDesc; - ctx->sample_rows = sample_rows; - ctx->num_sample_rows = num_sample_rows; - ctx->totalrows = totalrows; - ctx->totaldeadrows = totaldeadrows; ctx->index = 0; ctx->summary_sent = false; @@ -194,7 +169,7 @@ gp_acquire_sample_rows_int(FunctionCallInfo fcinfo, Oid relOid,int32 targrows,bo HeapTuple res; /* First return all the sample rows */ - if (ctx->index < ctx->num_sample_rows) + if (ctx->index < ctx->num_sample_rows && ctx->index < targrows) { HeapTuple relTuple = ctx->sample_rows[ctx->index]; int attno; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 0f088f32fec..9a5eb60b998 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -399,7 +399,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, PushActiveSnapshot(GetTransactionSnapshot()); } - analyze_rel(relid, vacstmt, in_outer_xact, vac_strategy); + analyze_rel(relid, vacstmt, in_outer_xact, vac_strategy, NULL); if (use_own_xacts) { diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h 
index f14362a36e5..860afb92bd0 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -162,6 +162,47 @@ typedef struct VPgClassStats BlockNumber relallvisible; } VPgClassStats; +/* + * Parameters customizing behavior of VACUUM and ANALYZE. + */ +typedef struct VacuumParams +{ + int freeze_min_age; /* min freeze age, -1 to use default */ + int freeze_table_age; /* age at which to scan whole table */ + int multixact_freeze_min_age; /* min multixact freeze age, + * -1 to use default */ + int multixact_freeze_table_age; /* multixact age at which to + * scan whole table */ + bool is_wraparound; /* force a for-wraparound vacuum */ + int log_min_duration; /* minimum execution threshold in ms + * at which verbose logs are + * activated, -1 to use default */ +} VacuumParams; + +typedef struct +{ + /* Table being sampled */ + Relation onerel; + + /* Sampled rows and estimated total number of rows in the table. */ + HeapTuple *sample_rows; + int num_sample_rows; + double totalrows; + double totaldeadrows; + + /* + * Result tuple descriptor. Each returned row consists of three "fixed" + * columns, plus all the columns of the sampled table (excluding dropped + * columns). + */ + TupleDesc outDesc; +#define NUM_SAMPLE_FIXED_COLS 3 + + /* SRF state, to track which rows have already been returned. 
*/ + int index; + bool summary_sent; +} gp_acquire_sample_rows_context; + /* GUC parameters */ extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for * PostGIS */ @@ -219,8 +260,8 @@ extern int vacuum_appendonly_indexes(Relation aoRelation, VacuumStmt *vacstmt, B extern void vacuum_aocs_rel(Relation aorel, void *vacrelstats, bool isVacFull); /* in commands/analyze.c */ -extern void analyze_rel(Oid relid, VacuumStmt *vacstmt, - bool in_outer_xact, BufferAccessStrategy bstrategy); +extern void analyze_rel(Oid relid,VacuumStmt *vacstmt, bool in_outer_xact, + BufferAccessStrategy bstrategy, gp_acquire_sample_rows_context *ctx); extern void analyzeStatement(VacuumStmt *vacstmt, List *relids, BufferAccessStrategy start, bool isTopLevel); extern bool std_typanalyze(VacAttrStats *stats); diff --git a/src/test/regress/expected/bfv_statistic.out b/src/test/regress/expected/bfv_statistic.out index 0d8579a0388..71132b190cf 100644 --- a/src/test/regress/expected/bfv_statistic.out +++ b/src/test/regress/expected/bfv_statistic.out @@ -478,3 +478,20 @@ explain select * from tiny_freq where a=12; (4 rows) RESET optimizer_trace_fallback; +-- Test if the table pg_statistic has data in segments +DROP TABLE IF EXISTS test_statistic_1; +CREATE TABLE test_statistic_1(a int, b int); +INSERT INTO test_statistic_1 SELECT i, i FROM generate_series(1, 1000)i; +select count(*) from pg_class c, pg_statistic s where c.oid = s.starelid and relname = 'test_statistic_1'; + count +------- + 2 +(1 row) + +select count(*) from pg_class c, gp_dist_random('pg_statistic') s where c.oid = s.starelid and relname = 'test_statistic_1'; + count +------- + 6 +(1 row) + +DROP TABLE test_statistic_1; diff --git a/src/test/regress/expected/bfv_statistic_optimizer.out b/src/test/regress/expected/bfv_statistic_optimizer.out index 4d8ebfb5770..4365573ae77 100644 --- a/src/test/regress/expected/bfv_statistic_optimizer.out +++ b/src/test/regress/expected/bfv_statistic_optimizer.out @@ -506,3 +506,20 @@ 
explain select * from tiny_freq where a=12; (4 rows) RESET optimizer_trace_fallback; +-- Test if the table pg_statistic has data in segments +DROP TABLE IF EXISTS test_statistic_1; +CREATE TABLE test_statistic_1(a int, b int); +INSERT INTO test_statistic_1 SELECT i, i FROM generate_series(1, 1000)i; +select count(*) from pg_class c, pg_statistic s where c.oid = s.starelid and relname = 'test_statistic_1'; + count +------- + 2 +(1 row) + +select count(*) from pg_class c, gp_dist_random('pg_statistic') s where c.oid = s.starelid and relname = 'test_statistic_1'; + count +------- + 6 +(1 row) + +DROP TABLE test_statistic_1; diff --git a/src/test/regress/expected/vacuum_gp.out b/src/test/regress/expected/vacuum_gp.out index 576642885eb..95e80db5886 100644 --- a/src/test/regress/expected/vacuum_gp.out +++ b/src/test/regress/expected/vacuum_gp.out @@ -15,8 +15,6 @@ insert into ao_age_test select i, (i%123 > 50), (i/11) || '', '2008/10/12'::date + (i || ' days')::interval from generate_series(0, 99) i; create index ao_age_test_i on ao_age_test(i); -NOTICE: building index for child partition "ao_age_test_1_prt_b1" -NOTICE: building index for child partition "ao_age_test_1_prt_b2" -- MPP-23647 Create a empty table with no segments, let its age -- increase during the test. We will vacuum it at the end of the -- test. @@ -291,6 +289,8 @@ DROP ROLE r_priv_test; -- VACUUM and ANALYZE. 
set gp_autostats_mode='none'; CREATE TABLE vacuum_gp_pt (a int, b int) DISTRIBUTED BY (a) PARTITION BY range (b) (END(5), START(5)); +NOTICE: CREATE TABLE will create partition "vacuum_gp_pt_1_prt_1" for table "vacuum_gp_pt" +NOTICE: CREATE TABLE will create partition "vacuum_gp_pt_1_prt_2" for table "vacuum_gp_pt" INSERT INTO vacuum_gp_pt SELECT 0, 6 FROM generate_series(1, 12); SELECT relname, reltuples, relpages FROM pg_catalog.pg_class WHERE relname like 'vacuum_gp_pt%'; relname | reltuples | relpages @@ -338,12 +338,19 @@ WARNING: skipping "__gp_log_master_ext" --- cannot vacuum non-tables, external -- Vacuum related access control tests (Issue: https://github.com/greenplum-db/gpdb/issues/9001) -- Given a non-super-user role CREATE ROLE non_super_user_vacuum; +NOTICE: resource queue required -- using default resource queue "pg_default" -- And a heap table with auxiliary relations under the pg_toast namespace. CREATE TABLE vac_acl_heap(i int, j text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- And an AO table with auxiliary relations under the pg_aoseg namespace. CREATE TABLE vac_acl_ao(i int, j text) with (appendonly=true); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- And an AOCS table with auxiliary relations under the pg_aocsseg namespace. 
CREATE TABLE vac_acl_aocs(i int, j text) with (appendonly=true, orientation=column); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- And all the tables belong to the non-super-user role. ALTER TABLE vac_acl_heap OWNER TO non_super_user_vacuum; ALTER TABLE vac_acl_ao OWNER TO non_super_user_vacuum; @@ -380,9 +387,9 @@ ORDER BY gp_segment_id; gp_segment_id | relname | reltuples | relpages | age ---------------+------------------------+-----------+----------+----- -1 | vacuum_test_heap_table | 10 | 3 | 3 - 0 | vacuum_test_heap_table | 0 | 0 | 4 - 1 | vacuum_test_heap_table | 0 | 0 | 4 - 2 | vacuum_test_heap_table | 0 | 0 | 4 + 0 | vacuum_test_heap_table | 5 | 1 | 4 + 1 | vacuum_test_heap_table | 1 | 1 | 4 + 2 | vacuum_test_heap_table | 4 | 1 | 4 (4 rows) VACUUM FREEZE vacuum_test_heap_table; diff --git a/src/test/regress/sql/bfv_statistic.sql b/src/test/regress/sql/bfv_statistic.sql index 4353b92789c..4c94ecf0d10 100644 --- a/src/test/regress/sql/bfv_statistic.sql +++ b/src/test/regress/sql/bfv_statistic.sql @@ -384,3 +384,14 @@ reset allow_system_table_mods; explain select * from tiny_freq where a=12; RESET optimizer_trace_fallback; + +-- Test if the table pg_statistic has data in segments + +DROP TABLE IF EXISTS test_statistic_1; +CREATE TABLE test_statistic_1(a int, b int); +INSERT INTO test_statistic_1 SELECT i, i FROM generate_series(1, 1000)i; + +select count(*) from pg_class c, pg_statistic s where c.oid = s.starelid and relname = 'test_statistic_1'; +select count(*) from pg_class c, gp_dist_random('pg_statistic') s where c.oid = s.starelid and relname = 'test_statistic_1'; + +DROP TABLE test_statistic_1;