Skip to content
This repository was archived by the owner on May 24, 2024. It is now read-only.

Commit fb5eb8d

Browse files
authored
Add analyze functionality for mpp file fdw. (#16716)
For file_fdw, the analyze callback `fileAnalyzeForeignTable()` was simply forked from upstream, with no Greenplum-specific adaptation. As a result, when a file_fdw foreign table in Greenplum points to multiple files and its `mpp_execute` option is set to `all segments`, the QD process cannot do all the sampling work by itself, because the files may be stored on the individual segments, which may live on different hosts. This PR adapts the analyze callback function to deal with this situation. It also tries to fix this issue: greenplum-db/gpdb#16700
1 parent 3ee6c9d commit fb5eb8d

File tree

8 files changed

+50
-4
lines changed

8 files changed

+50
-4
lines changed

contrib/file_fdw/file_fdw.c

+39
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,26 @@ fileEndForeignScan(ForeignScanState *node)
791791
EndCopyFrom(festate->cstate);
792792
}
793793

794+
/*
795+
* Expand any <SEGID> or <SEG_DATA_DIR> tokens contained in the filename.
796+
*
797+
* Replaces the "<SEGID>" token in the filename with this segment's ID.
798+
* Replaces the "<SEG_DATA_DIR>" token in the filename with DataDir.
799+
*/
800+
static char *
801+
fileFdwMangleFileName(const char *filename)
802+
{
803+
StringInfoData filepath;
804+
char segid_buf[8];
805+
/* Render this segment's index as decimal text for the <SEGID> substitution. */
snprintf(segid_buf, 8, "%d", GpIdentity.segindex);
806+
807+
/* Build the result in a separate StringInfo seeded with the given filename. */
initStringInfo(&filepath);
808+
appendStringInfoString(&filepath, filename);
809+
810+
/* Substitute <SEG_DATA_DIR> with DataDir first, then <SEGID> with the segment index text. */
replaceStringInfoString(&filepath, "<SEG_DATA_DIR>", DataDir);
811+
replaceStringInfoString(&filepath, "<SEGID>", segid_buf);
812+
/* Return the StringInfo's underlying character buffer to the caller. */
return filepath.data;
813+
}
794814
/*
795815
* fileAnalyzeForeignTable
796816
* Test whether analyzing this foreign table is supported
@@ -804,6 +824,7 @@ fileAnalyzeForeignTable(Relation relation,
804824
bool is_program;
805825
List *options;
806826
struct stat stat_buf;
827+
ForeignTable *table = NULL;
807828

808829
/* Fetch options of foreign table */
809830
fileGetOptions(RelationGetRelid(relation), &filename, &is_program, &options);
@@ -818,6 +839,24 @@ fileAnalyzeForeignTable(Relation relation,
818839
if (is_program)
819840
return false;
820841

842+
table = GetForeignTable(RelationGetRelid(relation));
843+
if (Gp_role == GP_ROLE_DISPATCH && table->exec_location == FTEXECLOCATION_ALL_SEGMENTS)
844+
{
845+
/*
846+
* It is not easy to fetch all the remote files from all segments, so
847+
* we set totalpages to the same default value used in estimate_size()
848+
*/
849+
*totalpages = 10;
850+
/* This function could dispatch gp_acquire_sample_rows to all segments */
851+
*func = gp_acquire_sample_rows_func;
852+
return true;
853+
}
854+
855+
/* Expand segment tokens in the filename; logic copied from the MangleCopyFileName function */
856+
if (table->exec_location == FTEXECLOCATION_ALL_SEGMENTS)
857+
{
858+
filename = fileFdwMangleFileName(filename);
859+
}
821860
/*
822861
* Get size of the file. (XXX if we fail here, would it be better to just
823862
* return false to skip analyzing the table?)

contrib/file_fdw/input/gp_file_fdw.source

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ CREATE FOREIGN TABLE text_csv_all (
3434
OPTIONS (format 'csv', filename '@abs_srcdir@/data/text<SEGID>.csv', mpp_execute 'all segments');
3535
EXPLAIN SELECT * FROM text_csv_all ORDER BY word1;
3636
SELECT * FROM text_csv_all ORDER BY word1;
37+
-- Analyze 'all segments' option table when filename is like "text<SEGID>.csv"
38+
Analyze text_csv_all;
3739
CREATE FOREIGN TABLE text_csv_any_from_server (
3840
word1 text, word2 text
3941
) SERVER file_server

contrib/file_fdw/output/gp_file_fdw.source

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ SELECT * FROM text_csv_all ORDER BY word1;
4949
FOO | bar
5050
(3 rows)
5151

52+
-- Analyze 'all segments' option table when filename is like "text<SEGID>.csv"
53+
Analyze text_csv_all;
5254
CREATE FOREIGN TABLE text_csv_any_from_server (
5355
word1 text, word2 text
5456
) SERVER file_server

contrib/file_fdw/output/gp_file_fdw_optimizer.source

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ SELECT * FROM text_csv_all ORDER BY word1;
4949
FOO | bar
5050
(3 rows)
5151

52+
-- Analyze 'all segments' option table when filename is like "text<SEGID>.csv"
53+
Analyze text_csv_all;
5254
CREATE FOREIGN TABLE text_csv_any_from_server (
5355
word1 text, word2 text
5456
) SERVER file_server

contrib/postgres_fdw/expected/mpp_gp2pg_postgres_fdw.out

+1
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ SELECT * FROM mpp_ft1 ORDER BY c1;
112112
10 | 0
113113
(10 rows)
114114

115+
ANALYZE mpp_ft1;
115116
ALTER FOREIGN TABLE mpp_ft1 OPTIONS (add use_remote_estimate 'true');
116117
EXPLAIN VERBOSE SELECT * FROM mpp_ft1 ORDER BY c1;
117118
QUERY PLAN

contrib/postgres_fdw/sql/mpp_gp2pg_postgres_fdw.sql

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ ALTER SERVER pgserver OPTIONS (set num_segments '2');
7070
-- ===================================================================
7171
EXPLAIN VERBOSE SELECT * FROM mpp_ft1 ORDER BY c1;
7272
SELECT * FROM mpp_ft1 ORDER BY c1;
73+
ANALYZE mpp_ft1;
7374

7475
ALTER FOREIGN TABLE mpp_ft1 OPTIONS (add use_remote_estimate 'true');
7576
EXPLAIN VERBOSE SELECT * FROM mpp_ft1 ORDER BY c1;

src/backend/commands/analyze.c

-3
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,6 @@ static int acquire_sample_rows(Relation onerel, int elevel,
185185
static int acquire_sample_rows_dispatcher(Relation onerel, bool inh, int elevel,
186186
HeapTuple *rows, int targrows,
187187
double *totalrows, double *totaldeadrows);
188-
static int gp_acquire_sample_rows_func(Relation onerel, int elevel,
189-
HeapTuple *rows, int targrows,
190-
double *totalrows, double *totaldeadrows);
191188
static BlockNumber acquire_index_number_of_blocks(Relation indexrel, Relation tablerel);
192189

193190
static void gp_acquire_correlations_dispatcher(Oid relOid, bool inh, float4 *correlations, bool *correlationsIsNull);

src/include/commands/vacuum.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,9 @@ extern bool vacuumStatement_IsTemporary(Relation onerel);
367367
extern void analyze_rel(Oid relid, RangeVar *relation,
368368
VacuumParams *params, List *va_cols, bool in_outer_xact,
369369
BufferAccessStrategy bstrategy, gp_acquire_sample_rows_context *ctx);
370-
370+
extern int gp_acquire_sample_rows_func(Relation onerel, int elevel,
371+
HeapTuple *rows, int targrows,
372+
double *totalrows, double *totaldeadrows);
371373
/* in commands/vacuumlazy.c */
372374
extern void lazy_vacuum_rel_heap(Relation onerel,
373375
VacuumParams *params, BufferAccessStrategy bstrategy);

0 commit comments

Comments
 (0)