GetForeignRelSize() 的源码在 postgres_fdw.c 文件中。

进入 GetForeignRelSize() 一开头,将 FDW 相关的 planner 信息存放在 PgFdwRelationInfo *fpinfo 中。

PgFdwRelationInfo *fpinfo;

fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo));
baserel->fdw_private = (void *) fpinfo;
/*
 * FDW-specific planner information kept in RelOptInfo.fdw_private for a
 * foreign table.  This information is collected by postgresGetForeignRelSize.
 */
typedef struct PgFdwRelationInfo
{
	/* baserestrictinfo clauses, broken down into safe and unsafe subsets. */
	List	   *remote_conds;
	List	   *local_conds;

	/* Bitmap of attr numbers we need to fetch from the remote server. */
	Bitmapset  *attrs_used;

	/* Cost and selectivity of local_conds. */
	QualCost	local_conds_cost;
	Selectivity local_conds_sel;

	/* Estimated size and cost for a scan with baserestrictinfo quals. */
	double		rows;
	int		width;
	Cost		startup_cost;
	Cost		total_cost;

	/* Options extracted from catalogs. */
	bool		use_remote_estimate;
	Cost		fdw_startup_cost;
	Cost		fdw_tuple_cost;

	/* Cached catalog information. */
	ForeignTable *table;
	ForeignServer *server;
	UserMapping *user;			/* only set in use_remote_estimate mode */
} PgFdwRelationInfo;

/* Look up foreign-table catalog info. */
fpinfo->table = GetForeignTable(foreigntableid);
fpinfo->server = GetForeignServer(fpinfo->table->serverid);
fpinfo->use_remote_estimate = false;
fpinfo->fdw_startup_cost = DEFAULT_FDW_STARTUP_COST;
fpinfo->fdw_tuple_cost = DEFAULT_FDW_TUPLE_COST;

若保持默认 use_remote_estimate = false,在下面的 classifyConditions() 中,对输入的 SQL 语句中的 where 限制条件进行逐项归类,将之分为 remote_conds 和 local_conds,前者可以发送到 remote 端处理,后者不能。

void
classifyConditions(PlannerInfo *root,
				   RelOptInfo *baserel,
				   List *input_conds,
				   List **remote_conds,
				   List **local_conds)
{
	ListCell   *lc;

	*remote_conds = NIL;
	*local_conds = NIL;

	foreach(lc, input_conds)
	{
		RestrictInfo *ri = (RestrictInfo *) lfirst(lc);

		if (is_foreign_expr(root, baserel, ri->clause))
			*remote_conds = lappend(*remote_conds, ri);
		else
			*local_conds = lappend(*local_conds, ri);
	}
}

\1. 在 foreign_expr_walker() 中对限制条件进行判断,只有 built-in 类型的才能发送到 remote 端,因为非 built-in 的类型在 remote 端不能保证支持。但如果需要,应该可以修改源码加入对自定义类型的支持。

\2. 在 contain_mutable_functions() 中判断是否是 mutable function,只有非 mutable function 才能发送到 remote 端。(如注释所述,For example, sending now() to remote side could cause confusion from clock offsets.)

总结:只有内置类型或非 mutable 函数类型的限制条件才会发送到 remote 端进行处理。

bool
is_foreign_expr(PlannerInfo *root,
				RelOptInfo *baserel,
				Expr *expr)
{
	foreign_glob_cxt glob_cxt;
	foreign_loc_cxt loc_cxt;

	/*
	 * Check that the expression consists of nodes that are safe to execute
	 * remotely.
	 */
	glob_cxt.root = root;
	glob_cxt.foreignrel = baserel;
	loc_cxt.collation = InvalidOid;
	loc_cxt.state = FDW_COLLATE_NONE;
	if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt))
		return false;

	/* Expressions examined here should be boolean, ie noncollatable */
	Assert(loc_cxt.collation == InvalidOid);
	Assert(loc_cxt.state == FDW_COLLATE_NONE);

	/*
	 * An expression which includes any mutable functions can't be sent over
	 * because its result is not stable.  For example, sending now() remote
	 * side could cause confusion from clock offsets.  Future versions might
	 * be able to make this choice with more granularity.  (We check this last
	 * because it requires a lot of expensive catalog lookups.)
	 */
	if (contain_mutable_functions((Node *) expr))
		return false;

	/* OK to evaluate on the remote server */
	return true;
}

然后对需要从 remote 端取回的表属性进行标记,需要取回的表属性包括 targetList ( 即select attr from table; 中的 attr) 中的属性和限制条件中本地处理的属性:

attrs_used 在之后构造发送到 remote 端的 SQL 语句时,起主要作用。

	/*
	 * Identify which attributes will need to be retrieved from the remote
	 * server.  These include all attrs needed for joins or final output, plus
	 * all attrs used in the local_conds.  (Note: if we end up using a
	 * parameterized scan, it's possible that some of the join clauses will be
	 * sent to the remote and thus we wouldn't really need to retrieve the
	 * columns used in them.  Doesn't seem worth detecting that case though.)
	 */
	fpinfo->attrs_used = NULL;
	pull_varattnos((Node *) baserel->reltargetlist, baserel->relid,
				   &fpinfo->attrs_used);
	foreach(lc, fpinfo->local_conds)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);

		pull_varattnos((Node *) rinfo->clause, baserel->relid,
					   &fpinfo->attrs_used);
	}
	/*
	 * Compute the selectivity and cost of the local_conds, so we don't have
	 * to do it over again for each path.  The best we can do for these
	 * conditions is to estimate selectivity on the basis of local statistics.
	 */
	fpinfo->local_conds_sel = clauselist_selectivity(root,fpinfo->local_conds,baserel->relid,JOIN_INNER,NULL);

	cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);

然后,基于本地数据预估表大小,对于外部表,则假定表占用的空间为 10 页,来计算:

		/*
		 * If the foreign table has never been ANALYZEd, it will have relpages
		 * and reltuples equal to zero, which most likely has nothing to do
		 * with reality.  We can't do a whole lot about that if we're not
		 * allowed to consult the remote server, but we can use a hack similar
		 * to plancat.c's treatment of empty relations: use a minimum size
		 * estimate of 10 pages, and divide by the column-datatype-based width
		 * estimate to get the corresponding number of tuples.
		 */
		if (baserel->pages == 0 && baserel->tuples == 0)
		{
			baserel->pages = 10;
			baserel->tuples =
				(10 * BLCKSZ) / (baserel->width + MAXALIGN(SizeofHeapTupleHeader));
		}

		/* Estimate baserel size as best we can with local statistics. */
		set_baserel_size_estimates(root, baserel);

若 use_remote_estimate = true,那么会构造一个 EXPLAIN 的 SQL 语句,构造方式如下,其中包含之前筛选出的适合发送到 remote 端处理的 remote_conds,发送到 remote 端处理估算,然后发回。

		/*
		 * Construct EXPLAIN query including the desired SELECT, FROM, and
		 * WHERE clauses.  Params and other-relation Vars are replaced by
		 * dummy values.
		 */
		initStringInfo(&sql);
		appendStringInfoString(&sql, "EXPLAIN ");
		deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used,
						 &retrieved_attrs);
		if (fpinfo->remote_conds)
			appendWhereClause(&sql, root, baserel, fpinfo->remote_conds,
							  true, NULL);
		if (remote_join_conds)
			appendWhereClause(&sql, root, baserel, remote_join_conds,
							  (fpinfo->remote_conds == NIL), NULL);