PostgreSQL查詢優化之子查詢優化


子查詢優化

上拉子連接

上拉子連接主要是把ANY和EXISTS子句轉換為半連接(NOT EXISTS則轉換為反連接)

/*
 * pull_up_sublinks
 *    Entry point: run the sublink pull-up recursion over the query's
 *    jointree and store the rewritten tree back into root->parse->jointree.
 *
 * The recursion may hand back a node that is not a FromExpr; in that case
 * the node is wrapped in a new single-element FromExpr before being stored.
 */
void
pull_up_sublinks(PlannerInfo *root)
{
    Relids      unused_relids;  /* filled in by the recursion, not read here */
    Node       *rewritten;      /* jointree produced by the pull-up pass */

    /* Walk the whole jointree, starting from the top-level node. */
    rewritten = pull_up_sublinks_jointree_recurse(root,
                                                  (Node *) root->parse->jointree,
                                                  &unused_relids);

    /* Anything other than a FromExpr gets wrapped so the top stays a FromExpr. */
    if (!IsA(rewritten, FromExpr))
        rewritten = (Node *) makeFromExpr(list_make1(rewritten), NULL);

    root->parse->jointree = (FromExpr *) rewritten;
}
/*
 * pull_up_sublinks_jointree_recurse
 *    Recursive worker that walks one jointree node.
 *
 * NOTE: this is an abridged excerpt. Local declarations (l, f, newf, j,
 * jtlink, the various *relids variables, ...) and the bodies of the NULL
 * and RangeTblRef branches are not shown.
 *
 * Visible behaviour: dispatch on the node type (NULL, RangeTblRef,
 * FromExpr, JoinExpr), recurse into child jointree nodes, and pass each
 * level's quals to pull_up_sublinks_qual_recurse. Any other node type is
 * reported with elog(ERROR). The function ends by returning jtnode.
 */
static Node *
pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
								  Relids *relids)
{
    if (jtnode == NULL)
    else if (IsA(jtnode, RangeTblRef))// a range-table reference: merged directly into the relids set (handling omitted in this excerpt)
    // Handle a FromExpr
    else if (IsA(jtnode, FromExpr))
    {
    	/* Recursively pull up sublinks in every item of the FROM list */
    	foreach(l, f->fromlist)
    	{
            newchild = pull_up_sublinks_jointree_recurse(root,
                            lfirst(l),
                            &childrelids);
            // Collect the processed child and its relids at this level
            newfromlist = lappend(newfromlist, newchild);
            frelids = bms_join(frelids, childrelids);
    	}
    	// Recursively pull up sublinks found in this FromExpr's quals
        newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
    					&jtlink, frelids,
    					NULL, NULL);
    }
    else if (IsA(jtnode, JoinExpr))
    {
    	/* Pull up sublinks in the left and right inputs of the join */
    	j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
    						&leftrelids);
    	j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
    						&rightrelids);
    	// Recursively pull up sublinks in the join quals; which side is
    	// offered as the pull-up target depends on the join type
    	switch (j->jointype)
    	{
    		case JOIN_INNER:
    			/* inner join: both sides' relids are offered */
    			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
                					 &jtlink,
                					 bms_union(leftrelids,
                							rightrelids),
                					 NULL, NULL);
    			break;
    		case JOIN_LEFT:
    			/* left join: only j->rarg / rightrelids are offered */
    			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
							 &j->rarg,
							 rightrelids,
							 NULL, NULL);
    			break;
    		case JOIN_FULL:
    			/* can't do anything with full-join quals */
    			break;
    		case JOIN_RIGHT:
    			/* right join: only j->larg / leftrelids are offered */
    			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
							 &j->larg,
							 leftrelids,
							 NULL, NULL);
    			break;
    		default:
    			elog(ERROR, "unrecognized join type: %d",
    				 (int) j->jointype);
    			break;
    	}
    
    }
    else
    	elog(ERROR, "unrecognized node type: %d",
    		 (int) nodeTag(jtnode));
    return jtnode;
}

用於遞歸上拉限制條件中存在的子連接(ANY,EXISTS):通過convert_ANY_sublink_to_join轉換ANY子連接,通過convert_EXISTS_sublink_to_join轉換EXISTS子連接,並對轉換結果繼續遞歸處理

/*
 * pull_up_sublinks_qual_recurse
 *    Scan a qual expression for ANY / EXISTS SubLinks and try to convert
 *    them into joins.
 *
 * NOTE: this is an abridged excerpt. The declarations of sublink, j and
 * child_rels, the bodies of several branches, and the NOT handling are
 * not shown.
 *
 * Visible return values:
 *   - NULL when node is NULL;
 *   - NULL after a successful ANY conversion (stands for constant TRUE);
 *   - for an AND clause: NULL if no sub-clause survives, the single
 *     surviving clause if there is exactly one, otherwise a new AND clause;
 *   - otherwise the node itself, unmodified.
 */
static Node *
// node is the qual expression being examined; available_rels1/available_rels2
// are passed as inputs to the convert_*_sublink_to_join helpers, and
// jtlink1/jtlink2 are forwarded unchanged when recursing into AND arguments
pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
    		  Node **jtlink1, Relids available_rels1,
    		  Node **jtlink2, Relids available_rels2)
{
    if (node == NULL)
    	return NULL;
    if (IsA(node, SubLink))
    {
    	/* Try to pull up a sublink of type ANY_SUBLINK */
    	if (sublink->subLinkType == ANY_SUBLINK)
    	{
        	if ((j = convert_ANY_sublink_to_join(root, sublink,
        				available_rels1)) != NULL)// the ANY sublink was converted successfully
        	{
                // Process the right-hand input that was just pulled up
                j->rarg = pull_up_sublinks_jointree_recurse(root,
                        			j->rarg,
                        			&child_rels);
        
                // Process the quals of the join that was just created
            	j->quals = pull_up_sublinks_qual_recurse(root,
    							 j->quals,
    							 &j->larg,
    							 available_rels1,
    							 &j->rarg,
    							 child_rels);
        		/* Return NULL representing constant TRUE */
        		return NULL;
        	}
        	// Retry against available_rels2 (body omitted in this excerpt)
        	if (available_rels2 != NULL &&
        		(j = convert_ANY_sublink_to_join(root, sublink,
						 available_rels2)) != NULL)

    	}
    	// Try to pull up a sublink of type EXISTS_SUBLINK
    	else if (sublink->subLinkType == EXISTS_SUBLINK)
    	{
    		if ((j = convert_EXISTS_sublink_to_join(root, sublink, false,
						available_rels1)) != NULL)
    		{
    		    // Same basic logic as the ANY branch (omitted)
    		}
    		if (available_rels2 != NULL &&
    			(j = convert_EXISTS_sublink_to_join(root, sublink, false,
							available_rels2)) != NULL)
    		{
                // Same basic logic as the ANY branch (omitted)
    		}
    	}
    	/* Else return it unmodified */
    	return node;
    }
    // Handle NOT EXISTS (body omitted in this excerpt)
    if (not_clause(node))
    {
    }
    if (and_clause(node)) // process every argument of the AND
    {
    	/* Recurse into AND clause */
    	List	   *newclauses = NIL;
    	ListCell   *l;
    
    	foreach(l, ((BoolExpr *) node)->args)
    	{
    		Node	   *oldclause = (Node *) lfirst(l);
    		Node	   *newclause;
    
    		newclause = pull_up_sublinks_qual_recurse(root,
							  oldclause,
							  jtlink1,
							  available_rels1,
							  jtlink2,
							  available_rels2);
    		/* a NULL result means the clause was consumed; drop it */
    		if (newclause)
    			newclauses = lappend(newclauses, newclause);
    	}
    	/* We might have got back fewer clauses than we started with */
    	if (newclauses == NIL)
    		return NULL;
    	else if (list_length(newclauses) == 1)
    		return (Node *) linitial(newclauses);
    	else
    		return (Node *) make_andclause(newclauses);
    }
    /* Stop if not an AND */
    return node;
}

pull_up_sublinks_jointree_recurse和pull_up_sublinks_qual_recurse交替調用,窮盡所有ANY/EXISTS類型子連接

ANY(類型包括ANY,NOT ANY,SOME, NOT SOME, IN, NOT IN)

/*
 * convert_ANY_sublink_to_join
 *    Try to convert an ANY SubLink into a semi join.
 *
 * NOTE: this is an abridged excerpt. The declarations of parse, subselect,
 * upper_varnos, pstate, rte, rtindex, rtr, subquery_vars and quals are not
 * shown, and neither is the code that stores rtr / quals into the result
 * (the caller reads j->rarg and j->quals from the returned node).
 *
 * Returns NULL when any of the visible checks rejects the sublink;
 * otherwise returns a new JoinExpr whose jointype is JOIN_SEMI.
 */
JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
							Relids available_rels)
{
    JoinExpr   *result;	// the join that replaces the sublink

    Assert(sublink->subLinkType == ANY_SUBLINK);

    // Right-hand side (the sub-select): it must not contain any Var that
    // refers to the immediately enclosing query level
    if (contain_vars_of_level((Node *) subselect, 1))
    	return NULL;

    /*
     * Left-hand side (the test expression):
     *  a. it must reference at least one upper-level relation; otherwise
     *     the sublink can be evaluated on its own and no join is needed
     *  b. it must not refer to anything outside available_rels
     *  c. it must not contain volatile functions
     */
    upper_varnos = pull_varnos(sublink->testexpr);	// case a
    if (bms_is_empty(upper_varnos))
    	return NULL;

    /*
     * However, it can't refer to anything outside available_rels.
     */
    if (!bms_is_subset(upper_varnos, available_rels))	// case b
    	return NULL;

    /*
     * The combining operators and left-hand expressions mustn't be volatile.
     */
    if (contain_volatile_functions(sublink->testexpr))	// case c
    	return NULL;

    /* Create a dummy ParseState for addRangeTableEntryForSubquery */
    pstate = make_parsestate(NULL);

    /*
     * Add the sub-select to the upper query's range table so that it can
     * serve as the other side of the join.
     */
    rte = addRangeTableEntryForSubquery(pstate,
    			subselect,
    			makeAlias("ANY_subquery", NIL),
    			false,
    			false);
    parse->rtable = lappend(parse->rtable, rte);
    rtindex = list_length(parse->rtable);	// index of the entry just appended

    /*
     * Form a RangeTblRef for the pulled-up sub-select.
     */
    rtr = makeNode(RangeTblRef);
    rtr->rtindex = rtindex;

    /*
     * Build a list of Vars representing the subselect outputs.
     */
    subquery_vars = generate_subquery_vars(root,
    		   subselect->targetList,
    		   rtindex);

    /* Build the join condition from the sublink's test expression */
    quals = convert_testexpr(root, sublink->testexpr, subquery_vars);

    /*
     * And finally, build the JoinExpr node.
     */
    result = makeNode(JoinExpr);
    result->jointype = JOIN_SEMI;	// semi join

    return result;
}

有幾種情況會上拉失敗:

  1. 右操作數不能包含上層操作對象(包含就是關聯子查詢了)
  2. 左操作數
    1. 不包含任何上層關系的Var(與上層對象無關,子查詢可以直接求解)
    2. 引用了available_rels之外的關系
    3. 含有易失函數

將EXISTS,NOT EXISTS上拉

/*
 * convert_EXISTS_sublink_to_join
 *    Try to convert an EXISTS SubLink into a semi join, or into an anti
 *    join when under_not is true.
 *
 * NOTE: this is an abridged excerpt. The declarations of subselect,
 * whereClause and rtoffset are not shown, and the returned JoinExpr only
 * has its jointype filled in here.
 *
 * Returns NULL when any of the visible checks rejects the sublink.
 */
JoinExpr *
convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
	   bool under_not, Relids available_rels)
{
    JoinExpr   *result;
    Query	   *parse = root->parse;
    
    Assert(sublink->subLinkType == EXISTS_SUBLINK);
    
    /* Requirements on the right-hand side (the sub-select):
     * a. a sub-select carrying a WITH clause is not supported
     * b. set operations, aggregates, HAVING, LIMIT and similar forms are
     *    not supported (whatever simplify_EXISTS_query rejects)
     * c. an empty FROM list is not supported
     */
    if (subselect->cteList)// case a
    	return NULL;
    

    if (!simplify_EXISTS_query(root, subselect))// case b
    	return NULL;

    if (subselect->jointree->fromlist == NIL)// case c
    	return NULL;
    
    whereClause = subselect->jointree->quals;// detach and remember the sub-select's WHERE clause
    subselect->jointree->quals = NULL;
    
    // With its WHERE clause removed, the rest of the sub-select must not
    // contain any Var of the immediately enclosing query level
    if (contain_vars_of_level((Node *) subselect, 1))
    	return NULL;
    // The WHERE clause itself must reference an upper-level Var, otherwise
    // there is nothing to join on
    if (!contain_vars_of_level(whereClause, 1))
    	return NULL;
    // A WHERE clause containing volatile functions cannot be optimized
    if (contain_volatile_functions(whereClause))
    	return NULL;
    
    // All checks passed: shift the sub-select's range-table references by
    // the current length of the parent range table
    rtoffset = list_length(parse->rtable);
    OffsetVarNodes((Node *) subselect, rtoffset, 0);
    OffsetVarNodes(whereClause, rtoffset, 0);
    
    /* Technically, pulling up the sublink means moving the sub-select's range table into the parent's FROM and merging its WHERE condition */
    parse->rtable = list_concat(parse->rtable, subselect->rtable);
    
    /*
     * And finally, build the JoinExpr node.
     */
    result = makeNode(JoinExpr);
    result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
    return result;
}

不能上拉的條件:

  1. 右操作數:
    1. 不支持帶有WITH子句的格式
    2. 不支持集合操作或者帶有CTE(即WITH子句定義的、只在當前查詢內存在的臨時結果集),聚合函數, HAVING,LIMIT等格式
    3. 不支持FROM或者WHERE子句為空
    4. 去掉WHERE條件后,子查詢的其余部分不能引用上層Var;但WHERE條件本身必須引用上層Var
    5. 易失函數不能優化

上拉子查詢

搜索From子句中的范圍表,上拉其中子查詢

/*
 * pull_up_subqueries
 *    Entry point: run the subquery pull-up recursion over the query's
 *    jointree and store the result back into root->parse->jointree.
 */
void
pull_up_subqueries(PlannerInfo *root)
{
    Node       *tree = (Node *) root->parse->jointree;

    /* Top-level call: no containing outer join and no containing appendrel. */
    tree = pull_up_subqueries_recurse(root, tree, NULL, NULL, NULL, false);

    root->parse->jointree = (FromExpr *) tree;
}

真正執行上拉

/*
 * pull_up_subqueries_recurse
 *    Recursive worker that walks one jointree node and pulls up what it can.
 *
 * NOTE: this is an abridged excerpt. The declarations of rte, l and
 * sub_deletion_ok, and the per-join-type handling inside the JoinExpr
 * switch, are not shown.
 *
 * Visible behaviour:
 *   - RangeTblRef: returns the result of pull_up_simple_subquery,
 *     pull_up_simple_union_all or pull_up_simple_values when the matching
 *     test passes; otherwise leaves the node alone.
 *   - FromExpr: replaces every fromlist element in place with the result
 *     of recursing on it.
 *   - Any other node type except JoinExpr is reported with elog(ERROR).
 * When no pull-up function returned earlier, jtnode itself is returned.
 */
static Node *
pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
						   JoinExpr *lowest_outer_join,
						   JoinExpr *lowest_nulling_outer_join,
						   AppendRelInfo *containing_appendrel,
						   bool deletion_ok)
{
    Assert(jtnode != NULL);
    if (IsA(jtnode, RangeTblRef))// a single range-table reference
    {
    	if (rte->rtekind == RTE_SUBQUERY &&
    		is_simple_subquery(rte->subquery, rte,
			   lowest_outer_join, deletion_ok) &&// pull up when it is a simple subquery
    		(containing_appendrel == NULL ||
    		 is_safe_append_member(rte->subquery)))
    		return pull_up_simple_subquery(root, jtnode, rte,
				   lowest_outer_join,
				   lowest_nulling_outer_join,
				   containing_appendrel,
				   deletion_ok);
    

    	if (rte->rtekind == RTE_SUBQUERY &&
    		is_simple_union_all(rte->subquery))// pull up when it is a simple UNION ALL
    		return pull_up_simple_union_all(root, jtnode, rte);
    
    	// Pull up a simple VALUES entry; only attempted when there is no
    	// enclosing outer join and no containing appendrel
    	if (rte->rtekind == RTE_VALUES &&
    		lowest_outer_join == NULL &&
    		containing_appendrel == NULL &&
    		is_simple_values(root, rte, deletion_ok))
    		return pull_up_simple_values(root, jtnode, rte);
    
    	/* Otherwise, do nothing at this node. */
    }
    else if (IsA(jtnode, FromExpr))
    {
    	FromExpr   *f = (FromExpr *) jtnode;

    	/* Recurse into each FROM item; children are not inside an appendrel */
    	foreach(l, f->fromlist)
    	{
    		lfirst(l) = pull_up_subqueries_recurse(root, lfirst(l),
						   lowest_outer_join,
						   lowest_nulling_outer_join,
						   NULL,
						   sub_deletion_ok);
    	}
    }
    else if (IsA(jtnode, JoinExpr))
    {
    	JoinExpr   *j = (JoinExpr *) jtnode;
    
    	/* Recurse, being careful to tell myself when inside outer join */
    	switch (j->jointype)
    	{
    	    // Pull-up handling for each JOIN type (omitted in this excerpt)
    	}
    }
    else
    	elog(ERROR, "unrecognized node type: %d",
    		 (int) nodeTag(jtnode));
    return jtnode;
}

可以上拉的:

  1. 簡單子查詢,Union ALL,普通Value
  2. 可以遞歸多個From子句,也可以遞歸上拉join左右操作數

優化頂層Union all

/*
 * flatten_simple_union_all
 *    Try to optimize a top-level UNION ALL.
 *
 * NOTE: this is an abridged excerpt. The declarations and setup of topop
 * and leftmostRTI are not shown.
 *
 * Returns early, changing nothing, when root->hasRecursion is set or when
 * is_simple_union_all_recurse rejects the set-operation tree; otherwise
 * calls pull_up_union_leaf_queries on it.
 */
void
flatten_simple_union_all(PlannerInfo *root)
{
	Query	   *parse = root->parse;

	/*
	 * Give up when the query is marked as recursive (author's note: nested
	 * UNION, i.e. a WITH clause -- presumably WITH RECURSIVE; confirm).
	 */
	if (root->hasRecursion)
		return;

	/* Give up unless this is a simple UNION ALL (author's note: the columns of the UNION ALL branches differ) */
	if (!is_simple_union_all_recurse((Node *) topop, parse, topop->colTypes))
		return;
    // Build AppendRelInfo nodes to optimize the UNION ALL (per the author's
    // note; the construction happens inside pull_up_union_leaf_queries)
	pull_up_union_leaf_queries((Node *) topop, root, leftmostRTI, parse, 0);
}

UNION和UNION ALL和OR

  1. UNION和UNION ALL都是使用AppendRelInfo替代,但是UNION會排序和去重
  2. OR里面會有OrFilter


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM