Greenplum Python工具库gpload学习——get_fast_match_exttable_query和get_reuse_exttable_query

2021/11/5 1:10:07

本文主要是介绍Greenplum Python工具库gpload学习——get_fast_match_exttable_query和get_reuse_exttable_query,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!

查询可重复利用的外部表,只指定location、data format和error limit相同

    # Fast path to find out whether we have an existing external table in the
    # catalog which could be reused for this operation. we only make sure the
    # location, data format and error limit are same. we don't check column
    # names and types
    #
    # This function will return the SQL to run in order to find out whether
    # such a table exists. The results of this SQl are table names without schema
    #
    def get_fast_match_exttable_query(self, formatType, formatOpts, limitStr, schemaName, log_errors, encodingCode):

        sqlFormat = """select relname from pg_class
                    join
                    pg_exttable pgext
                    on(pg_class.oid = pgext.reloid)
                    %s
                    where
                    relstorage = 'x' and
                    relname like 'ext_gpload_reusable_%%' and
		    %s
                    """

        joinStr = ""
        conditionStr = ""

        # if schemaName is None, find the resuable ext table which is visible to
        # current search path. Else find the resuable ext table under the specific
        # schema, and this needs to join pg_namespace.
        if schemaName is None:
            joinStr = ""
            conditionStr = "pg_table_is_visible(pg_class.oid)"
        else:
            joinStr = """join
                    pg_namespace pgns
                    on(pg_class.relnamespace = pgns.oid)"""
            conditionStr = "pgns.nspname = '%s'" % schemaName

        sql = sqlFormat % (joinStr, conditionStr)

        if self.gpdb_version < "6.0.0":
            if log_errors:
                sql += " and pgext.fmterrtbl = pgext.reloid "
            else:
                sql += " and pgext.fmterrtbl IS NULL "
        else:
            if log_errors:
                sql += " and pgext.logerrors "
            else:
                sql += " and NOT pgext.logerrors "

        for i, l in enumerate(self.locations):
            sql += " and pgext.urilocation[%s] = %s\n" % (i + 1, quote(l))

        sql+= """and pgext.fmttype = %s
                 and pgext.writable = false
                 and pgext.fmtopts like %s """ % (quote(formatType[0]),quote("%" + quote_unident(formatOpts.rstrip()) +"%"))

        if limitStr:
            sql += "and pgext.rejectlimit = %s " % limitStr
        else:
            sql += "and pgext.rejectlimit IS NULL "

        if encodingCode:
            sql += "and pgext.encoding = %s " % encodingCode

        sql+= "limit 1;"

        self.log(self.DEBUG, "query used to fast match external relations:\n %s" % sql)
        return sql

查询可重复利用的外部表,指定column names and types, the same data format, and location specification, and single row error handling specs相同

    def get_reuse_exttable_query(self, formatType, formatOpts, limitStr, from_cols, schemaName, log_errors, encodingCode):
        sqlFormat = """select attrelid::regclass
                 from (
                        select
                            attrelid,
                            row_number() over (partition by attrelid order by attnum) as attord,
                            attnum,
                            attname,
                            atttypid::regtype
                        from
                            pg_attribute
                            join
                            pg_class
                            on (pg_class.oid = attrelid)
                            %s
                        where
                            relstorage = 'x' and
                            relname like 'ext_gpload_reusable_%%' and
                            attnum > 0 and
                            not attisdropped and %s
                    ) pgattr
                    join
                    pg_exttable pgext
                    on(pgattr.attrelid = pgext.reloid)
                    """
        joinStr = ""
        conditionStr = ""

        # if schemaName is None, find the resuable ext table which is visible to
        # current search path. Else find the resuable ext table under the specific
        # schema, and this needs to join pg_namespace.
        if schemaName is None:
            joinStr = ""
            conditionStr = "pg_table_is_visible(pg_class.oid)"
        else:
            joinStr = """join
                         pg_namespace pgns
                         on(pg_class.relnamespace = pgns.oid)
                      """
            conditionStr = "pgns.nspname = '%s'" % schemaName

        sql = sqlFormat % (joinStr, conditionStr)

        if self.gpdb_version < "6.0.0":
            if log_errors:
                sql += " WHERE pgext.fmterrtbl = pgext.reloid "
            else:
                sql += " WHERE pgext.fmterrtbl IS NULL "
        else:
            if log_errors:
                sql += " WHERE pgext.logerrors "
            else:
                sql += " WHERE NOT pgext.logerrors "

        for i, l in enumerate(self.locations):
            sql += " and pgext.urilocation[%s] = %s\n" % (i + 1, quote(l))

        sql+= """and pgext.fmttype = %s
                 and pgext.writable = false
                 and pgext.fmtopts like %s """ % (quote(formatType[0]),quote("%" + quote_unident(formatOpts.rstrip()) +"%"))

        if limitStr:
            sql += "and pgext.rejectlimit = %s " % limitStr
        else:
            sql += "and pgext.rejectlimit IS NULL "

        if encodingCode:
            sql += "and pgext.encoding = %s " % encodingCode

        sql+= "group by attrelid "

        sql+= """having
                    count(*) = %s and
                    bool_and(case """ % len(from_cols)

        for i, c in enumerate(from_cols):
            name = c[0]
            typ = c[1]
            sql+= "when attord = %s then atttypid = %s::regtype and attname = %s\n" % (i+1, quote(typ), quote(quote_unident(name)))

        sql+= """else true
                 end)
                 limit 1;"""

        self.log(self.DEBUG, "query used to identify reusable external relations: %s" % sql)
        return sql


这篇关于Greenplum Python工具库gpload学习——get_fast_match_exttable_query和get_reuse_exttable_query的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!


扫一扫关注最新编程教程