數據傾斜、數據各節點分布情況、創建分布式表
查看表的分布節點
-- 查看表是否數據傾斜
select table_skewness('b_st_yz_aj_cgajfj');
-- 在哪個節點分布
-- 查看表是否數據傾斜 select table_skewness('b_st_yz_aj_cgajfj'); -- 在哪個節點分布 select xc_node_id, count(1) from tablename group by xc_node_id order by xc_node_id desc;
創建序列、創建分布式表
創建序列
CREATE SEQUENCE "ioc_theme"."m_ss_qyqy_move_warn_success_rid_seq" INCREMENT 1 MINVALUE 1 MAXVALUE 9223372036854775807 START 1 CACHE 1 CYCLE ; SELECT setval('"ioc_theme"."m_ss_qyqy_move_warn_success_rid_seq"', -1, false); ALTER SEQUENCE "ioc_theme"."m_ss_qyqy_move_warn_success_rid_seq" OWNER TO "zsj_qh";
刪除序列 修改序列
----刪除前先解除 id 對該序列的依賴 ALTER TABLE tablename ALTER COLUMN id SET DEFAULT null; DROP SEQUENCE IF EXISTS sequence_name; 修改自增序列開始值 ---- id_max 即 id 目前的最大值,可寫為1,可通過 “SELECT MAX(id) FROM tablename” 得到 CREATE SEQUENCE sequence_name START WITH id_max; 修改自增序列所屬的表和字段 ALTER TABLE ioc_theme.m_ss_yqjc_sbryxxb_51_af ALTER COLUMN rid SET DEFAULT nextval('ioc_theme.m_ss_yqjc_sbryxxb_51_af_rid_seq'::regclass);
查詢表的序列
select table_schema, table_name, column_name, data_type, column_default, is_nullable from information_schema.columns where table_name = 'm_gd_yqjc_sbryxxb_0501_0520';
創建分布式表
CREATE TABLE "ioc_theme"."m_hz_cyrk_rk_education" ( "rid" int4 NOT NULL DEFAULT nextval('"ioc_theme".m_hz_cyrk_rk_education_rid_seq'::regclass), "occur_period" int4, "occur_period_year" int4, "occur_period_month" int4, "area_code_abode" varchar(20) COLLATE "pg_catalog"."default", "area_code_work" varchar(20) COLLATE "pg_catalog"."default", "degree" varchar(40) COLLATE "pg_catalog"."default", "employ_num" int4, "employ_ratio" numeric(20,2), "create_time" timestamp(6) DEFAULT pg_systimestamp(), "update_time" timestamp(6) DEFAULT pg_systimestamp(), CONSTRAINT "m_hz_cyrk_rk_education_pkey" PRIMARY KEY ("rid") )distribute by hash(rid) ; ALTER TABLE "ioc_theme"."m_hz_cyrk_rk_education" OWNER TO "zsj_qh"; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."rid" IS '行ID'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."occur_period" IS '報告期'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."occur_period_year" IS '報告期(年)'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."occur_period_month" IS '報告期(月)'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."area_code_abode" IS '居住地行政區划代碼(社區)'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."area_code_work" IS '工作地行政區划代碼'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."degree" IS '學歷'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."employ_num" IS '人數'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."employ_ratio" IS '占比'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."create_time" IS '數據創建時間'; COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."update_time" IS '更新時間'; COMMENT ON TABLE "ioc_theme"."m_hz_cyrk_rk_education" IS '人才專題接口';
創建分布列表
6.嘗試選擇staff_ID、FIRST_NAME和LAST_NAME的組合作為分布列,創建目標表staffs。 CREATE TABLE staffs ( staff_ID NUMBER(6) not null, FIRST_NAME VARCHAR2(20), LAST_NAME VARCHAR2(25), EMAIL VARCHAR2(25), PHONE_NUMBER VARCHAR2(20), HIRE_DATE DATE, employment_ID VARCHAR2(10), SALARY NUMBER(8,2), COMMISSION_PCT NUMBER(2,2), MANAGER_ID NUMBER(6), section_ID NUMBER(4) ) DISTRIBUTE BY hash(staff_ID,FIRST_NAME,LAST_NAME);
分布式建表的兩種選擇
DISTRIBUTE BY hash(column_name) 分布在各個節點,注意column_name作為主鍵列
DISTRIBUTE BY replication 每個節點一份完整數據