博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
检查数据倾斜分布
阅读量:7287 次
发布时间:2019-06-30

本文共 4413 字,大约阅读时间需要 14 分钟。

从传统数据库迁移到GP时,一个重要且常常被开发者忽略的概念是数据分布:如果表的分布键设计不当,会导致严重的性能问题。下面的函数可以帮助开发者及DBA检测一张表的数据倾斜情况。
-- Function: gpmg.data_skew(character varying)

-- DROP FUNCTION gpmg.data_skew(character varying);

CREATE OR REPLACE FUNCTION gpmg.data_skew(tablename character varying)
  RETURNS text AS
$BODY$
-- 2014-05-26, Gtlions: collect and summarize data skew statistics for a table.
--
-- Parameter:
--   tablename  name of the table to inspect (optionally schema-qualified).
--
-- Returns:
--   A one-line text report: number of primary segments, the ideal per-segment
--   share (1 / segments), the table's total row count, and the host name, row
--   count and row share of the fullest and the emptiest segment that holds rows.
--
-- Side effects:
--   Consumes one value of gpmg.commonseq and uses gpmg.commontab as scratch
--   space; the scratch rows are deleted again before returning.
--
-- NOTE(review): segments that hold no rows of the table produce no
-- gp_segment_id group, so they are not reflected in "minskew".
declare
  v_id          bigint;       -- scratch-row key taken from gpmg.commonseq
  v_rel         regclass;     -- validated, safely quoted reference to the table
  v_segs        integer;      -- number of primary segments
  v_totalnums   bigint := 0;  -- total row count of the table
  v_maxskew     numeric := 0.0;
  v_minskew     numeric := 0.0;
  v_maxskew_seg text;         -- host name of the fullest segment
  v_minskew_seg text;         -- host name of the emptiest segment with rows
  v_maxrows     bigint := 0;
  v_minrows     bigint := 0;
  v_result      text;
begin
  -- Resolve the name first: this fails early for a missing table and gives a
  -- properly quoted identifier for the dynamic statement below.
  v_rel := $1::text::regclass;
  v_id := nextval('gpmg.commonseq');

  select count(*) into v_segs
  from gp_segment_configuration
  where role = 'p' and content <> -1;

  v_result := 'GP hava ' || v_segs || ' instances, Standard skew is ' || (1.0/v_segs) || '. ';

  -- bg1 = segment id, bg2 = row count on that segment.
  -- The ROLLUP adds a grand-total row (bg1 is null) carrying the table row count.
  execute 'insert into gpmg.commontab (seq, tabname, bg1, bg2) '
       || 'select ' || v_id || ', ' || quote_literal($1) || ', gp_segment_id, count(*) '
       || 'from ' || v_rel::text || ' '
       || 'group by rollup ((gp_segment_id))';

  -- The grand-total row is the largest bg2, so max() yields the table total.
  select max(bg2) into v_totalnums
  from gpmg.commontab
  where seq = v_id and tabname = $1;

  -- An empty table would otherwise divide by zero below (or yield a NULL report).
  if coalesce(v_totalnums, 0) = 0 then
    delete from gpmg.commontab where seq = v_id and tabname = $1;
    return v_result || 'You Table [' || $1 || '] skew info: [table_totalrows:0, table is empty]';
  end if;

  -- nm1 = ideal share, nm2 = this segment's share, nm3 = |nm1 - nm2|.
  -- nm3 is stored but not read by this function.
  update gpmg.commontab
  set nm1 = 1::numeric / v_segs,
      nm2 = bg2::numeric / v_totalnums,
      nm3 = abs(1::numeric / v_segs - bg2::numeric / v_totalnums)
  where seq = v_id and tabname = $1;

  -- nm2 is bg2 divided by a positive constant, so the extreme shares and the
  -- extreme row counts belong to the same segments.
  select max(nm2), min(nm2), max(bg2), min(bg2)
  into v_maxskew, v_minskew, v_maxrows, v_minrows
  from gpmg.commontab
  where seq = v_id and tabname = $1 and bg1 is not null;

  -- Map the extreme segments to host names; ties resolve to the lowest segment id.
  select hostname into v_maxskew_seg
  from gp_segment_configuration
  where role = 'p'
    and content in (select bg1
                    from gpmg.commontab
                    where seq = v_id and tabname = $1
                      and bg1 is not null and nm2 = v_maxskew
                    order by bg1
                    limit 1);

  select hostname into v_minskew_seg
  from gp_segment_configuration
  where role = 'p'
    and content in (select bg1
                    from gpmg.commontab
                    where seq = v_id and tabname = $1
                      and bg1 is not null and nm2 = v_minskew
                    order by bg1
                    limit 1);

  v_result := v_result
    || 'You Table [' || $1 || '] skew info: [table_totalrows:' || v_totalnums
    || ', maxskew:seg-' || v_maxskew_seg || ', rows-' || v_maxrows || ' ' || v_maxskew
    || ', minskew:seg-' || v_minskew_seg || ', rows-' || v_minrows || ' ' || v_minskew
    || ']';

  delete from gpmg.commontab where seq = v_id and tabname = $1;

  return v_result;
end;
$BODY$
  LANGUAGE plpgsql VOLATILE;

ALTER FUNCTION gpmg.data_skew(character varying) OWNER TO gpadmin;
GRANT EXECUTE ON FUNCTION gpmg.data_skew(character varying) TO public;
GRANT EXECUTE ON FUNCTION gpmg.data_skew(character varying) TO gpadmin;

/*
Example session (column padding shortened):

bigdatagp=# select gpmg.data_skew('gpmg.manager_table');
 data_skew
-----------
 GP hava 64 instances, Standard skew is 0.01562500000000000000. You Table [gpmg.manager_table] skew info: [table_totalrows:83, maxskew:seg-sdw16, rows-3 0.03614457831325301205, minskew:seg-sdw2, rows-1 0.01204819277108433735]
(1 row)

bigdatagp=# select gpmg.data_skew('gpmg.func_log');
 data_skew
-----------
 GP hava 64 instances, Standard skew is 0.01562500000000000000. You Table [gpmg.func_log] skew info: [table_totalrows:53708, maxskew:seg-sdw10, rows-907 0.01688761450808073285, minskew:seg-sdw7, rows-773 0.01439264169211290683]
(1 row)
*/
-- 2014-10-14 09:53:00
-EOF-

转载地址:http://xapjm.baihongyu.com/

你可能感兴趣的文章
主动式部署陷阱
查看>>
webx2.0-RundataService学习总结
查看>>
SpringMVC的拦截器(Interceptor)和过滤器(Filter)的区别与联系
查看>>
云计算培训论云计算下的网络安全及措施
查看>>
users表空间在线损坏(不通过RMAN恢复)
查看>>
我在51cto第一篇博客
查看>>
TCP三次握手 和四次挥手
查看>>
基于本地配置文件的vsftpd
查看>>
MFC 对话框添加背景图片
查看>>
javascript中的void运算符语法及使用介绍
查看>>
《从零开始学Swift》学习笔记(Day 18)——有几个分支语句?
查看>>
类-Class
查看>>
T-SQL 优化
查看>>
System Center2012综述
查看>>
zabbix proxy搭建及应用proxy监控腾讯CVM服务器
查看>>
面向对象
查看>>
SQL语法2
查看>>
grub应用
查看>>
test
查看>>
linux配置Yum源
查看>>