0%

DolphinScheduler 之 Shell 数据校验

DolphinScheduler 可以通过 Shell 节点校验数据是否符合要求。

变量定义

1
2
# Partition date used by the checks. ${system.biz.date} is not valid bash
# parameter syntax (names cannot contain dots), so it is presumably a
# DolphinScheduler placeholder substituted before the shell runs — TODO confirm
# the exact date format it expands to.
PT_DATE=${system.biz.date}
# HDFS directory of the pt_d=<PT_DATE> partition under the default.db/test
# warehouse path.
PT_PATH=/user/hive/warehouse/default.db/test/pt_d=${PT_DATE}

校验 HDFS 分区是否存在

1
2
3
4
5
6
7
8
9
10
# Verify that the HDFS path held in PT_PATH exists.
# Exit status: 0 when the path exists, 1 when it does not.
set -e

# Obtain a Kerberos ticket from the keytab; under `set -e` a kinit failure
# aborts the script right here.
kinit -kt /etc/krb5/geosmart.keytab geosmart

# Run the test as the `if` condition: a non-zero status is then handled by the
# branch below instead of tripping `set -e` before the "not exist" message can
# be printed.
if hdfs dfs -test -e "${PT_PATH}"; then
  # Double quotes so that ${PT_PATH} is expanded in the message.
  echo "partition ${PT_PATH} exist"
  exit 0
fi
echo "partition ${PT_PATH} not exist"
exit 1

校验 Hive 表中数据行数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Verify that partition pt_d=${PT_DATE} of TABLE holds more than MIN_PT_COUNT
# rows.
# Exit status: 0 when the count is greater than MIN_PT_COUNT, 1 otherwise
# (including when no numeric count could be read from spark-sql).
set -e

TABLE='default.test'
MIN_PT_COUNT=5000

# Run the count query; under `set -e` a spark-sql failure aborts the script.
# NOTE(review): keytab/principal are spelled "geosmart" to match the kinit call
# in the HDFS partition check; the previous "geosamrt" looked like a
# transposition typo — confirm against the real keytab file name.
query_output=$(spark-sql \
  --keytab /etc/krb5/geosmart.keytab \
  --principal geosmart -S \
  -e "select count(1) from $TABLE where pt_d='${PT_DATE}'")

# Only the last output line is taken as the count; surrounding whitespace
# (including a possible trailing carriage return) is stripped.
pt_count=$(printf '%s\n' "$query_output" | tail -n 1 | tr -d '[:space:]')

# Reject empty or non-numeric output explicitly, so a garbled result is not
# reported as "lower than min" after a `[: integer expression expected` error.
case "$pt_count" in
  '' | *[!0-9]*)
    echo "$TABLE of partition ${PT_DATE},count=[$pt_count] is not a number" >&2
    exit 1
    ;;
esac

if [ "$pt_count" -gt "$MIN_PT_COUNT" ]; then
  echo "$TABLE of partition ${PT_DATE},count=[$pt_count],greater than min[$MIN_PT_COUNT]"
  exit 0
fi
echo "$TABLE of partition ${PT_DATE},count=[$pt_count],lower than min[$MIN_PT_COUNT]"
exit 1