%macro mardia ( data= , /* input data set */ var= , /* REQUIRED: variables for test */ /* May NOT be a list e.g. var1-var10 */ ); /* DATA= must be specified and data set must exist */ %if &data= or %sysfunc(exist(&data)) ne 1 %then %do; %put ERROR: DATA= data set not specified or not found.; %goto exit; %end; /* Verify that VAR= option is specified */ %if &var= %then %do; %put ERROR: Specify test variables in the VAR= argument; %goto exit; %end; /* Check existence and get number of VAR= variables */ %let _i=1; %let _p=0; %let dsid=%sysfunc(open(&data)); %if &dsid %then %do; %let _token=%scan(&var,&_i); %do %while ( &_token ne %str() ); %if %sysfunc(varnum(&dsid,&_token)) ne 0 %then %do; %let _p=%eval(&_p+1); %let _v&_p = &_token; %end; %else %do; %put ERROR: Variable &_token not found.; %goto exit; %end; %let _i=%eval(&_i+1); %let _token=%scan(&var,&_i); %end; %let rc=%sysfunc(close(&dsid)); %end; %else %do; %put ERROR: Could not open DATA= data set.; %goto exit; %end; %let nvar=&_p; /* Remove observations with missing values */ %put MULTNORM: Removing observations with missing values...; data _nomiss; set &data; if nmiss(of &var )=0; run; /* Quit if covariance matrix is singular */ %let singular=nonsingular; %put MULTNORM: Checking for singularity of covariance matrix...; proc princomp data=_nomiss noprint outstat=_evals out=_prins(keep=prin:) std vardef=n; var &var ; run; %if &syserr=3000 %then %do; %put MULTNORM: PROC PRINCOMP required for singularity check.; %put %str( Covariance matrix not checked for singularity.); %let princomp=na; %goto findproc; %end; %else %let princomp=yes; data _null_; set _evals; where _TYPE_='EIGENVAL'; if round(min(of &var ),1e-8)<=0 then do; put 'ERROR: Covariance matrix is singular.'; call symput('singular','singular'); end; run; %if &singular=singular %then %goto exit; proc iml; reset noname PRINTADV=0; use _nomiss; read all var {&var} into x; close _nomiss; use _nomiss; read all var {&var} into y[colname=Variable]; close _nomiss; p=ncol(x); n=nrow(x); *Multivariate skew and kurt; mean=x`*j(n,1,1)/n; ones=j(1,n,1); meanmat=(mean*ones); xdev=x-meanmat`; S=(xdev`*xdev)/n; Sinv=inv(S); D=xdev*Sinv*xdev`; b1p=sum(D##3)/(n*n); b2p=trace(D##2)/n; chidf=p*(p+1)*(p+2)/6; k=(p+1)*(n+1)*(n+3)/(n*((n+1)*(p+1)-6)); smallskew=n*k*b1p/6; z1=n*b1p/6; z2=(sqrt(n/(8*p*(p+2))))*(b2p-p*(p+2)); p_skew=1-probchi(z1,chidf); p_small=1-probchi(smallskew,chidf); p_kurt=2*(1-probnorm(abs(z2))); * Univariate; dev=xdev[+,]; devsq=(dev##2)/n; ss=(xdev##2)[+,]; m2=(ss-devsq)/n; sdev=sqrt(m2); m3=((xdev##3)[+,])/n; m4=((xdev##4)[+,])/n; sqrtb1=m3/(m2#sdev); b2=m4/(m2##2); g1=((sqrt(n*(n-1)))*sqrtb1)/(n-2); g2=(b2-((3*(n-1))/(n+1)))*((n**2-1)/((n-2)*(n-3))); Skewness=g1`; Kurtosis=g2`; skewSE=sqrt(6*n*(n-1) / ((n-2)*(n+1)*(n+3))); kurtSE=sqrt(4*(n##2-1)*skewSE##2 / ((n-3)*(n+5))); SE_skew=j(p,1,skewSE); SE_kurt=j(p,1,kurtSE); print "### Univariate Skewness and Kurtosis ###"; print " "; univariate = skewness || SE_skew || Kurtosis || SE_kurt; print univariate[rowname=Variable colname={"Skewness" "SE_skew" "Kurtosis" "SE_kurt"} ]; print " "; print "### Mardia's multivariate skewness and kurtosis ###"; print " "; print "Sample size = " n; print "Number of variables = " p; print " "; print "Multivariate skewness"; print "b1p = " b1p; print "z1 = " z1; print "p-value = " p_skew; print " "; print "Multivariate kurtosis"; print "b2p = " b2p; print "z2 = " z2; print "p-value = " p_kurt; print " "; %exit: %mend;