[rkward-cvs] SF.net SVN: rkward:[4134] trunk/rkward/rkward/plugins/analysis

m-eik at users.sourceforge.net m-eik at users.sourceforge.net
Tue Dec 20 20:39:19 UTC 2011


Revision: 4134
          http://rkward.svn.sourceforge.net/rkward/?rev=4134&view=rev
Author:   m-eik
Date:     2011-12-20 20:39:19 +0000 (Tue, 20 Dec 2011)
Log Message:
-----------
added support for polyserial and polychoric correlations to the correlation matrix plugin

Modified Paths:
--------------
    trunk/rkward/rkward/plugins/analysis/corr_matrix.js
    trunk/rkward/rkward/plugins/analysis/corr_matrix.rkh
    trunk/rkward/rkward/plugins/analysis/corr_matrix.xml

Modified: trunk/rkward/rkward/plugins/analysis/corr_matrix.js
===================================================================
--- trunk/rkward/rkward/plugins/analysis/corr_matrix.js	2011-12-19 14:42:04 UTC (rev 4133)
+++ trunk/rkward/rkward/plugins/analysis/corr_matrix.js	2011-12-20 20:39:19 UTC (rev 4134)
@@ -1,9 +1,22 @@
 // globals
 var use;
 var method;
+var polyCorr;
 var do_p;
 var toNumeric;
 
+function preprocess() { // sets the globals method and polyCorr; echoes require(polycor) when polyserial/polychoric is selected
+	method = "\"" + getValue ("method") + "\""; // kept pre-quoted: calculate() and printout() paste it into generated R code as a string literal
+	if (method == "\"polyserial\"" || method == "\"polychoric\""){ // compare against the quoted form built on the previous line
+		polyCorr = true;
+	} else {
+		polyCorr = false;
+	}
+	if (polyCorr) {
+		echo ('require(polycor)\n'); // polyserial() and polychor() are called in the R code echoed by calculate()
+	} else {}
+}
+
 function calculate () {
 	do_p = getValue ("do_p");
 
@@ -18,12 +31,11 @@
 		exclude_whole = true;
 		use = "\"complete.obs\"";
 	}
-	method = "\"" + getValue ("method") + "\"";
 
 	echo ('# cor requires all objects to be inside the same data.frame.\n');
 	echo ('# Here we construct such a temporary frame from the input variables\n');
 	echo ('data.list <- rk.list (' + vars.split ("\n").join (", ") + ')\n');
-	if (toNumeric) {
+	if (!polyCorr && toNumeric) {
 		echo ('# Non-numeric variables will be treated as ordered data and transformed into numeric ranks\n');
 		echo ('transformed.vars <- list()\n');
 		echo ('for (i in names(data.list)) {\n');
@@ -41,21 +53,60 @@
 	echo ('data <- as.data.frame (data.list, check.names=FALSE)\n');
 	echo ('\n');
 	echo ('# calculate correlation matrix\n');
-	echo ('result <- cor (data, use=' + use + ', method=' + method + ')\n');
-	if (do_p) {
+	if (polyCorr) {
+		echo ('result <- matrix (nrow = length (data), ncol = length (data), dimnames=list (names (data), names (data)))\n');
+	} else {
+		echo ('result <- cor (data, use=' + use + ', method=' + method + ')\n');
+	}
+	if (do_p || polyCorr) {
 		echo ('# calculate matrix of probabilities\n');
 		echo ('result.p <- matrix (nrow = length (data), ncol = length (data), dimnames=list (names (data), names (data)))\n');
-		if (exclude_whole) {
+		if (!polyCorr && exclude_whole) {
 			echo ('# as we need to do pairwise comparisons for technical reasons,\n');
 			echo ('# we need to exclude incomplete cases first to match the use="complete.obs" parameter to cor()\n');
 			echo ('data <- data[complete.cases (data),]\n');
-		}
+		} else {}
 		echo ('for (i in 1:length (data)) {\n');
 		echo ('	for (j in i:length (data)) {\n');
 		echo ('		if (i != j) {\n');
-		echo ('			t <- cor.test (data[[i]], data[[j]], method=' + method + ')\n');
-		echo ('			result.p[i, j] <- t$p.value\n');
-		echo ('			result.p[j, i] <- sum (complete.cases (data[[i]], data[[j]]))\n');
+		if (polyCorr) {
+			if(method == "\"polyserial\""){
+				echo('			# polyserial expects x to be numeric\n');
+				echo('			if(is.numeric(data[[i]]) & !is.numeric(data[[j]])){\n');
+				echo('				t <- polyserial(data[[i]], data[[j]]');
+				if (do_p) {
+					echo(', std.err=TRUE');
+				} else {}
+				echo(')\n			} else if(is.numeric(data[[j]]) & !is.numeric(data[[i]])){\n');
+				echo('				t <- polyserial(data[[j]], data[[i]]');
+				if (do_p) {
+					echo(', std.err=TRUE');
+				} else {}
+				echo(')\n			} else {\n');
+				echo('				t <- NULL\n');
+				echo('			}\n');
+			} else {
+				echo('			t <- polychor(data[[i]], data[[j]]');
+				if (do_p) {
+					echo(', std.err=TRUE)\n');
+				} else {
+					echo(')\n');
+				}
+			}
+			if (do_p) {
+				echo ('			if(length(t) > 0){\n');
+				echo ('				result[j, i] <- result[i, j] <- t$rho\n');
+				echo ('				result.p[j, i] <- paste("Chisq=", t$chisq, ",<br />df=", t$df, ",<br />p=", pchisq(t$chisq, t$df, lower.tail=FALSE), sep="")\n');
+				echo ('				result.p[i, j] <- paste("se=", sqrt(diag(t$var)), ",<br />n=", t$n, sep="")\n');
+				echo ('			} else {}\n');
+			} else {
+				echo ('			result[i, j] <- result[j, i] <- t\n');
+			}
+		} else {
+			echo ('			t <- cor.test (data[[i]], data[[j]], method=' + method + ')\n');
+			echo ('			result.p[i, j] <- t$p.value\n');
+			echo ('			result.p[j, i] <- sum (complete.cases (data[[i]], data[[j]]))\n');
+		}
 		echo ('		}\n');
 		echo ('	}\n');
 		echo ('}\n');
@@ -63,13 +114,22 @@
 }
 
 function printout () {
-	echo ('rk.header ("Correlation Matrix", parameters=list ("Method", ' + method + ', "Exclusion", ' + use + '))\n');
-	echo ('\n');
+	echo ('rk.header ("Correlation Matrix", parameters=list ("Method", ' + method);
+	if (!polyCorr) {
+		echo(', "Exclusion", ' + use);
+	} else {}
+	echo ('))\n\n');
 	echo ('rk.results (data.frame (result, check.names=FALSE), titles=c ("Coefficient", names (data)))\n');
 	if (do_p) {
-		echo ('rk.results (data.frame (result.p, check.names=FALSE), titles=c ("n \\\\ p", names (data)))\n');
+		if (polyCorr) {
+			echo ('rk.header ("Standard errors, test of bivariate normality and sample size", level=4)\n');
+			echo ('rk.results (data.frame (result.p, check.names=FALSE, stringsAsFactors=FALSE), titles=c ("Chisq, df, p \\\\ se, n", names (data)))\n');
+		} else {
+			echo ('rk.header ("p-values and sample size", level=4)\n');
+			echo ('rk.results (data.frame (result.p, check.names=FALSE), titles=c ("n \\\\ p", names (data)))\n');
+		}
 	}
-	if (toNumeric) {
+	if (!polyCorr && toNumeric) {
 		echo ('if(length(transformed.vars) > 0){\n');
 		echo ('	rk.header("Variables treated as numeric ranks", level=4)\n');
 		echo ('	for (i in names(transformed.vars)) {\n');

Modified: trunk/rkward/rkward/plugins/analysis/corr_matrix.rkh
===================================================================
--- trunk/rkward/rkward/plugins/analysis/corr_matrix.rkh	2011-12-19 14:42:04 UTC (rev 4133)
+++ trunk/rkward/rkward/plugins/analysis/corr_matrix.rkh	2011-12-20 20:39:19 UTC (rev 4134)
@@ -10,16 +10,19 @@
 
 	<settings>
 		<caption id="tab_variables"/>
-		<setting id="x">Select the vectors to be correlated. The vectors need to be numeric, and of equal length.</setting>
+		<setting id="x">Select the vectors to be correlated. For Pearson, Kendall and Spearman, the vectors need to be numeric (see below) and of equal length. Polyserial correlations are calculated between pairs of numeric and categorical variables, and polychoric correlations between categorical variables. If the categorical variables are dichotomous, polyserial/polychoric correlations are equivalent to biserial/tetrachoric correlations.</setting>
 		<caption id="tab_options"/>
-		<setting id="do_p">If checked, an additional table with the (two-sided) significance values is calculated and printed.</setting>
-		<setting id="method">Method of correlation to be used</setting>
+		<setting id="do_p">If checked, an additional table with the (two-sided) significance values is calculated and printed. For polyserial/polychoric correlations, chi-squared tests of bivariate normality are conducted, and the standard errors are reported as well.</setting>
+		<setting id="method">Method of correlation to be used.</setting>
+		<setting id="to_numeric">For Kendall and Spearman: if some variables are not numeric but ordered categorical variables, treat them as numeric ranks.</setting>
 		<setting id="use">In case of missing values, should be row be excluded from all calculation (i.e. even for those pairs of variables, where neither value is missing), or should they only be excluded for those pairs where it is actually missing?</setting>
 	</settings>
 	<related>
 		<ul>
 			<li><link href="rkward://rhelp/cor"/></li>
 			<li><link href="rkward://rhelp/cor.test"/></li>
+			<li><link href="rkward://rhelp/polyserial"/></li>
+			<li><link href="rkward://rhelp/polychor"/></li>
 		</ul>
 	</related>
 </document>

Modified: trunk/rkward/rkward/plugins/analysis/corr_matrix.xml
===================================================================
--- trunk/rkward/rkward/plugins/analysis/corr_matrix.xml	2011-12-19 14:42:04 UTC (rev 4133)
+++ trunk/rkward/rkward/plugins/analysis/corr_matrix.xml	2011-12-20 20:39:19 UTC (rev 4134)
@@ -4,6 +4,14 @@
 	<code file="corr_matrix.js"/>
 	<help file="corr_matrix.rkh"/>
 
+	<logic>
+		<convert id="polyser" sources="method.string" mode="notequals" standard="polyserial" />
+		<convert id="polycho" sources="method.string" mode="notequals" standard="polychoric" />
+		<convert id="polycor" sources="polyser;polycho" mode="and" />
+		<connect governor="polycor" client="use.enabled" />
+		<connect governor="polycor" client="to_numeric.enabled" />
+	</logic>
+
 	<dialog label="Correlation matrix">
 		<tabbook>
 			<tab id="tab_variables" label="Variables">
@@ -12,14 +20,17 @@
 					<varslot type="numeric" min_vars="2" multi="true" id="x" source="vars" required="true" label="variable(s):" num_dimensions="1"/>
 				</row>
 			</tab>
-			<tab id="tab_options" label="Options">
+			<tab label="Options" id="tab_Options">
 				<checkbox id="do_p" label="Calculate p values" value="true" checked="true" />
 				<radio id="method" label="Method">
 					<option label="Pearson's product-moment correlation" value="pearson" checked="true" />
 					<option label="Kendall's tau" value="kendall" />
 					<option label="Spearman's rho" value="spearman" />
+					<option label="Polyserial correlation" value="polyserial" />
+					<option label="Polychoric correlation" value="polychoric" />
 				</radio>
 				<checkbox id="to_numeric" label="Treat ordered categorial variables as numeric ranks" value="true" />
+				<stretch />
 				<radio id="use" label="Exclude missing values">
 					<option label="whole cases" value="complete" />
 					<option label="pairwise" value="pairwise" checked="true" />

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





More information about the rkward-tracker mailing list