[rkward-cvs] SF.net SVN: rkward-code:[4857] trunk/rkward/rkward/plugins/00saveload/ import

tfry at users.sf.net tfry at users.sf.net
Tue Sep 30 07:51:01 UTC 2014


Revision: 4857
          http://sourceforge.net/p/rkward/code/4857
Author:   tfry
Date:     2014-09-30 07:50:57 +0000 (Tue, 30 Sep 2014)
Log Message:
-----------
Add encoding conversion to stata import plugin

Modified Paths:
--------------
    trunk/rkward/rkward/plugins/00saveload/import/import_spss.js
    trunk/rkward/rkward/plugins/00saveload/import/import_spss.rkh
    trunk/rkward/rkward/plugins/00saveload/import/import_spss.xml
    trunk/rkward/rkward/plugins/00saveload/import/import_stata.js
    trunk/rkward/rkward/plugins/00saveload/import/import_stata.rkh
    trunk/rkward/rkward/plugins/00saveload/import/import_stata.xml

Added Paths:
-----------
    trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.js
    trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.xml

Added: trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.js
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.js	                        (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.js	2014-09-30 07:50:57 UTC (rev 4857)
@@ -0,0 +1,28 @@
+function makeEncodingPreprocessCode () {
+	if (!getValue ("do_locale_conversion")) return;
+	echo ('\n');
+	echo ('# helper function to convert all strings to the current encoding\n');
+	echo ('iconv.recursive <- function (x, from) {\n');
+	echo ('	attribs <- attributes (x);\n');
+	echo ('	if (is.character (x)) {\n');
+	echo ('		x <- iconv (x, from=from, to="", sub="")\n');
+	echo ('	} else if (is.list (x)) {\n');
+	echo ('		x <- lapply (x, function (sub) iconv.recursive (sub, from))\n');
+	echo ('	}\n');
+	echo ('	# convert factor levels and all other attributes\n');
+	echo ('	attributes (x) <- lapply (attribs, function (sub) iconv.recursive (sub, from))\n');
+	echo ('	x\n');
+	echo ('}\n');
+}
+
+function makeEncodingCall (varname) {
+	if (!getValue ("do_locale_conversion")) return;
+
+	var from_locale = getValue ("encoding");
+	if (from_locale == "other") {
+		from_locale = getValue ("user_encoding");
+	}
+	echo ('\n');
+	echo ('# convert all strings to the current encoding\n');
+	echo (varname + ' <- iconv.recursive (' + varname + ', from="' + from_locale + '")\n');
+}
\ No newline at end of file

Added: trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.xml	                        (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.xml	2014-09-30 07:50:57 UTC (rev 4857)
@@ -0,0 +1,43 @@
+<!DOCTYPE rkplugin>
+<!-- Snippet for inclusion -->
+<document>
+	<snippet id="encoding_logic">
+		<connect governor="do_locale_conversion.state" client="encoding.enabled" />
+		<convert id="other_encoding" mode="equals" standard="other" sources="encoding.string" />
+		<convert id="other_encoding2" mode="and" sources="other_encoding;do_locale_conversion.state" />
+		<connect governor="other_encoding2" client="user_encoding.enabled" />
+	</snippet>
+	<snippet id="encoding_tab">
+		<tab id="tab_encoding" label="Encoding">
+			<checkbox id="do_locale_conversion" checked="false" label="Convert string encoding" value="1" value_unchecked="0"/>
+			<dropdown id="encoding" label="Convert from:">
+				<option value="latin1" label="Latin 1" checked="true"/>
+				<option value="UTF-8" label="UTF-8"/>
+				<option value="ISO8859-1" label="ISO8859-1 Latin-1 Western European"/>
+				<option value="ISO8859-2" label="ISO8859-2 Latin-2 Central European"/>
+				<option value="ISO8859-3" label="ISO8859-3 Latin-3 South European"/>
+				<option value="ISO8859-4" label="ISO8859-4 Latin-4 North European"/>
+				<option value="ISO8859-5" label="ISO8859-5 Latin/Cyrillic"/>
+				<option value="ISO8859-6" label="ISO8859-6 Latin/Arabic"/>
+				<option value="ISO8859-7" label="ISO8859-7 Latin/Greek"/>
+				<option value="MS-GREEK" label="MS-GREEK"/>
+				<option value="ISO8859-8" label="ISO8859-8 Latin/Hebrew"/>
+				<option value="ISO8859-9" label="ISO8859-9 Latin-5 Turkish"/>
+				<option value="ISO8859-10" label="ISO8859-10 Latin-6 Nordic"/>
+				<option value="ISO8859-11" label="ISO8859-11 Latin/Thai"/>
+				<option value="ISO8859-13" label="ISO8859-13 Latin-7 Baltic Rim"/>
+				<option value="ISO8859-14" label="ISO8859-14 Latin-8 Celtic"/>
+				<option value="ISO8859-15" label="ISO8859-15 Latin-9 Western European (EUR)"/>
+				<option value="ISO8859-16" label="ISO8859-16 Latin-10 South-Eastern European"/>
+				<option value="other" label="Other (specify below)"/>
+			</dropdown>
+			<input id="user_encoding" label="Other encoding" required="true"/>
+		</tab>
+	</snippet>
+	<snippet id="encoding_doc">
+		<caption id="tab_encoding"/>
+		<setting id="do_locale_conversion">If special characters (e.g. umlauts) do not show up correctly, the data file probably uses a different locale than your R session. In this case, check this option, and specify the correct character encoding below.</setting>
+		<setting id="encoding">Select the character encoding to convert from. The option above needs to be checked for this to be enabled.</setting>
+		<setting id="user_encoding">If none of the encodings above matches, you can specify an encoding here. Note that the available encodings may differ from platform to platform. See <link href="rkward://rhelp/iconvlist" /> for a list of available encodings.</setting>
+	</snippet>
+</document>
\ No newline at end of file

Modified: trunk/rkward/rkward/plugins/00saveload/import/import_spss.js
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_spss.js	2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_spss.js	2014-09-30 07:50:57 UTC (rev 4857)
@@ -1,20 +1,8 @@
+include ("convert_encoding.js");
+
 function preprocess () {
 	echo ('require (foreign)\n');
-	if (getValue ("do_locale_conversion")) {
-		echo ('\n');
-		echo ('# helper function to convert all strings to the current encoding\n');
-		echo ('iconv.recursive <- function (x, from) {\n');
-		echo ('	attribs <- attributes (x);\n');
-		echo ('	if (is.character (x)) {\n');
-		echo ('		x <- iconv (x, from=from, to="", sub="")\n');
-		echo ('	} else if (is.list (x)) {\n');
-		echo ('		x <- lapply (x, function (sub) iconv.recursive (sub, from))\n');
-		echo ('	}\n');
-		echo ('	# convert factor levels and all other attributes\n');
-		echo ('	attributes (x) <- lapply (attribs, function (sub) iconv.recursive (sub, from))\n');
-		echo ('	x\n');
-		echo ('}\n');
-	}
+	makeEncodingPreprocessCode ();
 }
 
 function calculate () {
@@ -36,15 +24,7 @@
 	var object = getValue ("saveto");
 
 	echo ('data <- read.spss ("' + getValue ("file") + '"' + data_frame_opt + labels_opt + ')\n');
-	if (getValue ("do_locale_conversion")) {
-		var from_locale = getValue ("encoding");
-		if (from_locale == "other") {
-			from_locale = getValue ("user_encoding");
-		}
-		echo ('\n');
-		echo ('# convert all strings to the current encoding\n');
-		echo ('data <- iconv.recursive (data, from="' + from_locale + '")\n');
-	}
+	makeEncodingCall ('data');
 	if (getValue ("convert_var_labels")) {
 		echo ('\n');
 		echo ('# set variable labels for use in RKWard\n');

Modified: trunk/rkward/rkward/plugins/00saveload/import/import_spss.rkh
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_spss.rkh	2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_spss.rkh	2014-09-30 07:50:57 UTC (rev 4857)
@@ -1,5 +1,8 @@
 <!DOCTYPE rkhelp>
 <document>
+	<snippets>
+		<include file="convert_encoding.xml"/>
+	</snippets>
 	<summary>
 Import SPSS data files.
 	</summary>
@@ -19,11 +22,7 @@
 		<setting id="use_labels">Should SPSS variables with value labels be converted to R factors with those levels?</setting>
 		<setting id="labels_limit">Maximum number of factor levels to use (see <link href="rkward://rhelp/read.spss" />)</setting>
 		<setting id="trim_labels">Trim trailing white space from labels?</setting>
-
-		<caption id="tab_encoding"/>
-		<setting id="do_locale_conversion">If special character (e.g. umlauts) do not show up correctly, the SPSS file probably uses a different locale than your R session. In this case, check this option, and specify the correct character encoding below.</setting>
-		<setting id="encoding">Select character encoding to convert from. The option above needs to be checked, for this to be enabled.</setting>
-		<setting id="user_encoding">If none of the encodings above matches, you can specify an encoding here. Note that the available encodings may differ from platform to platform. See <link href="rkward://rhelp/iconvlist" /> for a list of available encodings.</setting>
+		<insert snippet="encoding_doc"/>
 	</settings>
 	<related>
 		<ul>

Modified: trunk/rkward/rkward/plugins/00saveload/import/import_spss.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_spss.xml	2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_spss.xml	2014-09-30 07:50:57 UTC (rev 4857)
@@ -2,6 +2,9 @@
 <document>
 	<code file="import_spss.js" />
 	<help file="import_spss.rkh" />
+	<snippets>
+		<include file="convert_encoding.xml"/>
+	</snippets>
 	<logic>
 		<external id="filename"/>
 		<connect governor="filename" client="file.selection"/>
@@ -11,10 +14,7 @@
 		<connect governor="use_labels.state" client="labels_limit.enabled"/>
 		<connect governor="use_labels.state" client="trim_labels.enabled"/>
 		
-		<connect governor="do_locale_conversion.state" client="encoding.enabled" />
-		<convert id="other_encoding" mode="equals" standard="other" sources="encoding.string" />
-		<convert id="other_encoding2" mode="and" sources="other_encoding;do_locale_conversion.state" />
-		<connect governor="other_encoding2" client="user_encoding.enabled" />
+		<insert snippet="encoding_logic"/>
 	</logic>
 	<dialog label="Import SPSS file">
 		<tabbook>
@@ -34,31 +34,7 @@
 					<checkbox id="trim_labels" checked="false" label="Trim white space" value="1" value_unchecked="0"/>
 				</frame>
 			</tab>
-			<tab id="tab_encoding" label="Encoding">
-				<checkbox id="do_locale_conversion" checked="false" label="Convert string encoding" value="1" value_unchecked="0"/>
-				<dropdown id="encoding" label="Convert from:">
-					<option value="latin1" label="Latin 1" checked="true"/>
-					<option value="UTF-8" label="UTF-8"/>
-					<option value="ISO8859-1" label="ISO8859-1 Latin-1 Western European"/>
-					<option value="ISO8859-2" label="ISO8859-2 Latin-2 Central European"/>
-					<option value="ISO8859-3" label="ISO8859-3 Latin-3 South European"/>
-					<option value="ISO8859-4" label="ISO8859-4 Latin-4 North European"/>
-					<option value="ISO8859-5" label="ISO8859-5 Latin/Cyrillic"/>
-					<option value="ISO8859-6" label="ISO8859-6 Latin/Arabic"/>
-					<option value="ISO8859-7" label="ISO8859-7 Latin/Greek"/>
-					<option value="MS-GREEK" label="MS-GREEK"/>
-					<option value="ISO8859-8" label="ISO8859-8 Latin/Hebrew"/>
-					<option value="ISO8859-9" label="ISO8859-9 Latin-5 Turkish"/>
-					<option value="ISO8859-10" label="ISO8859-10 Latin-6 Nordic"/>
-					<option value="ISO8859-11" label="ISO8859-11 Latin/Thai"/>
-					<option value="ISO8859-13" label="ISO8859-13 Latin-7 Baltic Rim"/>
-					<option value="ISO8859-14" label="ISO8859-14 Latin-8 Celtic"/>
-					<option value="ISO8859-15" label="ISO8859-15 Latin-9 Western European (EUR)"/>
-					<option value="ISO8859-16" label="ISO8859-16 Latin-10 South-Eastern European"/>
-					<option value="other" label="Other (specify below)"/>
-				</dropdown>
-				<input id="user_encoding" label="Other encoding" required="true"/>
-			</tab>
+			<insert snippet="encoding_tab"/>
 		</tabbook>
 	</dialog>
 </document>

Modified: trunk/rkward/rkward/plugins/00saveload/import/import_stata.js
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_stata.js	2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_stata.js	2014-09-30 07:50:57 UTC (rev 4857)
@@ -1,5 +1,8 @@
+include ("convert_encoding.js");
+
 function preprocess () {
 	echo ('require (foreign)\n');
+	makeEncodingPreprocessCode ();
 }
 
 function calculate () {
@@ -32,6 +35,7 @@
 	var object = getValue ("saveto");
 
 	echo ('data <- read.dta ("' + getValue ("file") + '"' + options + ')\n');
+	makeEncodingCall ('data');
 	echo ('\n');
 	echo ('# set variable labels for use in RKWard\n');
 	echo ('labels <- attr (data, "var.labels")\n');

Modified: trunk/rkward/rkward/plugins/00saveload/import/import_stata.rkh
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_stata.rkh	2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_stata.rkh	2014-09-30 07:50:57 UTC (rev 4857)
@@ -1,5 +1,8 @@
 <!DOCTYPE rkhelp>
 <document>
+	<snippets>
+		<include file="convert_encoding.xml"/>
+	</snippets>
 	<summary>
 Import STATA data files.
 	</summary>
@@ -18,6 +21,7 @@
 		<setting id="convert_factors">R uses value labels only for factors. Should Stata variable with value labels be converted to factors?</setting>
 		<setting id="missing_type">Stata version 8 and above differentiates various different type of missing values. If this option is set, this information is stored in an attribute of the imported data. See <link href="rkward://rhelp/read.dta"/> for details.</setting>
 		<setting id="convert_underscore">The underscore ('_') is usually not used in R variable names, and may cause problems in some (rare) situations. Should underscore characters be converted to dots ('.')?</setting>
+		<insert snippet="encoding_doc"/>
 	</settings>
 	<related>
 		<ul>

Modified: trunk/rkward/rkward/plugins/00saveload/import/import_stata.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_stata.xml	2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_stata.xml	2014-09-30 07:50:57 UTC (rev 4857)
@@ -2,9 +2,13 @@
 <document>
 	<code file="import_stata.js" />
 	<help file="import_stata.rkh" />
+	<snippets>
+		<include file="convert_encoding.xml"/>
+	</snippets>
 	<logic>
 		<external id="filename"/>
 		<connect governor="filename" client="file.selection"/>
+		<insert snippet="encoding_logic"/>
 	</logic>
 	<dialog label="Import STATA file">
 		<tabbook>
@@ -23,6 +27,7 @@
 					<checkbox id="convert_underscore" checked="false" label="Convert '_' in Stata variable names to '.' in R names" value="1" value_unchecked="0"/>
 				</frame>
 			</tab>
+			<insert snippet="encoding_tab"/>
 		</tabbook>
 	</dialog>
 </document>





More information about the rkward-tracker mailing list