[rkward-cvs] SF.net SVN: rkward-code:[4857] trunk/rkward/rkward/plugins/00saveload/import
tfry at users.sf.net
tfry at users.sf.net
Tue Sep 30 07:51:01 UTC 2014
Revision: 4857
http://sourceforge.net/p/rkward/code/4857
Author: tfry
Date: 2014-09-30 07:50:57 +0000 (Tue, 30 Sep 2014)
Log Message:
-----------
Add encoding conversion to stata import plugin
Modified Paths:
--------------
trunk/rkward/rkward/plugins/00saveload/import/import_spss.js
trunk/rkward/rkward/plugins/00saveload/import/import_spss.rkh
trunk/rkward/rkward/plugins/00saveload/import/import_spss.xml
trunk/rkward/rkward/plugins/00saveload/import/import_stata.js
trunk/rkward/rkward/plugins/00saveload/import/import_stata.rkh
trunk/rkward/rkward/plugins/00saveload/import/import_stata.xml
Added Paths:
-----------
trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.js
trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.xml
Added: trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.js
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.js (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.js 2014-09-30 07:50:57 UTC (rev 4857)
@@ -0,0 +1,28 @@
+function makeEncodingPreprocessCode () { // Emits (via echo) R code defining the helper iconv.recursive (x, from); takes no arguments and returns nothing.
+ if (!getValue ("do_locale_conversion")) return; // conversion not requested: emit no helper code at all
+ echo ('\n');
+ echo ('# helper function to convert all strings to the current encoding\n');
+ echo ('iconv.recursive <- function (x, from) {\n');
+ echo (' attribs <- attributes (x);\n'); // generated R: remember the attributes so they can be converted and re-applied below
+ echo (' if (is.character (x)) {\n');
+ echo (' x <- iconv (x, from=from, to="", sub="")\n'); // generated R: character data is converted directly. NOTE(review): to="" is taken to mean the current encoding (per the emitted comment) and sub="" presumably drops unconvertible input - confirm against R's iconv documentation
+ echo (' } else if (is.list (x)) {\n');
+ echo (' x <- lapply (x, function (sub) iconv.recursive (sub, from))\n'); // generated R: lists are converted element by element, recursively
+ echo (' }\n');
+ echo (' # convert factor levels and all other attributes\n');
+ echo (' attributes (x) <- lapply (attribs, function (sub) iconv.recursive (sub, from))\n'); // generated R: the saved attributes go through the same recursive conversion before being set again
+ echo (' x\n'); // generated R: the converted object is the helper's return value
+ echo ('}\n');
+}
+
+function makeEncodingCall (varname) { // Emits R code that overwrites the object named by varname with iconv.recursive (varname, from=...); varname is inserted verbatim into the generated code.
+ if (!getValue ("do_locale_conversion")) return; // conversion not requested: emit nothing
+
+ var from_locale = getValue ("encoding"); // source encoding as selected in the "encoding" setting
+ if (from_locale == "other") { // the value "other" means: use the content of the "user_encoding" setting instead
+ from_locale = getValue ("user_encoding");
+ }
+ echo ('\n');
+ echo ('# convert all strings to the current encoding\n');
+ echo (varname + ' <- iconv.recursive (' + varname + ', from="' + from_locale + '")\n'); // calls the helper emitted by makeEncodingPreprocessCode (). NOTE(review): from_locale is pasted unescaped into an R string literal - a double quote in user_encoding would presumably break the generated code; confirm whether escaping is needed
+}
\ No newline at end of file
Added: trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.xml (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/convert_encoding.xml 2014-09-30 07:50:57 UTC (rev 4857)
@@ -0,0 +1,43 @@
+<!DOCTYPE rkplugin>
+<!-- Snippet for inclusion -->
+<document>
+ <snippet id="encoding_logic"> <!-- Enable/disable wiring for the encoding controls; inserted into the logic section of the import plugins -->
+ <connect governor="do_locale_conversion.state" client="encoding.enabled" /> <!-- the encoding dropdown is enabled only while the conversion checkbox is checked -->
+ <convert id="other_encoding" mode="equals" standard="other" sources="encoding.string" /> <!-- true when the dropdown selection is "other" -->
+ <convert id="other_encoding2" mode="and" sources="other_encoding;do_locale_conversion.state" /> <!-- true when "other" is selected AND conversion is checked -->
+ <connect governor="other_encoding2" client="user_encoding.enabled" /> <!-- the free-text encoding input is enabled only in that case -->
+ </snippet>
+ <snippet id="encoding_tab"> <!-- The "Encoding" dialog tab; inserted into the tabbook of the import plugins -->
+ <tab id="tab_encoding" label="Encoding">
+ <checkbox id="do_locale_conversion" checked="false" label="Convert string encoding" value="1" value_unchecked="0"/> <!-- unchecked by default; convert_encoding.js emits no conversion code unless this is set -->
+ <dropdown id="encoding" label="Convert from:"> <!-- the selected value is used as the from= encoding by makeEncodingCall () in convert_encoding.js -->
+ <option value="latin1" label="Latin 1" checked="true"/> <!-- default selection -->
+ <option value="UTF-8" label="UTF-8"/>
+ <option value="ISO8859-1" label="ISO8859-1 Latin-1 Western European"/>
+ <option value="ISO8859-2" label="ISO8859-2 Latin-2 Central European"/>
+ <option value="ISO8859-3" label="ISO8859-3 Latin-3 South European"/>
+ <option value="ISO8859-4" label="ISO8859-4 Latin-4 North European"/>
+ <option value="ISO8859-5" label="ISO8859-5 Latin/Cyrillic"/>
+ <option value="ISO8859-6" label="ISO8859-6 Latin/Arabic"/>
+ <option value="ISO8859-7" label="ISO8859-7 Latin/Greek"/>
+ <option value="MS-GREEK" label="MS-GREEK"/>
+ <option value="ISO8859-8" label="ISO8859-8 Latin/Hebrew"/>
+ <option value="ISO8859-9" label="ISO8859-9 Latin-5 Turkish"/>
+ <option value="ISO8859-10" label="ISO8859-10 Latin-6 Nordic"/>
+ <option value="ISO8859-11" label="ISO8859-11 Latin/Thai"/>
+ <option value="ISO8859-13" label="ISO8859-13 Latin-7 Baltic Rim"/>
+ <option value="ISO8859-14" label="ISO8859-14 Latin-8 Celtic"/>
+ <option value="ISO8859-15" label="ISO8859-15 Latin-9 Western European (EUR)"/>
+ <option value="ISO8859-16" label="ISO8859-16 Latin-10 South-Eastern European"/>
+ <option value="other" label="Other (specify below)"/> <!-- sentinel value: makeEncodingCall () then reads the encoding from user_encoding instead -->
+ </dropdown>
+ <input id="user_encoding" label="Other encoding" required="true"/> <!-- free-text encoding name, used when "other" is selected above -->
+ </tab>
+ </snippet>
+ <snippet id="encoding_doc"> <!-- Help entries for the controls of the encoding tab; inserted into the settings section of the .rkh help files -->
+ <caption id="tab_encoding"/>
+ <setting id="do_locale_conversion">If special character (e.g. umlauts) do not show up correctly, the data file probably uses a different locale than your R session. In this case, check this option, and specify the correct character encoding below.</setting>
+ <setting id="encoding">Select character encoding to convert from. The option above needs to be checked, for this to be enabled.</setting>
+ <setting id="user_encoding">If none of the encodings above matches, you can specify an encoding here. Note that the available encodings may differ from platform to platform. See <link href="rkward://rhelp/iconvlist" /> for a list of available encodings.</setting>
+ </snippet>
+</document>
\ No newline at end of file
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_spss.js
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_spss.js 2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_spss.js 2014-09-30 07:50:57 UTC (rev 4857)
@@ -1,20 +1,8 @@
+include ("convert_encoding.js");
+
function preprocess () {
echo ('require (foreign)\n');
- if (getValue ("do_locale_conversion")) {
- echo ('\n');
- echo ('# helper function to convert all strings to the current encoding\n');
- echo ('iconv.recursive <- function (x, from) {\n');
- echo (' attribs <- attributes (x);\n');
- echo (' if (is.character (x)) {\n');
- echo (' x <- iconv (x, from=from, to="", sub="")\n');
- echo (' } else if (is.list (x)) {\n');
- echo (' x <- lapply (x, function (sub) iconv.recursive (sub, from))\n');
- echo (' }\n');
- echo (' # convert factor levels and all other attributes\n');
- echo (' attributes (x) <- lapply (attribs, function (sub) iconv.recursive (sub, from))\n');
- echo (' x\n');
- echo ('}\n');
- }
+ makeEncodingPreprocessCode ();
}
function calculate () {
@@ -36,15 +24,7 @@
var object = getValue ("saveto");
echo ('data <- read.spss ("' + getValue ("file") + '"' + data_frame_opt + labels_opt + ')\n');
- if (getValue ("do_locale_conversion")) {
- var from_locale = getValue ("encoding");
- if (from_locale == "other") {
- from_locale = getValue ("user_encoding");
- }
- echo ('\n');
- echo ('# convert all strings to the current encoding\n');
- echo ('data <- iconv.recursive (data, from="' + from_locale + '")\n');
- }
+ makeEncodingCall ('data');
if (getValue ("convert_var_labels")) {
echo ('\n');
echo ('# set variable labels for use in RKWard\n');
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_spss.rkh
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_spss.rkh 2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_spss.rkh 2014-09-30 07:50:57 UTC (rev 4857)
@@ -1,5 +1,8 @@
<!DOCTYPE rkhelp>
<document>
+ <snippets>
+ <include file="convert_encoding.xml"/>
+ </snippets>
<summary>
Import SPSS data files.
</summary>
@@ -19,11 +22,7 @@
<setting id="use_labels">Should SPSS variables with value labels be converted to R factors with those levels?</setting>
<setting id="labels_limit">Maximum number of factor levels to use (see <link href="rkward://rhelp/read.spss" />)</setting>
<setting id="trim_labels">Trim trailing white space from labels?</setting>
-
- <caption id="tab_encoding"/>
- <setting id="do_locale_conversion">If special character (e.g. umlauts) do not show up correctly, the SPSS file probably uses a different locale than your R session. In this case, check this option, and specify the correct character encoding below.</setting>
- <setting id="encoding">Select character encoding to convert from. The option above needs to be checked, for this to be enabled.</setting>
- <setting id="user_encoding">If none of the encodings above matches, you can specify an encoding here. Note that the available encodings may differ from platform to platform. See <link href="rkward://rhelp/iconvlist" /> for a list of available encodings.</setting>
+ <insert snippet="encoding_doc"/>
</settings>
<related>
<ul>
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_spss.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_spss.xml 2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_spss.xml 2014-09-30 07:50:57 UTC (rev 4857)
@@ -2,6 +2,9 @@
<document>
<code file="import_spss.js" />
<help file="import_spss.rkh" />
+ <snippets>
+ <include file="convert_encoding.xml"/>
+ </snippets>
<logic>
<external id="filename"/>
<connect governor="filename" client="file.selection"/>
@@ -11,10 +14,7 @@
<connect governor="use_labels.state" client="labels_limit.enabled"/>
<connect governor="use_labels.state" client="trim_labels.enabled"/>
- <connect governor="do_locale_conversion.state" client="encoding.enabled" />
- <convert id="other_encoding" mode="equals" standard="other" sources="encoding.string" />
- <convert id="other_encoding2" mode="and" sources="other_encoding;do_locale_conversion.state" />
- <connect governor="other_encoding2" client="user_encoding.enabled" />
+ <insert snippet="encoding_logic"/>
</logic>
<dialog label="Import SPSS file">
<tabbook>
@@ -34,31 +34,7 @@
<checkbox id="trim_labels" checked="false" label="Trim white space" value="1" value_unchecked="0"/>
</frame>
</tab>
- <tab id="tab_encoding" label="Encoding">
- <checkbox id="do_locale_conversion" checked="false" label="Convert string encoding" value="1" value_unchecked="0"/>
- <dropdown id="encoding" label="Convert from:">
- <option value="latin1" label="Latin 1" checked="true"/>
- <option value="UTF-8" label="UTF-8"/>
- <option value="ISO8859-1" label="ISO8859-1 Latin-1 Western European"/>
- <option value="ISO8859-2" label="ISO8859-2 Latin-2 Central European"/>
- <option value="ISO8859-3" label="ISO8859-3 Latin-3 South European"/>
- <option value="ISO8859-4" label="ISO8859-4 Latin-4 North European"/>
- <option value="ISO8859-5" label="ISO8859-5 Latin/Cyrillic"/>
- <option value="ISO8859-6" label="ISO8859-6 Latin/Arabic"/>
- <option value="ISO8859-7" label="ISO8859-7 Latin/Greek"/>
- <option value="MS-GREEK" label="MS-GREEK"/>
- <option value="ISO8859-8" label="ISO8859-8 Latin/Hebrew"/>
- <option value="ISO8859-9" label="ISO8859-9 Latin-5 Turkish"/>
- <option value="ISO8859-10" label="ISO8859-10 Latin-6 Nordic"/>
- <option value="ISO8859-11" label="ISO8859-11 Latin/Thai"/>
- <option value="ISO8859-13" label="ISO8859-13 Latin-7 Baltic Rim"/>
- <option value="ISO8859-14" label="ISO8859-14 Latin-8 Celtic"/>
- <option value="ISO8859-15" label="ISO8859-15 Latin-9 Western European (EUR)"/>
- <option value="ISO8859-16" label="ISO8859-16 Latin-10 South-Eastern European"/>
- <option value="other" label="Other (specify below)"/>
- </dropdown>
- <input id="user_encoding" label="Other encoding" required="true"/>
- </tab>
+ <insert snippet="encoding_tab"/>
</tabbook>
</dialog>
</document>
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_stata.js
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_stata.js 2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_stata.js 2014-09-30 07:50:57 UTC (rev 4857)
@@ -1,5 +1,8 @@
+include ("convert_encoding.js");
+
function preprocess () {
echo ('require (foreign)\n');
+ makeEncodingPreprocessCode ();
}
function calculate () {
@@ -32,6 +35,7 @@
var object = getValue ("saveto");
echo ('data <- read.dta ("' + getValue ("file") + '"' + options + ')\n');
+ makeEncodingCall ('data');
echo ('\n');
echo ('# set variable labels for use in RKWard\n');
echo ('labels <- attr (data, "var.labels")\n');
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_stata.rkh
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_stata.rkh 2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_stata.rkh 2014-09-30 07:50:57 UTC (rev 4857)
@@ -1,5 +1,8 @@
<!DOCTYPE rkhelp>
<document>
+ <snippets>
+ <include file="convert_encoding.xml"/>
+ </snippets>
<summary>
Import STATA data files.
</summary>
@@ -18,6 +21,7 @@
<setting id="convert_factors">R uses value labels only for factors. Should Stata variable with value labels be converted to factors?</setting>
<setting id="missing_type">Stata version 8 and above differentiates various different type of missing values. If this option is set, this information is stored in an attribute of the imported data. See <link href="rkward://rhelp/read.dta"/> for details.</setting>
<setting id="convert_underscore">The underscore ('_') is usually not used in R variable names, and may cause problems in some (rare) situations. Should underscore characters be converted to dots ('.')?</setting>
+ <insert snippet="encoding_doc"/>
</settings>
<related>
<ul>
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_stata.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_stata.xml 2014-09-29 17:41:27 UTC (rev 4856)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_stata.xml 2014-09-30 07:50:57 UTC (rev 4857)
@@ -2,9 +2,13 @@
<document>
<code file="import_stata.js" />
<help file="import_stata.rkh" />
+ <snippets>
+ <include file="convert_encoding.xml"/>
+ </snippets>
<logic>
<external id="filename"/>
<connect governor="filename" client="file.selection"/>
+ <insert snippet="encoding_logic"/>
</logic>
<dialog label="Import STATA file">
<tabbook>
@@ -23,6 +27,7 @@
<checkbox id="convert_underscore" checked="false" label="Convert '_' in Stata variable names to '.' in R names" value="1" value_unchecked="0"/>
</frame>
</tab>
+ <insert snippet="encoding_tab"/>
</tabbook>
</dialog>
</document>
More information about the rkward-tracker
mailing list