[rkward] /: Fix some quirks in gdata-based XLS import plugin, and add automated test.
Thomas Friedrichsmeier
thomas.friedrichsmeier at ruhr-uni-bochum.de
Thu Jan 14 21:21:07 UTC 2016
Git commit d63feeaf84bfec28586eeb265deaf50a43dbc9e9 by Thomas Friedrichsmeier.
Committed on 14/01/2016 at 21:20.
Pushed by tfry into branch 'master'.
Fix some quirks in gdata-based XLS import plugin, and add automated test.
Activate both XLS-import plugins (move to import_export.pluginmap)
M +1 -0 ChangeLog
M +23 -9 rkward/plugins/00saveload/import/import_xls.js
M +5 -4 rkward/plugins/00saveload/import/import_xls.rkh
M +6 -8 rkward/plugins/00saveload/import/import_xls.xml
M +8 -0 rkward/plugins/import_export.pluginmap
M +0 -9 rkward/plugins/under_development.pluginmap
M +5 -0 tests/import_export_plugins.R
A +15 -0 tests/import_export_plugins/import_xls.rkcommands.R
A +104 -0 tests/import_export_plugins/import_xls.rkout
http://commits.kde.org/rkward/d63feeaf84bfec28586eeb265deaf50a43dbc9e9
diff --git a/ChangeLog b/ChangeLog
index 5d68142..96dd806 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,4 @@
+- Add plugins for importing Excel files (.xls and .xlsx, Perl- and Java-based)
- Add ability to extract <matrix> values row-wise in plugins
- Add convenience JS-function "makeOption()" for use in plugins
- Fixed: Numerical (display) precision setting was not honored in data editor
diff --git a/rkward/plugins/00saveload/import/import_xls.js b/rkward/plugins/00saveload/import/import_xls.js
index 208c623..572039d 100644
--- a/rkward/plugins/00saveload/import/import_xls.js
+++ b/rkward/plugins/00saveload/import/import_xls.js
@@ -1,8 +1,18 @@
-function preprocess () {
- echo ('require (gdata)\n');
+function preview () {
+ preprocess (true);
+ calculate (true);
+ // printout ();
}
-function calculate () {
+function preprocess (is_preview) {
+ if (is_preview) {
+ echo ('if (!base::require (gdata)) stop (' + i18n ("Preview not available, because package gdata is not installed or cannot be loaded.") + ')\n');
+ } else {
+ echo ('require (gdata)\n');
+ }
+}
+
+function calculate (is_preview) {
var header = getValue ("header");
var verbose = getValue ("verbose");
var sheet = getValue ("sheetname");
@@ -10,16 +20,20 @@ function calculate () {
var quote_char = getValue ("quote");
if (quote_char == "other") quote_char = quote (getValue ("custom_quote"));
- var options = ", header=" + header + ", quote=" + quote_char + ", verbose=" + verbose;
+ var options = ", header=" + header + makeOption ("quote", quote_char) + ", verbose=" + verbose;
echo ('data <- read.xls ("' + getValue ("file") + '", sheet="' + sheet + '"' + options + ', ');
echo (' nrows=' + getValue ("nrows") + ', skip=' + getValue ("skip") + ', na.string="'+ getValue ("na") +'"' + getValue("strings_as_factors") +
- ', check.names = ' + getValue("checkname") + ', strip.white = ' + getValue("stripwhite") + ')\n');
+ ', strip.white = ' + getValue("stripwhite") + ')\n');
- var object = getValue ("saveto");
- echo ('.GlobalEnv$' + object + ' <- data '); comment ('assign to globalenv()');
- if (getValue ("doedit") ) {
- echo ('rk.edit (.GlobalEnv$' + object + ')\n');
+ if (is_preview) {
+ echo ('preview_data <- data[1:min(50,dim(data)[1]),1:min(50,dim(data)[2])]\n');
+ } else {
+ var object = getValue ("saveto");
+ echo ('.GlobalEnv$' + object + ' <- data '); comment ('assign to globalenv()');
+ if (getValue ("doedit") ) {
+ echo ('rk.edit (.GlobalEnv$' + object + ')\n');
+ }
}
}
diff --git a/rkward/plugins/00saveload/import/import_xls.rkh b/rkward/plugins/00saveload/import/import_xls.rkh
index 68130dd..441746e 100644
--- a/rkward/plugins/00saveload/import/import_xls.rkh
+++ b/rkward/plugins/00saveload/import/import_xls.rkh
@@ -6,19 +6,20 @@ Import a sheet of a Microsoft Excel file into a data frame.
<usage>
Choose the Microsoft Excel file to import a sheet from. An R object containing the data will be created. This method of importing Excel sheets is based on R package "gdata",
-and requires a working Perl setup. In case this does not work on your system, you may want to try the Java-based alternative <link href="rkward://component/import_xls_xlconnect"/>.
+and requires a working Perl setup. In case this does not work on your system, or does not produce the expected results, you may want to try the Java-based alternative
+<link href="rkward://component/import_xls_xlconnect"/>.
</usage>
<settings>
<caption id="tab_general"/>
<setting id="file">The filename of the file to import</setting>
<setting id="name">The name of an R object to store the imported data in. If you choose an existing symbol name, you will be prompted, whether to overwrite this object.</setting>
- <setting id="sheetname">Specifies the sheet which is imported from a Microsoft EXCEL file</setting>
- <setting id="verbose">Print details as the file is processed</setting>
+ <setting id="sheetname">Specifies the sheet to be imported</setting>
+ <setting id="verbose">Print details as the file is processed. This may be helpful in troubleshooting.</setting>
<setting id="skip">Number of rows to skip at the top</setting>
<setting id="nrows">Maximum number of rows to import</setting>
<setting id="doedit">Whether the object should be opened for editing after it was imported</setting>
- <setting id="header">Whether the first row of the file contains should be interpreted as column names</setting>
+ <setting id="header">Whether the first row of the file should be interpreted as column names</setting>
</settings>
<related>
<ul>
diff --git a/rkward/plugins/00saveload/import/import_xls.xml b/rkward/plugins/00saveload/import/import_xls.xml
index d93cc96..41f5b2c 100644
--- a/rkward/plugins/00saveload/import/import_xls.xml
+++ b/rkward/plugins/00saveload/import/import_xls.xml
@@ -13,14 +13,14 @@
<browser type="file" allow_urls="true" id="file" filter="*.xls *.xlsx" label="File name" />
<input label="Name or number of sheet" id="sheetname" initial="1" size="medium" />
<frame>
- <checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Use first row as column names"/>
- <checkbox value_unchecked="FALSE" checked="true" value="TRUE" id="checkname" label="Check syntax of the variables names" />
+ <checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Use first row as column names" checked="true"/>
</frame>
- <row>
+ <frame>
<saveobject id="saveto" initial="my.xls.data" label="Object to save to"/>
- <checkbox id="doedit" value="1" value_unchecked="0" label="Edit Object" checked="true" />
- </row>
+ <checkbox id="doedit" label="Open imported data for editing" checked="true" />
+ </frame>
<stretch/>
+ <preview id="preview" active="true" mode="data"/>
</tab>
<tab id="tab_further_options" label="Further Options" >
<column>
@@ -38,9 +38,7 @@
<checkbox value_unchecked="FALSE" checked="false" value="TRUE" id="stripwhite" label="Strip leading and trailing whitespace" />
<frame label="Error handling">
- <row>
- <checkbox id="verbose" value="TRUE" value_unchecked="FALSE" checked="false" label="Print details as the file is processed"/>
- </row>
+ <checkbox id="verbose" value="TRUE" value_unchecked="FALSE" checked="false" label="Print details as the file is processed"/>
</frame>
</column>
</tab>
diff --git a/rkward/plugins/import_export.pluginmap b/rkward/plugins/import_export.pluginmap
index 4e81e60..189cbab 100644
--- a/rkward/plugins/import_export.pluginmap
+++ b/rkward/plugins/import_export.pluginmap
@@ -35,6 +35,12 @@
<component type="standard" id="import_stata" file="import/import_stata.xml" label="Import Stata">
<attribute id="format" value="*.dta" label="Stata data files"/>
</component>
+ <component type="standard" id="import_xls" file="import/import_xls.xml" label="Import Excel files (Perl based)">
+ <attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Perl)"/>
+ </component>
+ <component type="standard" id="import_xls_xlconnect" file="import/import_xls_xlconnect.xml" label="Import Excel files (Java based)">
+ <attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Java)"/>
+ </component>
<component type="standard" id="setworkdir" file="setworkdir.xml" label="Set Working Directory" />
</components>
@@ -46,6 +52,8 @@
<entry component="import_spss"/>
<entry component="import_stata"/>
<entry component="import_csv"/>
+ <entry component="import_xls"/>
+ <entry component="import_xls_xlconnect"/>
</menu>
<entry component="load_r_object" />
<entry component="load_source" />
diff --git a/rkward/plugins/under_development.pluginmap b/rkward/plugins/under_development.pluginmap
index 8c020ee..857caa0 100644
--- a/rkward/plugins/under_development.pluginmap
+++ b/rkward/plugins/under_development.pluginmap
@@ -28,21 +28,12 @@
<component type="standard" id="matrix_test1" file="testing/matrix1.xml" label="Input Matrix Test" />
<component type="standard" id="valueselect_test1" file="testing/valueselect1.xml" label="Valueselector/valueslot/select Test" />
<!-- End -->
-
- <component type="standard" id="import_xls" file="00saveload/import/import_xls.xml" label="Import Excel files (Perl based)">
- <attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Perl)"/>
- </component>
- <component type="standard" id="import_xls_xlconnect" file="00saveload/import/import_xls_xlconnect.xml" label="Import Excel files (Java based)">
- <attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Java)"/>
- </component>
</components>
<hierarchy>
<menu id="file" label="File">
<menu id="import" label="Import">
<menu id="import_format" label="Import format">
- <entry component="import_xls"/>
- <entry component="import_xls_xlconnect"/>
</menu>
</menu>
<menu id="export" label="Export">
diff --git a/tests/import_export_plugins.R b/tests/import_export_plugins.R
index 0ae7840..4444545 100644
--- a/tests/import_export_plugins.R
+++ b/tests/import_export_plugins.R
@@ -67,6 +67,11 @@ suite <- new ("RKTestSuite", id="import_export_plugins",
rk.call.plugin ("rkward::import_xls_xlconnect", autofitcol.state="1", autofitrow.state="1", coltypes.tsv="", doedit.state="0", file.selection="/home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls", header.state="0", range.text="A6:B9", rownames.text="", saveto.objectname="my.xlsx.data", saveto.parent=".GlobalEnv", sheet.text="1", submit.mode="submit")
rk.print (my.xlsx.data)
}, libraries=c("XLConnect"), files=c("../import_export_plugins_testfile.xls", "../import_export_plugins_testfile.xlsx")),
+ new ("RKTest", id="import_xls", call=function () {
+ rk.call.plugin ("rkward::import_xls", doedit.state="0", file.selection="/home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls", fill.state="0", header.state="1", na.text="NA", nrows.text="-1", saveto.objectname="my.xls.data", saveto.parent=".GlobalEnv", sheetname.text="1", skip.text="0", strings_as_factors.string="", stripwhite.state="0", verbose.state="0", submit.mode="submit")
+ rk.print (my.xls.data)
+ # TODO: known bug: does not import numeric columns from xlsx-files correctly
+ }, libraries=c("gdata"), files=c("../import_export_plugins_testfile.xls", "../import_export_plugins_testfile.xlsx")),
new ("RKTest", id="load_source", call=function () {
stopifnot (!exists ("testx", globalenv ()))
diff --git a/tests/import_export_plugins/import_xls.rkcommands.R b/tests/import_export_plugins/import_xls.rkcommands.R
new file mode 100644
index 0000000..6b2629e
--- /dev/null
+++ b/tests/import_export_plugins/import_xls.rkcommands.R
@@ -0,0 +1,15 @@
+local({
+## Prepare
+require (gdata)
+## Compute
+data <- read.xls ("/home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls", sheet="1", header=TRUE, verbose=FALSE, nrows=-1, skip=0, na.string="NA", strip.white = FALSE)
+.GlobalEnv$my.xls.data <- data # assign to globalenv()
+## Print result
+rk.header ("Import Microsoft EXCEL sheet", parameters=list("File name"="/home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls",
+ "Object to save to"="my.xls.data",
+ "Name or number of sheet"="1",
+ "Use first row as column names"="yes",
+ "Number of rows to skip"="0",
+ "Max number of rows to read (-1 for no limit)"="-1",
+ "Character for missing values"="NA"))
+})
diff --git a/tests/import_export_plugins/import_xls.rkout b/tests/import_export_plugins/import_xls.rkout
new file mode 100644
index 0000000..836cf2c
--- /dev/null
+++ b/tests/import_export_plugins/import_xls.rkout
@@ -0,0 +1,104 @@
+<h1>Import Microsoft EXCEL sheet</h1>
+<h2>Parameters</h2>
+<ul><li>File name: /home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls</li>
+<li>Object to save to: my.xls.data</li>
+<li>Name or number of sheet: 1</li>
+<li>Use first row as column names: yes</li>
+<li>Number of rows to skip: 0</li>
+<li>Max number of rows to read (-1 for no limit): -1</li>
+<li>Character for missing values: NA</li>
+</ul>
+DATE<br />
+<h2>Messages, warnings, or errors:</h2>
+<pre class="output_warning">Calling POSIX::isdigit() is deprecated at /home/thomas/.rkward/library/gdata/perl/xls2csv.pl line 118.
+</pre>
+
+
+<p align= center >
+<table cellspacing=0 border=1>
+<caption align=bottom class=captiondataframe></caption>
+<tr><td>
+ <table border=0 class=dataframe>
+ <tbody>
+ <tr class= firstline >
+ <th> </th>
+ <th>First.var </th>
+ <th>Second.var </th>
+ <th>Third.war </th>
+ <th>Gap </th>
+ <th>Fourth.var</th>
+ </tr>
+<tr>
+<td class=firstcolumn>1
+</td>
+<td class=cellinside>5
+</td>
+<td class=cellinside> a
+</td>
+<td class=cellinside>1.1
+</td>
+<td class=cellinside>
+</td>
+<td class=cellinside>This is a test
+</td></tr>
+
+<tr>
+<td class=firstcolumn>2
+</td>
+<td class=cellinside>4
+</td>
+<td class=cellinside>b
+</td>
+<td class=cellinside>1.2
+</td>
+<td class=cellinside>
+</td>
+<td class=cellinside>
+</td></tr>
+
+<tr>
+<td class=firstcolumn>3
+</td>
+<td class=cellinside>3
+</td>
+<td class=cellinside>c
+</td>
+<td class=cellinside>1.3
+</td>
+<td class=cellinside>
+</td>
+<td class=cellinside>This is a test
+</td></tr>
+
+<tr>
+<td class=firstcolumn>4
+</td>
+<td class=cellinside>2
+</td>
+<td class=cellinside>b
+</td>
+<td class=cellinside>1.4
+</td>
+<td class=cellinside>
+</td>
+<td class=cellinside>
+</td></tr>
+
+<tr>
+<td class=firstcolumn>5
+</td>
+<td class=cellinside>1
+</td>
+<td class=cellinside>b
+</td>
+<td class=cellinside>1.6
+</td>
+<td class=cellinside>
+</td>
+<td class=cellinside>This is a test
+</td></tr>
+
+ </tbody>
+</table>
+ </td></table>
+ <br>
More information about the rkward-tracker
mailing list