[rkward] /: Fix some quirks in gdata-based XLS import plugin, and add automated test.

Thomas Friedrichsmeier thomas.friedrichsmeier at ruhr-uni-bochum.de
Thu Jan 14 21:21:07 UTC 2016


Git commit d63feeaf84bfec28586eeb265deaf50a43dbc9e9 by Thomas Friedrichsmeier.
Committed on 14/01/2016 at 21:20.
Pushed by tfry into branch 'master'.

Fix some quirks in gdata-based XLS import plugin, and add automated test.

Activate both XLS-import plugins (move to import_export.pluginmap)

M  +1    -0    ChangeLog
M  +23   -9    rkward/plugins/00saveload/import/import_xls.js
M  +5    -4    rkward/plugins/00saveload/import/import_xls.rkh
M  +6    -8    rkward/plugins/00saveload/import/import_xls.xml
M  +8    -0    rkward/plugins/import_export.pluginmap
M  +0    -9    rkward/plugins/under_development.pluginmap
M  +5    -0    tests/import_export_plugins.R
A  +15   -0    tests/import_export_plugins/import_xls.rkcommands.R
A  +104  -0    tests/import_export_plugins/import_xls.rkout

http://commits.kde.org/rkward/d63feeaf84bfec28586eeb265deaf50a43dbc9e9

diff --git a/ChangeLog b/ChangeLog
index 5d68142..96dd806 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,4 @@
+- Add plugins for importing Excel files (.xls and .xlsx, Perl- and Java-based)
 - Add ability to extract <matrix> values row-wise in plugins
 - Add convenience JS-function "makeOption()" for use in plugins
 - Fixed: Numerical (display) precision setting was not honored in data editor
diff --git a/rkward/plugins/00saveload/import/import_xls.js b/rkward/plugins/00saveload/import/import_xls.js
index 208c623..572039d 100644
--- a/rkward/plugins/00saveload/import/import_xls.js
+++ b/rkward/plugins/00saveload/import/import_xls.js
@@ -1,8 +1,18 @@
-function preprocess () {
-	echo ('require (gdata)\n');
+function preview () {
+	preprocess (true);
+	calculate (true);
+	// printout ();
 }
 
-function calculate () {
+function preprocess (is_preview) {
+	if (is_preview) {
+		echo ('if (!base::require (gdata)) stop (' + i18n ("Preview not available, because package gdata is not installed or cannot be loaded.") + ')\n');
+	} else {
+		echo ('require (gdata)\n');
+	}
+}
+
+function calculate (is_preview) {
 	var header = getValue ("header");
 	var verbose = getValue ("verbose");
 	var sheet = getValue ("sheetname");
@@ -10,16 +20,20 @@ function calculate () {
 	var quote_char = getValue ("quote");
 	if (quote_char == "other") quote_char = quote (getValue ("custom_quote"));
 
-	var options = ", header=" + header + ", quote=" + quote_char + ", verbose=" + verbose;
+	var options = ", header=" + header + makeOption ("quote", quote_char) + ", verbose=" + verbose;
 
 	echo ('data <- read.xls ("' + getValue ("file") + '", sheet="' + sheet + '"' + options + ', ');
 	echo (' nrows=' + getValue ("nrows") + ', skip=' + getValue ("skip") + ', na.string="'+ getValue ("na") +'"' + getValue("strings_as_factors") + 
-	      ', check.names = ' + getValue("checkname") + ', strip.white = ' + getValue("stripwhite") + ')\n');
+	      ', strip.white = ' + getValue("stripwhite") + ')\n');
 
-	var object = getValue ("saveto");
-	echo ('.GlobalEnv$' + object + ' <- data		'); comment ('assign to globalenv()');
-	if (getValue ("doedit") ) {
-		echo ('rk.edit (.GlobalEnv$' + object + ')\n');
+	if (is_preview) {
+		echo ('preview_data <- data[1:min(50,dim(data)[1]),1:min(50,dim(data)[2])]\n');
+	} else {
+		var object = getValue ("saveto");
+		echo ('.GlobalEnv$' + object + ' <- data		'); comment ('assign to globalenv()');
+		if (getValue ("doedit") ) {
+			echo ('rk.edit (.GlobalEnv$' + object + ')\n');
+		}
 	}
 }
 
diff --git a/rkward/plugins/00saveload/import/import_xls.rkh b/rkward/plugins/00saveload/import/import_xls.rkh
index 68130dd..441746e 100644
--- a/rkward/plugins/00saveload/import/import_xls.rkh
+++ b/rkward/plugins/00saveload/import/import_xls.rkh
@@ -6,19 +6,20 @@ Import a sheet of a Microsoft Excel file into a data frame.
 
 	<usage>
 Choose the Microsoft Excel file to import a sheet from. An R object containing the data will be created. This method of importing Excel sheets is based on R package "gdata",
-and requires a working Perl setup. In case this does not work on your system, you may want to try the Java-based alternative <link href="rkward://component/import_xls_xlconnect"/>.
+and requires a working Perl setup. In case this does not work on your system, or does not produce the expected results, you may want to try the Java-based alternative
+<link href="rkward://component/import_xls_xlconnect"/>.
 	</usage>
 
 	<settings>
 		<caption id="tab_general"/>
 		<setting id="file">The filename of the file to import</setting>
 		<setting id="name">The name of an R object to store the imported data in. If you choose an existing symbol name, you will be prompted, whether to overwrite this object.</setting>
-		<setting id="sheetname">Specifies the sheet which is imported from a Microsoft EXCEL file</setting>
-		<setting id="verbose">Print details as the file is processed</setting>
+		<setting id="sheetname">Specifies the sheet to be imported</setting>
+		<setting id="verbose">Print details as the file is processed. This may be helpful in troubleshooting.</setting>
 		<setting id="skip">Number of rows to skip at the top</setting>
 		<setting id="nrows">Maximum number of rows to import</setting>
 		<setting id="doedit">Whether the object should be opened for editing after it was imported</setting>		
-		<setting id="header">Whether the first row of the file contains should be interpreted as column names</setting>
+		<setting id="header">Whether the first row of the file should be interpreted as column names</setting>
 	</settings>
 	<related>
 		<ul>
diff --git a/rkward/plugins/00saveload/import/import_xls.xml b/rkward/plugins/00saveload/import/import_xls.xml
index d93cc96..41f5b2c 100644
--- a/rkward/plugins/00saveload/import/import_xls.xml
+++ b/rkward/plugins/00saveload/import/import_xls.xml
@@ -13,14 +13,14 @@
 				<browser type="file" allow_urls="true" id="file" filter="*.xls *.xlsx" label="File name" />
 				<input label="Name or number of sheet" id="sheetname" initial="1" size="medium" />
 				<frame>
-					<checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Use first row as column names"/>
-					<checkbox value_unchecked="FALSE" checked="true" value="TRUE" id="checkname" label="Check syntax of the variables names" />
+					<checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Use first row as column names" checked="true"/>
 				</frame>
-				<row>
+				<frame>
 					<saveobject id="saveto" initial="my.xls.data" label="Object to save to"/>
-					<checkbox id="doedit" value="1" value_unchecked="0" label="Edit Object" checked="true" />
-				</row>
+					<checkbox id="doedit" label="Open imported data for editing" checked="true" />
+				</frame>
 				<stretch/>
+				<preview id="preview" active="true" mode="data"/>
 			</tab>
 			<tab id="tab_further_options" label="Further Options" >
 				<column>
@@ -38,9 +38,7 @@
 					<checkbox value_unchecked="FALSE" checked="false" value="TRUE" id="stripwhite" label="Strip leading and trailing whitespace" />
 		
 					<frame label="Error handling">
-						<row>
-						      <checkbox id="verbose" value="TRUE" value_unchecked="FALSE" checked="false" label="Print details as the file is processed"/>
-						</row>
+						<checkbox id="verbose" value="TRUE" value_unchecked="FALSE" checked="false" label="Print details as the file is processed"/>
 					</frame>
 				</column>
 			</tab>
diff --git a/rkward/plugins/import_export.pluginmap b/rkward/plugins/import_export.pluginmap
index 4e81e60..189cbab 100644
--- a/rkward/plugins/import_export.pluginmap
+++ b/rkward/plugins/import_export.pluginmap
@@ -35,6 +35,12 @@
 		<component type="standard" id="import_stata" file="import/import_stata.xml" label="Import Stata">
 			<attribute id="format" value="*.dta" label="Stata data files"/>
 		</component>
+		<component type="standard" id="import_xls" file="import/import_xls.xml" label="Import Excel files (Perl based)">
+			<attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Perl)"/>
+		</component>
+		<component type="standard" id="import_xls_xlconnect" file="import/import_xls_xlconnect.xml" label="Import Excel files (Java based)">
+			<attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Java)"/>
+		</component>
 
 		<component type="standard" id="setworkdir" file="setworkdir.xml" label="Set Working Directory" />
 	</components>
@@ -46,6 +52,8 @@
 					<entry component="import_spss"/>
 					<entry component="import_stata"/>
 					<entry component="import_csv"/>
+					<entry component="import_xls"/>
+					<entry component="import_xls_xlconnect"/>
 				</menu>
 				<entry component="load_r_object" />
 				<entry component="load_source" />
diff --git a/rkward/plugins/under_development.pluginmap b/rkward/plugins/under_development.pluginmap
index 8c020ee..857caa0 100644
--- a/rkward/plugins/under_development.pluginmap
+++ b/rkward/plugins/under_development.pluginmap
@@ -28,21 +28,12 @@
 		<component type="standard" id="matrix_test1" file="testing/matrix1.xml" label="Input Matrix Test" />
 		<component type="standard" id="valueselect_test1" file="testing/valueselect1.xml" label="Valueselector/valueslot/select Test" />
 <!-- End -->
-
-		<component type="standard" id="import_xls" file="00saveload/import/import_xls.xml" label="Import Excel files (Perl based)">
-			<attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Perl)"/>
-		</component>
-		<component type="standard" id="import_xls_xlconnect" file="00saveload/import/import_xls_xlconnect.xml" label="Import Excel files (Java based)">
-			<attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Java)"/>
-		</component>
 	</components>
 
 	<hierarchy>
 		<menu id="file" label="File">
 			<menu id="import" label="Import">
 				<menu id="import_format" label="Import format">
-					<entry component="import_xls"/>
-					<entry component="import_xls_xlconnect"/>
 				</menu>
 			</menu>
 			<menu id="export" label="Export">
diff --git a/tests/import_export_plugins.R b/tests/import_export_plugins.R
index 0ae7840..4444545 100644
--- a/tests/import_export_plugins.R
+++ b/tests/import_export_plugins.R
@@ -67,6 +67,11 @@ suite <- new ("RKTestSuite", id="import_export_plugins",
 			rk.call.plugin ("rkward::import_xls_xlconnect", autofitcol.state="1", autofitrow.state="1", coltypes.tsv="", doedit.state="0", file.selection="/home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls", header.state="0", range.text="A6:B9", rownames.text="", saveto.objectname="my.xlsx.data", saveto.parent=".GlobalEnv", sheet.text="1", submit.mode="submit")
 			rk.print (my.xlsx.data)
 		}, libraries=c("XLConnect"), files=c("../import_export_plugins_testfile.xls", "../import_export_plugins_testfile.xlsx")),
+		new ("RKTest", id="import_xls", call=function () {
+			rk.call.plugin ("rkward::import_xls", doedit.state="0", file.selection="/home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls", fill.state="0", header.state="1", na.text="NA", nrows.text="-1", saveto.objectname="my.xls.data", saveto.parent=".GlobalEnv", sheetname.text="1", skip.text="0", strings_as_factors.string="", stripwhite.state="0", verbose.state="0", submit.mode="submit")
+			rk.print (my.xls.data)
+			# TODO: known bug: does not import numeric columns from xlsx-files correctly
+		}, libraries=c("gdata"), files=c("../import_export_plugins_testfile.xls", "../import_export_plugins_testfile.xlsx")),
 		new ("RKTest", id="load_source", call=function () {
 			stopifnot (!exists ("testx", globalenv ()))
 
diff --git a/tests/import_export_plugins/import_xls.rkcommands.R b/tests/import_export_plugins/import_xls.rkcommands.R
new file mode 100644
index 0000000..6b2629e
--- /dev/null
+++ b/tests/import_export_plugins/import_xls.rkcommands.R
@@ -0,0 +1,15 @@
+local({
+## Prepare
+require (gdata)
+## Compute
+data <- read.xls ("/home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls", sheet="1", header=TRUE, verbose=FALSE,  nrows=-1, skip=0, na.string="NA", strip.white = FALSE)
+.GlobalEnv$my.xls.data <- data		# assign to globalenv()
+## Print result
+rk.header ("Import Microsoft EXCEL sheet", parameters=list("File name"="/home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls",
+	"Object to save to"="my.xls.data",
+	"Name or number of sheet"="1",
+	"Use first row as column names"="yes",
+	"Number of rows to skip"="0",
+	"Max number of rows to read (-1 for no limit)"="-1",
+	"Character for missing values"="NA"))
+})
diff --git a/tests/import_export_plugins/import_xls.rkout b/tests/import_export_plugins/import_xls.rkout
new file mode 100644
index 0000000..836cf2c
--- /dev/null
+++ b/tests/import_export_plugins/import_xls.rkout
@@ -0,0 +1,104 @@
+<h1>Import Microsoft EXCEL sheet</h1>
+<h2>Parameters</h2>
+<ul><li>File name: /home/thomas/develop/rkward/tests/import_export_plugins_testfile.xls</li>
+<li>Object to save to: my.xls.data</li>
+<li>Name or number of sheet: 1</li>
+<li>Use first row as column names: yes</li>
+<li>Number of rows to skip: 0</li>
+<li>Max number of rows to read (-1 for no limit): -1</li>
+<li>Character for missing values: NA</li>
+</ul>
+DATE<br />
+<h2>Messages, warnings, or errors:</h2>
+<pre class="output_warning">Calling POSIX::isdigit() is deprecated at /home/thomas/.rkward/library/gdata/perl/xls2csv.pl line 118.
+</pre>
+
+
+<p align= center >
+<table cellspacing=0 border=1>
+<caption align=bottom class=captiondataframe></caption>
+<tr><td>
+	<table border=0 class=dataframe>
+	<tbody> 
+	<tr class= firstline > 
+		<th>   </th>
+		<th>First.var  </th>
+		<th>Second.var  </th>
+		<th>Third.war  </th>
+		<th>Gap  </th>
+		<th>Fourth.var</th> 
+	</tr> 
+<tr> 
+<td class=firstcolumn>1
+</td>
+<td class=cellinside>5
+</td>
+<td class=cellinside>  a  
+</td>
+<td class=cellinside>1.1
+</td>
+<td class=cellinside> 
+</td>
+<td class=cellinside>This is a test
+</td></tr>
+ 
+<tr> 
+<td class=firstcolumn>2
+</td>
+<td class=cellinside>4
+</td>
+<td class=cellinside>b
+</td>
+<td class=cellinside>1.2
+</td>
+<td class=cellinside> 
+</td>
+<td class=cellinside>
+</td></tr>
+ 
+<tr> 
+<td class=firstcolumn>3
+</td>
+<td class=cellinside>3
+</td>
+<td class=cellinside>c
+</td>
+<td class=cellinside>1.3
+</td>
+<td class=cellinside> 
+</td>
+<td class=cellinside>This is a test
+</td></tr>
+ 
+<tr> 
+<td class=firstcolumn>4
+</td>
+<td class=cellinside>2
+</td>
+<td class=cellinside>b
+</td>
+<td class=cellinside>1.4
+</td>
+<td class=cellinside> 
+</td>
+<td class=cellinside>
+</td></tr>
+ 
+<tr> 
+<td class=firstcolumn>5
+</td>
+<td class=cellinside>1
+</td>
+<td class=cellinside>b
+</td>
+<td class=cellinside>1.6
+</td>
+<td class=cellinside> 
+</td>
+<td class=cellinside>This is a test
+</td></tr>
+ 
+	</tbody>
+</table>
+ </td></table>
+ <br>



More information about the rkward-tracker mailing list