[rkward] rkward/plugins: Modernize XLS-import plugin a bit, and add a Java-based alternative.

Thomas Friedrichsmeier thomas.friedrichsmeier at ruhr-uni-bochum.de
Thu Jan 14 09:44:45 UTC 2016


Git commit cbd266e5515f32dfd2c38e9e7098cf0dbc7c3c13 by Thomas Friedrichsmeier.
Committed on 14/01/2016 at 09:43.
Pushed by tfry into branch 'master'.

Modernize XLS-import plugin a bit, and add a Java-based alternative.
Both plugins are unfinished. The former still needs a preview, both need testing (both manual and automatic).

M  +2    -6    rkward/plugins/00saveload/import/import_csv.js
M  +5    -4    rkward/plugins/00saveload/import/import_xls.rkh
M  +12   -11   rkward/plugins/00saveload/import/import_xls.xml
A  +61   -0    rkward/plugins/00saveload/import/import_xls_xlconnect.js
A  +52   -0    rkward/plugins/00saveload/import/import_xls_xlconnect.rkh
A  +55   -0    rkward/plugins/00saveload/import/import_xls_xlconnect.xml
M  +6    -2    rkward/plugins/under_development.pluginmap

http://commits.kde.org/rkward/cbd266e5515f32dfd2c38e9e7098cf0dbc7c3c13

diff --git a/rkward/plugins/00saveload/import/import_csv.js b/rkward/plugins/00saveload/import/import_csv.js
index 505e967..c647e1d 100644
--- a/rkward/plugins/00saveload/import/import_csv.js
+++ b/rkward/plugins/00saveload/import/import_csv.js
@@ -1,12 +1,8 @@
 function preview () {
-	doCalculate (true);
+	calculate (true);
 }
 
-function calculate () {
-	doCalculate (false);
-}
-
-function doCalculate (is_preview) {
+function calculate (is_preview) {
 	var tableOptions = "";
 	var quick = getValue ("quick");
 	if (quick == "custom") quick = "table";   // Difference only relevant in UI
diff --git a/rkward/plugins/00saveload/import/import_xls.rkh b/rkward/plugins/00saveload/import/import_xls.rkh
index dd4fbb8..68130dd 100644
--- a/rkward/plugins/00saveload/import/import_xls.rkh
+++ b/rkward/plugins/00saveload/import/import_xls.rkh
@@ -5,8 +5,8 @@ Import a sheet of a Microsoft Excel file into a data frame.
 	</summary>
 
 	<usage>
-Choose the Microsoft Excel file (XLS (Excel 97-2004), XLSX (Excel 2007+)) to import a sheet from. An R object containing the data will be created. See <link href="rkward://rhelp/read.table" />.
-Note: The this plugin requires a properly working Perl setup.  See <link href="rkward://rhelp/read.xls" /> for details.
+Choose the Microsoft Excel file to import a sheet from. An R object containing the data will be created. This method of importing Excel sheets is based on R package "gdata",
+and requires a working Perl setup. In case this does not work on your system, you may want to try the Java-based alternative <link href="rkward://component/import_xls_xlconnect"/>.
 	</usage>
 
 	<settings>
@@ -15,14 +15,15 @@ Note: The this plugin requires a properly working Perl setup.  See <link href="r
 		<setting id="name">The name of an R object to store the imported data in. If you choose an existing symbol name, you will be prompted, whether to overwrite this object.</setting>
 		<setting id="sheetname">Specifies the sheet which is imported from a Microsoft EXCEL file</setting>
 		<setting id="verbose">Print details as the file is processed</setting>
-		<setting id="skip">Defines a number of rows to skip</setting>
-		<setting id="skip">Defines a number of rows to skip</setting>
+		<setting id="skip">Number of rows to skip at the top</setting>
+		<setting id="nrows">Maximum number of rows to import</setting>
 		<setting id="doedit">Whether the object should be opened for editing after it was imported</setting>		
 		<setting id="header">Whether the first row of the file contains should be interpreted as column names</setting>
 	</settings>
 	<related>
 		<ul>
 			<li><link href="rkward://rhelp/read.xls"/></li>
+			<li><link href="rkward://component/import_xls_xlconnect"/></li>
 			<li><link href="rkward://component/import_csv"/></li>
 			<li><link href="rkward://rhelp/read.table"/></li>
 		</ul>
diff --git a/rkward/plugins/00saveload/import/import_xls.xml b/rkward/plugins/00saveload/import/import_xls.xml
index 6d5bb70..fcbb378 100644
--- a/rkward/plugins/00saveload/import/import_xls.xml
+++ b/rkward/plugins/00saveload/import/import_xls.xml
@@ -10,9 +10,12 @@
 	<dialog label="Import Microsoft EXCEL sheet">
 		<tabbook>
 			<tab id="tab_general" label="General">
-				<browser type="file" allow_urls="true" id="file" label="File name" />
-				<input label="Name of sheet" id="sheetname" initial="1" size="medium" />
-				<stretch/>
+				<browser type="file" allow_urls="true" id="file" filter="*.xls *.xlsx" label="File name" />
+				<input label="Name or number of sheet" id="sheetname" initial="1" size="medium" />
+				<frame>
+					<checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Use first row as column names"/>
+					<checkbox value_unchecked="FALSE" checked="true" value="TRUE" id="checkname" label="Check syntax of the variables names" />
+				</frame>
 				<row>
 					<saveobject id="saveto" initial="my.xls.data" label="Object to save to"/>
 					<checkbox id="doedit" value="1" value_unchecked="0" label="Edit Object" checked="true" />
@@ -21,20 +24,18 @@
 			</tab>
 			<tab id="tab_further_options" label="Further Options" >
 				<column>
-				  	<checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Column names in first row"/>
-					<row>
-					      <input size="small" initial="-1" id="nrows" label="Max number of rows to read (-1 for no limit)" />
-					      <input size="small" initial="-1" id="skip" label="Number of rows to skip" />
-					</row>
+					<frame label="Range" id="frame_range">
+						<input size="small" initial="-1" id="nrows" label="Max number of rows to read (-1 for no limit)" />
+						<input size="small" initial="0" id="skip" label="Number of rows to skip" />
+					</frame>
 					<input size="small" initial="NA" id="na" label="Character for missing values" />
 					<radio id="strings_as_factors" label="Convert character columns to factors" >
 					  <option value=", stringsAsFactors=TRUE" label="Convert to factor" />
 					  <option value="" label="Default" checked="true" />
 					  <option value=", stringsAsFactors=FALSE" label="Do not convert" />
 					</radio>
-					<checkbox id="fill" checked="true" value="TRUE" value_unchecked="FALSE" label="Fill the rows if unequal length" />
-					<checkbox value_unchecked="FALSE" checked="true" value="TRUE" id="checkname" label="Check syntax of the variables names" />
-					<checkbox value_unchecked="FALSE" checked="false" value="TRUE" id="stripwhite" label="Strip white space" />
+					<checkbox id="fill" checked="false" value="TRUE" value_unchecked="FALSE" label="Pad rows if unequal length" />
+					<checkbox value_unchecked="FALSE" checked="false" value="TRUE" id="stripwhite" label="Strip leading and trailing whitespace" />
 		
 					<frame label="Error handling">
 						<row>
diff --git a/rkward/plugins/00saveload/import/import_xls_xlconnect.js b/rkward/plugins/00saveload/import/import_xls_xlconnect.js
new file mode 100644
index 0000000..419d167
--- /dev/null
+++ b/rkward/plugins/00saveload/import/import_xls_xlconnect.js
@@ -0,0 +1,61 @@
+include ("convert_encoding.js");
+
+function preprocess () {
+	doPreprocess (false);
+}
+
+function doPreprocess (is_preview) {
+	if (is_preview) {
+		echo ('if (!base::require (XLConnect)) stop (' + i18n ("Preview not available, because package XLConnect is not installed or cannot be loaded.") + ')\n');
+	} else {
+		echo ('require (XLConnect)\n');
+	}
+	makeEncodingPreprocessCode ();
+}
+
+function preview () {
+	doPreprocess (true);
+	doCalculate (true);
+}
+
+function calculate () {
+	doCalculate (false);
+}
+
+function doCalculate (is_preview) {
+	var options = '';
+
+	var range = getString ("range");
+	if (range) options += ', region=' + quote ("range");
+	else {
+		options += makeOption ("startRow", getString ("startrow")) + makeOption ("startCol", getString ("startcol")) + makeOption ("endRow", getString ("endrow")) + makeOption ("endCol", getString ("endcol"));
+	}
+
+	if (!getBoolean ("autofitrow")) options += ', autoFitRow=FALSE';
+	if (!getBoolean ("autofitcol")) options += ', autoFitCol=FALSE';
+	if (!getBoolean ("header")) options += ', header=FALSE';
+	options += makeOption ("rownames", getString ("rownames"));
+
+	if (getValue ("coltypes.columns") > 0) {
+		options += ', colTypes=c (' + getList ("coltypes.row.0") + ')';
+	}
+
+	echo ('data <- readWorksheetFromFile (' + quote (getString ("file")) + ', sheet=' + getString ("sheet") + options + ')\n');
+	makeEncodingCall ('data');
+	echo ('\n');
+	if (is_preview) {
+		echo ('preview_data <- data[1:min(50,dim(data)[1]),1:min(50,dim(data)[2])]\n');
+	} else {
+		var object = getString ("saveto");
+		echo ('.GlobalEnv$' + object + ' <- data		'); comment ('assign to globalenv()');
+		if (getValue ("doedit")) {
+			echo ('rk.edit (.GlobalEnv$' + object + ')\n');
+		}
+	}
+}
+
+function printout () {
+	new Header (i18n ("Import SPSS data")).addFromUI ("file").addFromUI ("saveto").print ();
+}
+
+ 
diff --git a/rkward/plugins/00saveload/import/import_xls_xlconnect.rkh b/rkward/plugins/00saveload/import/import_xls_xlconnect.rkh
new file mode 100644
index 0000000..502b2c8
--- /dev/null
+++ b/rkward/plugins/00saveload/import/import_xls_xlconnect.rkh
@@ -0,0 +1,52 @@
+<!DOCTYPE rkhelp>
+<document>
+ 	<snippets>
+		<include file="convert_encoding.xml"/>
+	</snippets>
+
+	<summary>
+Import a sheet of a Microsoft Excel file into a data frame.
+	</summary>
+
+	<usage>
+Choose the Microsoft Excel file to import a sheet from. An R object containing the data will be created. This method of importing Excel sheets is based on R package "XLconnect",
+and requires a working Java-setup. In case this does not work on your system, you may want to try the Perl-based alternative <link href="rkward://component/import_xls"/>.
+	</usage>
+
+	<settings>
+		<caption id="tab_general"/>
+		<setting id="file">The filename of the file to import</setting>
+		<setting id="sheet">Numeric index or name/label of the sheet to import from the file. If you specify a name, here, make sure to quote it.</setting>
+		<setting id="header">Whether the first row of data should be used as column headers. Alternatively, columns are named, automatically (by index).</setting>
+		<setting id="rownames">Numeric index or name/label of the row containing row names. If you specify a name, here, make sure to quote it. If you leave this field empty, row-names will
+		                       be assigned automatically (row numbers)</setting>
+		<setting id="saveto">The name of an R object to store the imported data in. If you choose an existing symbol name, you will be prompted, whether to overwrite this object.</setting>
+		<setting id="doedit">Whether the object should be opened for editing after it was imported</setting>
+		<setting id="preview">Show a preview of the imported data. At most the first 50 rows and columns will be shown.</setting>
+		<caption id="tab_rows_columns"/>
+		<caption id="startrc"/>
+		<setting id="startrow">First row of data to import within the sheet. If left to 0 (the default), the start row will be determined, automatically. Alternatively, you can specify a range
+		                       in spreadsheet notation, below.</setting>
+		<setting id="startcol">First column of data to import within the sheet. See <label id="startrow"/>, above.</setting>
+		<caption id="endrc"/>
+		<setting id="endrow">Last row of data to import within the sheet. See <label id="startrow"/>, above.</setting>
+		<setting id="endcol">Last column of data to import within the sheet. See <label id="startrow"/>, above.</setting>
+		<setting id="range">Specify the range to import within the sheet in spreadsheet notation (e.g. "A10:B20").</setting>
+		<setting id="autofitrow">If checked, empty leading and trailing rows will be omitted from the imported data.</setting>
+		<setting id="autofitcol">If checked, empty leading and trailing columns will be omitted from the imported data.</setting>
+		<setting id="coltypes">Allows you to override the automatic data type detection. Specify R data types such as "character" or "numeric", here. Any specification you make here will be applied
+		                       to <b>all</b> imported columns (the specification will be "recycled", if necessary). To use automatic data type detection, leave this control empty, entirely. Also
+		                       note that most common type conversions can also be done <i>after</i> the data has been imported.</setting>
+
+		<insert snippet="encoding_doc"/>
+	</settings>
+	<related>
+		<ul>
+			<li><link href="rkward://rhelp/read.xls"/></li>
+			<li><link href="rkward://component/import_xls"/></li>
+			<li><link href="rkward://component/import_csv"/></li>
+			<li><link href="rkward://rhelp/read.table"/></li>
+		</ul>
+	</related>
+</document>
+ 
diff --git a/rkward/plugins/00saveload/import/import_xls_xlconnect.xml b/rkward/plugins/00saveload/import/import_xls_xlconnect.xml
new file mode 100644
index 0000000..71ac9e8
--- /dev/null
+++ b/rkward/plugins/00saveload/import/import_xls_xlconnect.xml
@@ -0,0 +1,55 @@
+<!DOCTYPE rkplugin>
+<document>
+	<code file="import_xls_xlconnect.js" />
+	<help file="import_xls_xlconnect.rkh" />
+	<snippets>
+		<include file="convert_encoding.xml"/>
+	</snippets>
+	<logic>
+		<convert id="range_empty" mode="equals" sources="range" standard=""/>
+		<connect governor="range_empty" client="startrc.enabled"/>
+		<connect governor="range_empty" client="endrc.enabled"/>
+	</logic>
+	<dialog label="Import Microsoft EXCEL sheet">
+		<tabbook>
+			<tab id="tab_general" label="General">
+				<browser type="file" id="file" filter="*.xls *.xlsx" label="File name" />
+				<input label="Index or "name" of sheet" id="sheet" initial="1" size="small" />
+				<frame>
+					<checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Use first row as column names"/>
+					<input id="rownames" initial="" size="small" label="Index or "name" of column with row names (if any)"/>
+				</frame>
+				<frame>
+					<saveobject id="saveto" initial="my.xls.data" label="Object to save to"/>
+					<checkbox id="doedit" value="1" value_unchecked="0" label="Open imported data for editing" checked="true" />
+				</frame>
+				<stretch/>
+				<preview id="preview" active="true" mode="data" label="Preview"/>
+			</tab>
+			<tab id="tab_rows_columns" label="Rows and columns">
+				<frame label="Range">
+					<row>
+						<frame label="Start row / column ("0" for automatic)" id="startrc">
+							<spinbox id="startrow" initial="0" min="0" type="integer" label="Row"/>
+							<spinbox id="startcol" initial="0" min="0" type="integer" label="Column"/>
+						</frame>
+						<frame label="End row / column ("0" for automatic)" id="endrc">
+							<spinbox id="endrow" initial="0" min="0" type="integer" label="Row"/>
+							<spinbox id="endcol" initial="0" min="0" type="integer" label="Column"/>
+						</frame>
+					</row>
+					<row><text>- <b>or</b> Excel Range (e.g. "A10:B20") -</text></row>
+					<input id="range" label="" size="small" initial="" />
+				</frame>
+				<frame>
+					<checkbox id="autofitrow" label="Skip empty leading or trailing rows" checked="true"/>
+					<checkbox id="autofitcol" label="Skip empty leading or trailing columns" checked="true"/>
+				</frame>
+				<matrix id="coltypes" label="Column classes (empty for automatic)" mode="string" allow_user_resize_columns="true" allow_user_resize_rows="false" rows="1" fixed_height="true" vert_headers=""/>
+				<stretch/>
+			</tab>
+			<insert snippet="encoding_logic"/>
+		</tabbook>
+	</dialog>
+</document>
+ 
diff --git a/rkward/plugins/under_development.pluginmap b/rkward/plugins/under_development.pluginmap
index 319f68a..8c020ee 100644
--- a/rkward/plugins/under_development.pluginmap
+++ b/rkward/plugins/under_development.pluginmap
@@ -29,8 +29,11 @@
 		<component type="standard" id="valueselect_test1" file="testing/valueselect1.xml" label="Valueselector/valueslot/select Test" />
 <!-- End -->
 
-		<component type="standard" id="import_xls" file="00saveload/import/import_xls.xml" label="Import MS EXCEL">
-			<attribute id="format" value="*.xls *.xlsx" label="MS EXCEL sheet"/>
+		<component type="standard" id="import_xls" file="00saveload/import/import_xls.xml" label="Import Excel files (Perl based)">
+			<attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Perl)"/>
+		</component>
+		<component type="standard" id="import_xls_xlconnect" file="00saveload/import/import_xls_xlconnect.xml" label="Import Excel files (Java based)">
+			<attribute id="format" value="*.xls *.xlsx" label="Microsoft Excel files (Java)"/>
 		</component>
 	</components>
 
@@ -39,6 +42,7 @@
 			<menu id="import" label="Import">
 				<menu id="import_format" label="Import format">
 					<entry component="import_xls"/>
+					<entry component="import_xls_xlconnect"/>
 				</menu>
 			</menu>
 			<menu id="export" label="Export">



More information about the rkward-tracker mailing list