[rkward-cvs] SF.net SVN: rkward:[4063] trunk/rkward/rkward/plugins/00saveload/import

sjar at users.sourceforge.net sjar at users.sourceforge.net
Thu Dec 1 23:26:52 UTC 2011


Revision: 4063
          http://rkward.svn.sourceforge.net/rkward/?rev=4063&view=rev
Author:   sjar
Date:     2011-12-01 23:26:52 +0000 (Thu, 01 Dec 2011)
Log Message:
-----------
first working plugin of XLS/XLSX import for RKWard

It was created as an independent plugin instead of being hooked into an existing one because it:
+ requires yet another package (gdata)
+ has a rather slow import speed (somewhat CPU hungry): XLS -> perl script -> read.x -> data.frame
   #e.g., reading 3000 lines and 256 columns took 100% CPU usage, circa 4 min and 350 mb on my machine (2x Intel Atom N270 @ clocked at [ 1333.000 MHz ])
   # therefore no multiple sheet import is/will be implemented
+ depends on a working Perl installation
+ it is read from more or less binary blobs
+ ...

Todo:
+ find a way to make the Perl path user-definable (e.g. if Perl is installed in an unusual location) and persistent (@ Thomas hope you know what I mean)
+ give users the option to insert a sheet name instead of the number
+ test on MS Windows
+ add further warning for large data sets

Added Paths:
-----------
    trunk/rkward/rkward/plugins/00saveload/import/import_xls.js
    trunk/rkward/rkward/plugins/00saveload/import/import_xls.rkh
    trunk/rkward/rkward/plugins/00saveload/import/import_xls.xml

Added: trunk/rkward/rkward/plugins/00saveload/import/import_xls.js
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_xls.js	                        (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_xls.js	2011-12-01 23:26:52 UTC (rev 4063)
@@ -0,0 +1,35 @@
+function preprocess () {	// emits the R statement that loads the gdata package before the import code runs
+	echo ('require (gdata)\n');
+}
+
+function calculate () {
+	var options = "";
+	
+	var sheet = getValue ("sheet");
+	var header = getValue ("header");
+	var verbose = getValue ("verbose");
+	
+	var quote_char = getValue ("quote");
+	if (quote_char == "other") quote_char = quote (getValue ("custom_quote"));
+		
+	options = ", sheet=" + sheet + ", header=" + header + ", verbose=" + verbose;
+
+	var object = getValue ("saveto");
+
+	echo ('data <- read.xls ("' + getValue ("file") + '"' + options + ', ');
+	echo (' nrows=' + getValue ("nrows") + ', skip=' + getValue ("skip") + ', na.string="'+ getValue ("na") +'"' + getValue("strings_as_factors") + 
+	      ', check.names = ' + getValue("checkname") + ', strip.white = ' + getValue("stripwhite") + ')\n');
+	echo ('.GlobalEnv$' + object + ' <- data		# assign to globalenv()\n');
+	if (getValue ("doedit") ) {
+		echo ('rk.edit (.GlobalEnv$' + object + ')\n');
+	}
+}
+
+function printout () {
+	makeHeaderCode ("Import Microsoft EXCEL sheet", new Array("File", getValue ("file"), "Imported to", getValue ("saveto"), 
+								  "Imported Sheet", getValue ("sheet"), "First row as header", getValue ("header"), 
+								  "Max number of rows to skip (-1 for no limit)", getValue ("skip"),
+								  "Number of lines to read (-1 for no limit)", getValue ("nrows"),
+								  "Character for missing values", getValue ("na")));
+}
+

Added: trunk/rkward/rkward/plugins/00saveload/import/import_xls.rkh
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_xls.rkh	                        (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_xls.rkh	2011-12-01 23:26:52 UTC (rev 4063)
@@ -0,0 +1,30 @@
+<!DOCTYPE rkhelp>
+<document>
+	<summary>
+Import a sheet of a Microsoft Excel file into a data frame.
+	</summary>
+
+	<usage>
+Choose the Microsoft Excel file (XLS (Excel 97-2004), XLSX (Excel 2007+)) to import a sheet from. An R object containing the data will be created. See <link href="rkward://rhelp/read.table" />.
+Note: This plugin requires a properly working Perl setup.  See <link href="rkward://rhelp/read.xls" /> for details.
+	</usage>
+
+	<settings>
+		<caption id="tab_general"/>
+		<setting id="file">The filename of the file to import</setting>
+		<setting id="saveto">The name of an R object to store the imported data in. If you choose an existing symbol name, you will be prompted whether to overwrite this object.</setting>
+		<setting id="sheet">Specifies the sheet which is imported from a Microsoft EXCEL file</setting>
+		<setting id="verbose">Print details as the file is processed</setting>
+		<setting id="skip">Defines a number of rows to skip</setting>
+		<setting id="nrows">Defines the maximum number of rows to read (-1 for no limit)</setting>
+		<setting id="doedit">Whether the object should be opened for editing after it was imported</setting>		
+		<setting id="header">Whether the first row of the file should be interpreted as column names</setting>
+	</settings>
+	<related>
+		<ul>
+			<li><link href="rkward://rhelp/read.xls"/></li>
+			<li><link href="rkward://component/import_csv"/></li>
+			<li><link href="rkward://rhelp/read.table"/></li>
+		</ul>
+	</related>
+</document>

Added: trunk/rkward/rkward/plugins/00saveload/import/import_xls.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_xls.xml	                        (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_xls.xml	2011-12-01 23:26:52 UTC (rev 4063)
@@ -0,0 +1,48 @@
+<!DOCTYPE rkplugin>
+<document>
+	<code file="import_xls.js" />
+	<help file="import_xls.rkh" />
+
+	<logic>
+		<external id="filename"/>
+		<connect governor="filename" client="file.selection"/>
+	</logic>
+	<dialog label="Import Microsoft EXCEL sheet">
+		<tabbook>
+			<tab id="tab_general" label="General">
+				<browser type="file" allow_urls="true" id="file" label="File name" />
+				<stretch/>
+				<row>
+					<saveobject id="saveto" initial="my.xls.data" label="Object to save to"/>
+					<checkbox id="doedit" value="1" value_unchecked="0" label="Edit Object" checked="true" />
+				</row>
+				<stretch/>
+				<frame label="Options">
+		  		      	<spinbox type="integer" id="sheet" label="Sheet within the Excel file from which data are to be read" min="1" initial="1"/>
+				</frame>
+			</tab>
+			<tab id="tab_further_options" label="Further Options" >
+				<column>
+				  	<checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Column names in first row"/>
+					<input size="small" initial="-1" id="nrows" label="Max number of rows to read (-1 for no limit)" />
+					<input size="small" initial="-1" id="skip" label="Number of rows to skip" />
+					<input size="small" initial="NA" id="na" label="Character for missing values" />
+					<radio id="strings_as_factors" label="Convert character columns to factors" >
+					  <option value=", stringsAsFactors=TRUE" label="Convert to factor" />
+					  <option value="" label="Default" checked="true" />
+					  <option value=", stringsAsFactors=FALSE" label="Do not convert" />
+					</radio>
+					<checkbox id="fill" checked="false" value="TRUE" value_unchecked="FALSE" label="Fill the rows if unequal length" />
+					<checkbox value_unchecked="FALSE" checked="true" value="TRUE" id="checkname" label="Check syntax of the variables names" />
+					<checkbox value_unchecked="FALSE" checked="false" value="TRUE" id="stripwhite" label="Strip white values" />
+		
+					<frame label="Error handling">
+						<row>
+						      <checkbox id="verbose" value="TRUE" value_unchecked="FALSE" checked="false" label="Print details as the file is processed"/>
+						</row>
+					</frame>
+				</column>
+			</tab>
+		</tabbook>
+	</dialog>
+</document>

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





More information about the rkward-tracker mailing list