[rkward-cvs] SF.net SVN: rkward:[4063] trunk/rkward/rkward/plugins/00saveload/import
sjar at users.sourceforge.net
sjar at users.sourceforge.net
Thu Dec 1 23:26:52 UTC 2011
Revision: 4063
http://rkward.svn.sourceforge.net/rkward/?rev=4063&view=rev
Author: sjar
Date: 2011-12-01 23:26:52 +0000 (Thu, 01 Dec 2011)
Log Message:
-----------
first working plugin of XLS/XLSX import for RKWard
It was created as an independent plugin instead of being hooked into an existing one because:
+ requires yet another package (gdata)
+ has a rather slow import speed (somewhat CPU hungry): XLS -> perl script -> read.x -> data.frame
#e.g., reading 3000 lines and 256 columns took 100% CPU usage, circa 4 min and 350 mb on my machine (2x Intel Atom N270 @ clocked at [ 1333.000 MHz ])
# therefore no multiple sheet import is/will be implemented
+ depends on a working Perl installation
+ it is read from more or less binary blobs
+ ...
Todo:
+ find a way to make the Perl path user-definable (e.g. if it is installed in an unusual location) and to make that setting persistent (@ Thomas hope you know what I mean)
+ give users the option to insert a sheet name instead of the number
+ test on MS Windows
+ add further warning for large data sets
Added Paths:
-----------
trunk/rkward/rkward/plugins/00saveload/import/import_xls.js
trunk/rkward/rkward/plugins/00saveload/import/import_xls.rkh
trunk/rkward/rkward/plugins/00saveload/import/import_xls.xml
Added: trunk/rkward/rkward/plugins/00saveload/import/import_xls.js
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_xls.js (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_xls.js 2011-12-01 23:26:52 UTC (rev 4063)
@@ -0,0 +1,35 @@
+function preprocess () { // Emits the R call that loads gdata, the package the commit log names as required for read.xls.
+ echo ('require (gdata)\n');
+}
+
+function calculate () {
+ var options = "";
+
+ var sheet = getValue ("sheet");
+ var header = getValue ("header");
+ var verbose = getValue ("verbose");
+
+ var quote_char = getValue ("quote");
+ if (quote_char == "other") quote_char = quote (getValue ("custom_quote"));
+
+ options = ", sheet=" + sheet + ", header=" + header + ", verbose=" + verbose;
+
+ var object = getValue ("saveto");
+
+ echo ('data <- read.xls ("' + getValue ("file") + '"' + options + ', ');
+ echo (' nrows=' + getValue ("nrows") + ', skip=' + getValue ("skip") + ', na.string="'+ getValue ("na") +'"' + getValue("strings_as_factors") +
+ ', check.names = ' + getValue("checkname") + ', strip.white = ' + getValue("stripwhite") + ')\n');
+ echo ('.GlobalEnv$' + object + ' <- data # assign to globalenv()\n');
+ if (getValue ("doedit") ) {
+ echo ('rk.edit (.GlobalEnv$' + object + ')\n');
+ }
+}
+
+function printout () {
+ makeHeaderCode ("Import Microsoft EXCEL sheet", new Array("File", getValue ("file"), "Imported to", getValue ("saveto"),
+ "Imported Sheet", getValue ("sheet"), "First row as header", getValue ("header"),
+ "Max number of rows to skip (-1 for no limit)", getValue ("skip"),
+ "Number of lines to read (-1 for no limit)", getValue ("nrows"),
+ "Character for missing values", getValue ("na")));
+}
+
Added: trunk/rkward/rkward/plugins/00saveload/import/import_xls.rkh
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_xls.rkh (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_xls.rkh 2011-12-01 23:26:52 UTC (rev 4063)
@@ -0,0 +1,30 @@
+<!DOCTYPE rkhelp>
+<document>
+ <summary>
+Import a sheet of a Microsoft Excel file into a data frame.
+ </summary>
+
+ <usage>
+Choose the Microsoft Excel file (XLS (Excel 97-2004), XLSX (Excel 2007+)) to import a sheet from. An R object containing the data will be created. See <link href="rkward://rhelp/read.table" />.
+Note: This plugin requires a properly working Perl setup. See <link href="rkward://rhelp/read.xls" /> for details.
+ </usage>
+
+ <settings>
+ <caption id="tab_general"/>
+ <setting id="file">The filename of the file to import</setting>
+ <setting id="saveto">The name of an R object to store the imported data in. If you choose an existing symbol name, you will be asked whether to overwrite this object.</setting>
+ <setting id="sheet">Specifies the sheet which is imported from a Microsoft EXCEL file</setting>
+ <setting id="verbose">Print details as the file is processed</setting>
+ <setting id="skip">Defines a number of rows to skip</setting>
+ <setting id="nrows">Defines the maximum number of rows to read (-1 for no limit)</setting>
+ <setting id="doedit">Whether the object should be opened for editing after it was imported</setting>
+ <setting id="header">Whether the first row of the file should be interpreted as column names</setting>
+ </settings>
+ <related>
+ <ul>
+ <li><link href="rkward://rhelp/read.xls"/></li>
+ <li><link href="rkward://component/import_csv"/></li>
+ <li><link href="rkward://rhelp/read.table"/></li>
+ </ul>
+ </related>
+</document>
Added: trunk/rkward/rkward/plugins/00saveload/import/import_xls.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_xls.xml (rev 0)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_xls.xml 2011-12-01 23:26:52 UTC (rev 4063)
@@ -0,0 +1,48 @@
+<!DOCTYPE rkplugin>
+<document>
+ <code file="import_xls.js" />
+ <help file="import_xls.rkh" />
+
+ <logic>
+ <external id="filename"/>
+ <connect governor="filename" client="file.selection"/>
+ </logic>
+ <dialog label="Import Microsoft EXCEL sheet">
+ <tabbook>
+ <tab id="tab_general" label="General">
+ <browser type="file" allow_urls="true" id="file" label="File name" />
+ <stretch/>
+ <row>
+ <saveobject id="saveto" initial="my.xls.data" label="Object to save to"/>
+ <checkbox id="doedit" value="1" value_unchecked="0" label="Edit Object" checked="true" />
+ </row>
+ <stretch/>
+ <frame label="Options">
+ <spinbox type="integer" id="sheet" label="Sheet within the Excel file from which data are to be read" min="1" initial="1"/>
+ </frame>
+ </tab>
+ <tab id="tab_further_options" label="Further Options" >
+ <column>
+ <checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="false" label="Column names in first row"/>
+ <input size="small" initial="-1" id="nrows" label="Max number of rows to read (-1 for no limit)" />
+ <input size="small" initial="-1" id="skip" label="Number of rows to skip" />
+ <input size="small" initial="NA" id="na" label="Character for missing values" />
+ <radio id="strings_as_factors" label="Convert character columns to factors" >
+ <option value=", stringsAsFactors=TRUE" label="Convert to factor" />
+ <option value="" label="Default" checked="true" />
+ <option value=", stringsAsFactors=FALSE" label="Do not convert" />
+ </radio>
+ <checkbox id="fill" checked="false" value="TRUE" value_unchecked="FALSE" label="Fill the rows if unequal length" />
+ <checkbox value_unchecked="FALSE" checked="true" value="TRUE" id="checkname" label="Check syntax of the variables names" />
+ <checkbox value_unchecked="FALSE" checked="false" value="TRUE" id="stripwhite" label="Strip white values" />
+
+ <frame label="Error handling">
+ <row>
+ <checkbox id="verbose" value="TRUE" value_unchecked="FALSE" checked="false" label="Print details as the file is processed"/>
+ </row>
+ </frame>
+ </column>
+ </tab>
+ </tabbook>
+ </dialog>
+</document>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the rkward-tracker
mailing list