[rkward-cvs] SF.net SVN: rkward: [1374] trunk/rkward/rkward/plugins/00saveload/import
tfry at users.sourceforge.net
tfry at users.sourceforge.net
Sun Feb 11 17:55:56 UTC 2007
Revision: 1374
http://svn.sourceforge.net/rkward/?rev=1374&view=rev
Author: tfry
Date: 2007-02-11 09:55:56 -0800 (Sun, 11 Feb 2007)
Log Message:
-----------
Several improvements to import CSV plugin
Modified Paths:
--------------
trunk/rkward/rkward/plugins/00saveload/import/import_csv.php
trunk/rkward/rkward/plugins/00saveload/import/import_csv.rkh
trunk/rkward/rkward/plugins/00saveload/import/import_csv.xml
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_csv.php
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_csv.php 2007-02-11 14:39:15 UTC (rev 1373)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_csv.php 2007-02-11 17:55:56 UTC (rev 1374)
@@ -1,9 +1,21 @@
<?
- function preprocess () {
- }
-
- function calculate () {
-getRK("name"); ?> <- read.table (file="<? getRK("file"); ?>", header=<? getRK("header"); ?>, sep=<? getRK("sep"); ?>, dec=<? getRK("dec") ?>, <? # doing row names (what a pity...)
+// internal helper function
+function quoteString ($string) {
+ return ('"' . strtr ($string, array ('"'=>'\\"')) . '"');
+}
+
+function preprocess () {
+}
+
+function calculate () {
+ $dec = getRK_val ("dec");
+ if ($dec == "other") $dec = quoteString (getRK_val ("custom_dec"));
+ $sep = getRK_val ("sep");
+ if ($sep == "other") $sep = quoteString (getRK_val ("custom_sep"));
+ $quote = getRK_val ("quote");
+ if ($quote == "other") $quote = quoteString (getRK_val ("custom_quote"));
+
+getRK("name"); ?> <- read.table (file="<? getRK("file"); ?>", header=<? getRK("header"); ?>, sep=<? echo ($sep); ?>, quote=<? echo ($quote); ?>, dec=<? echo ($dec); ?>, <? # doing row names (what a pity...)
if (getRK_val("rowname")!="NULL") {
echo( "row.names = ");
if (getRK_val("rowname")=="rowcol") echo (getRK("nomrow") . ",");
@@ -13,15 +25,15 @@
if (getRK_val("colname") == "custoCol") echo ( "col.names = " . getRK_val ("colnames") . ",");
# doing col class (what a pity...)
if (getRK_val("colclass") == "custoClass") echo( "colClasses = " . getRK_val ("custoClasses") . ",");
-#doing what is left?> na.strings = "<? getRK("na") ?>" , nrows = <? getRK("nrows") ; ?> , skip = <? getRK("skip") ; ?> , check.names = <? getRK("checkname") ; ?> , fill = <? getRK("fill") ; ?> , strip.white = <? getRK("stripwhite") ; ?>, blank.lines.skip = <? getRK("blanklinesskip") ; ?> ,comment.char = "<? getRK("commentchar") ; ?>" )
+#doing what is left?> na.strings = "<? getRK("na") ?>", nrows = <? getRK("nrows") ; ?>, skip = <? getRK("skip") ; ?>, check.names = <? getRK("checkname") ; ?>, fill = <? getRK("fill") ; ?>, strip.white = <? getRK("stripwhite") ; ?>, blank.lines.skip = <? getRK("blanklinesskip") ; ?>, comment.char=<? echo (quoteString (getRK_val("commentchar"))); ?><? getRK("allow_escapes"); ?><? getRK("flush"); ?><? getRK("strings_as_factors"); ?>)
<?
- }
-
- function printout () {
- // produce the output
- }
-
- function cleanup () {
- }
+}
+
+function printout () {
+// produce the output
+}
+
+function cleanup () {
+}
?>
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_csv.rkh
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_csv.rkh 2007-02-11 14:39:15 UTC (rev 1373)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_csv.rkh 2007-02-11 17:55:56 UTC (rev 1374)
@@ -14,24 +14,33 @@
<setting id="name">The name of an R object to store the imported data in. If you chose an existing symbol name, you will be prompted, whether to overwrite this object.</setting>
<setting id="header">Whether the first row of the file contains should be interpreted as column names</setting>
<setting id="dec">Decimal character used in the imported file</setting>
+ <setting id="custom_dec">If you selected "other" above, specify the character, here.</setting>
<setting id="sep">The separator character used in the imported file</setting>
+ <setting id="custom_sep">If you selected "other" above, specify the character, here.</setting>
<caption id="tab_rows"/>
<setting id="rowname">How to determine row names? Either the rows can be numbered 1...n. Or you can specify a single column containing the row names (typically the first). Or you can provide a character vector of names.</setting>
<setting id="nomrow">See above. The column containing the row names, given either as a number, or as a column name (in the latter case, make sure to quote the name)</setting>
- <setting id="rownames">See above. A vector of column names (e.g. c ("row1", "row2", ... "rown"))</setting>
+ <setting id="rownames">See above. A vector of row names (e.g. c ("row1", "row2", ..., "rown"))</setting>
<caption id="tab_columns"/>
<setting id="colname">Use default column names (possibly read from first row of file), or use specific names?</setting>
- <setting id="colnames">See above. Specify a character vector of column names (e.g. c ("col1", "col2", ... "coln"))</setting>
+ <setting id="colnames">See above. Specify a character vector of column names (e.g. c ("col1", "col2", ..., "coln"))</setting>
+ <setting id="flush">Should additional columns (not specified above, and not within the first five rows of input) be skipped? See parameter flush in <link href="rkward://rhelp/read.table"/>.</setting>
<setting id="colclass">Should be class of each column be determined automatically, or specified?</setting>
- <setting id="custoClasses">See above. Specify a character vector with the names of the classes to use for each column</setting>
+ <setting id="custoClasses">See above. Specify a character vector with the names of the classes to use for each column.</setting>
+ <caption id="tab_strings"/>
+ <setting id="allow_escapes">Should a backslash ('\') followed by a character be treated as an escaped character? Otherwise the sequence is read literally.</setting>
+ <setting id="strings_as_factors">Should columns containing character data be converted to factors in R, or should they be read as character vectors?</setting>
+ <setting id="quote">String delimiter character(s) used in the file</setting>
+ <setting id="custom_quote">If you selected "other" above, you can specify which quoting character to use, here. Simply enter all characters to use (without spaces or commas, e.g. "'@ to use ", ', and @ as string delimiters).</setting>
+
<caption id="tab_further_options"/>
<setting id="skip">This many rows will be skipped at the start of the file. Use, for instance, if the file contains a text header.</setting>
<setting id="nrows">Maximum number of rows to read (-1 to read all lines)</setting>
<setting id="na">How are missing values (NAs) written in the file to be imported?</setting>
- <setting id="commentchar">If you enter a single character, here, everything after that character in a line will be ignored. Leave blank to disable detection of comments</setting>
+ <setting id="commentchar">If you enter a single character, here, everything after that character in a line will be ignored. Leave blank to disable detection of comments.</setting>
<setting id="fill">If checked, and the rows have differing length, they will be padded with empty values in the imported data.</setting>
<setting id="checkname">Should column names be checked for validity?</setting>
<setting id="stripwhite">Should leading and trailing white space be removed from character fields?</setting>
Modified: trunk/rkward/rkward/plugins/00saveload/import/import_csv.xml
===================================================================
--- trunk/rkward/rkward/plugins/00saveload/import/import_csv.xml 2007-02-11 14:39:15 UTC (rev 1373)
+++ trunk/rkward/rkward/plugins/00saveload/import/import_csv.xml 2007-02-11 17:55:56 UTC (rev 1374)
@@ -6,9 +6,20 @@
<external id="filename" />
<connect governor="filename" client="file.selection"/>
-
<connect client="rowname.enabled" governor="isrow.state" />
+ <convert id="customsep" mode="equals" sources="sep.string" standard="other" />
+ <connect client="custom_sep.enabled" governor="customsep" />
+ <connect client="custom_sep.required" governor="custom_sep.enabled" />
+
+ <convert id="customdec" mode="equals" sources="dec.string" standard="other" />
+ <connect client="custom_dec.enabled" governor="customdec" />
+ <connect client="custom_dec.required" governor="custom_dec.enabled" />
+
+ <convert id="customquote" mode="equals" sources="quote.string" standard="other" />
+ <connect client="custom_quote.enabled" governor="customquote" />
+ <connect client="custom_quote.required" governor="custom_quote.enabled" />
+
<convert id="userow" mode="equals" sources="rowname.string" standard="rowcol" />
<convert id="customizerow" mode="equals" sources="rowname.string" standard="custoRow" />
<convert id="customizerow2" mode="and" sources="isrow.state;customizerow" standard="custoRow" />
@@ -31,16 +42,24 @@
<saveobject id="name" initial="my.csv.data" label="Object to save to"/>
<checkbox id="header" value="TRUE" value_unchecked="FALSE" checked="true" label="First row contains column names"/>
<row>
- <radio id="dec" label="Decimal point character" >
- <option value="'.'" label="'.'" />
- <option value="','" label="','" />
- </radio>
- <radio id="sep" label="Field separator character" >
- <option value="'\t'" label="Tab" />
- <option value="';'" label="';'" />
- <option value="','" label="','" />
- <option value="' '" label="Space" />
- </radio>
+ <column>
+ <radio id="dec" label="Decimal point character" >
+ <option value="'.'" label="'.'" />
+ <option value="','" label="','" />
+ <option value="other" label="Other (specify below)" />
+ </radio>
+ <input id="custom_dec" label="Specify decimal point character" />
+ </column>
+ <column>
+ <radio id="sep" label="Field separator character" >
+ <option value="'\t'" label="Tab" />
+ <option value="';'" label="';'" />
+ <option value="','" label="','" />
+ <option value="' '" label="Space" />
+ <option value="other" label="Other (specify below)" />
+ </radio>
+ <input id="custom_sep" label="Specify field separator character" />
+ </column>
</row>
</tab>
<tab id="tab_rows" label="Rows" >
@@ -54,16 +73,37 @@
<input id="rownames" label="Vector of row names (quoted)" />
</tab>
<tab id="tab_columns" label="Columns" >
- <radio id="colname" label="Column names" >
- <option value="" label="Default" />
- <option value="custoCol" label="Use a character vector" />
+ <frame label="Column names">
+ <radio id="colname" label="Column names" >
+ <option value="" label="Default" />
+ <option value="custoCol" label="Use a character vector" />
+ </radio>
+ <input id="colnames" label="Vector of column names (quoted)" />
+ </frame>
+ <checkbox id="flush" label="Skip any further fields" value=", flush=TRUE" value_unchecked="" />
+ <frame label="Column classes">
+ <radio id="colclass" label="Classes of columns" >
+ <option value="" label="Default" />
+ <option value="custoClass" label="Use a character vector" />
+ </radio>
+ <input id="custoClasses" label="Vector of column classes (quoted)" />
+ </frame>
+ </tab>
+ <tab id="tab_strings" label="Strings" >
+ <checkbox id="allow_escapes" label="Allow escaped characters" value=", allowEscapes=TRUE" value_unchecked="" />
+ <radio id="strings_as_factors" label="Convert character columns to factors" >
+ <option value=", stringsAsFactors=TRUE" label="Convert to factor" />
+ <option value="" label="Default" checked="true" />
+ <option value=", stringsAsFactors=FALSE" label="Do not convert" />
</radio>
- <input id="colnames" label="Vector of column names (quoted)" />
- <radio id="colclass" label="Classes of columns" >
- <option value="" label="Default" />
- <option value="custoClass" label="Use a character vector" />
+ <radio id="quote" label="String delimiter" >
+ <option value="'&quot;'" label="&quot; only" />
+ <option value="'\''" label="' only" />
+ <option value="'&quot;\''" label="&quot; and '" checked="true"/>
+ <option value="''" label="None / disabled" />
+ <option value="other" label="Other (specify below)" />
</radio>
- <input id="custoClasses" label="Vector of column classes (quoted)" />
+ <input id="custom_quote" label="Specify quoting character(s)" />
</tab>
<tab id="tab_further_options" label="Further Options" >
<row>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the rkward-tracker
mailing list