[rkward-cvs] SF.net SVN: rkward:[4438] trunk/rkward

tfry at users.sourceforge.net tfry at users.sourceforge.net
Wed Nov 21 12:55:47 UTC 2012


Revision: 4438
          http://rkward.svn.sourceforge.net/rkward/?rev=4438&view=rev
Author:   tfry
Date:     2012-11-21 12:55:46 +0000 (Wed, 21 Nov 2012)
Log Message:
-----------
Fix several problems regarding factor handling:
- When converting to factor, make sure to mark all fields as invalid, which do not correspond to a factor level
- Support writing many invalid fields at once (important, when converting a large var to factor, before setting factor levels)
- Fix some subtle problems with setting factor levels.

Modified Paths:
--------------
    trunk/rkward/ChangeLog
    trunk/rkward/rkward/core/rkvariable.cpp
    trunk/rkward/rkward/core/rkvariable.h
    trunk/rkward/rkward/rbackend/rpackages/rkward/NAMESPACE
    trunk/rkward/rkward/rbackend/rpackages/rkward/R/internal.R

Modified: trunk/rkward/ChangeLog
===================================================================
--- trunk/rkward/ChangeLog	2012-11-21 12:52:45 UTC (rev 4437)
+++ trunk/rkward/ChangeLog	2012-11-21 12:55:46 UTC (rev 4438)
@@ -1,3 +1,4 @@
+- Fix conversion from Numeric to Factor in the data editor
 - In the data.frame editor, columns containing invalid values are now highlighted in red
 - Fixed: If none of the previous plugin maps could be found on startup, re-add the default
 - Added GUI element for entering matrix or vector data in plugins

Modified: trunk/rkward/rkward/core/rkvariable.cpp
===================================================================
--- trunk/rkward/rkward/core/rkvariable.cpp	2012-11-21 12:52:45 UTC (rev 4437)
+++ trunk/rkward/rkward/core/rkvariable.cpp	2012-11-21 12:55:46 UTC (rev 4438)
@@ -98,7 +98,7 @@
 			else if (new_type == RObject::DataFactor) command += "as.factor";
 			command += ")";
 			RKGlobals::rInterface ()->issueCommand (command, RCommand::App | RCommand::Sync, QString::null);
-			if (new_type == RObject::DataFactor) writeValueLabels (0);		// as.factor resets the "levels"-attribute!
+			if (new_type == RObject::DataFactor) updateValueLabels ();	// as.factor resets the "levels"-attribute!
 
 			syncDataToR ();
 		} else discardUnsyncedChanges ();
@@ -180,8 +180,9 @@
 			int invalids_count = invalids_length / 2;
 			for (int i=0; i < invalids_count; ++i) {
 				int row = invalids_list.at (i).toInt () - 1;
-				if (data->cell_states[row] & RKVarEditData::NA) {
-					setText (row, invalids_list.at (invalids_count + i));
+				if (data->cell_states[row] & RKVarEditData::NA) {	// NOTE: Do *not* use setText(), here. It tries too hard to set a valid value.
+					data->invalid_fields.insert (row, invalids_list.at (invalids_count + i));
+					data->cell_states[row] = RKVarEditData::Invalid;
 				}
 			}
 		}
@@ -339,15 +340,44 @@
 	writeMetaData (chain);
 }
 
-void RKVariable::writeInvalidField (int row, RCommandChain *chain) {
+void RKVariable::writeInvalidFields (QList<int> rows, RCommandChain *chain) {
 	RK_TRACE (OBJECTS);
 
-	if (data->invalid_fields.contains (row)) {
-		RKGlobals::rInterface ()->issueCommand (".rk.set.invalid.field (" + getFullName () + ", " + QString::number (row+1) + ", " + rQuote (data->invalid_fields.value (row)) + ')', RCommand::App | RCommand::Sync, QString::null, 0,0, chain);
-	} else {
-		RKGlobals::rInterface ()->issueCommand (".rk.set.invalid.field (" + getFullName () + ", " + QString::number (row+1) + ", NULL)", RCommand::App | RCommand::Sync, QString (), 0,0, chain);
+	if (rows.isEmpty ()) return;
+
+	QString set;
+	QString values;
+	QString clear;
+
+	for (int i = 0; i < rows.size (); ++i) {
+		int row = rows[i];
+
+		if (data->invalid_fields.contains (row)) {
+			if (!set.isEmpty ()) {
+				set.append (", ");
+				values.append (", ");
+			}
+			set.append (QString::number (row+1));
+			values.append (rQuote (data->invalid_fields[row]));
+		} else {
+			if (!clear.isEmpty ()) clear.append (", ");
+			clear.append (QString::number (row+1));
+		}
+
+		data->cell_states[row] -= (data->cell_states[row] & RKVarEditData::UnsyncedInvalidState);
 	}
-	data->cell_states[row] -= (data->cell_states[row] & RKVarEditData::UnsyncedInvalidState);
+
+	if (!set.isEmpty ()) {
+		set = "set=c(" + set + "), ";
+		values = "values=c(" + values + ")";
+	}
+	if (!clear.isEmpty ()) {
+		clear = "clear=c(" + clear + ")";
+		if (!values.isEmpty ()) values.append (",");
+	}
+
+	RKGlobals::rInterface ()->issueCommand (".rk.set.invalid.fields (" + getFullName () + ", " + set + values + clear + ")", RCommand::App | RCommand::Sync, QString (), 0,0, chain);
+
 	if (data->previously_valid != data->invalid_fields.isEmpty ()) {
 		data->previously_valid = data->invalid_fields.isEmpty ();
 		RKGlobals::tracker ()->objectMetaChanged (this);
@@ -358,10 +388,12 @@
 	RK_TRACE (OBJECTS);
 	if (from_row == -1) return;
 
+	QList<int> changed_invalids;
+
 	// TODO: try to sync in correct storage mode
 	if (from_row == to_row) {
 		RKGlobals::rInterface ()->issueCommand (getFullName () + '[' + QString::number (from_row+1) + "] <- " + getRText (from_row), RCommand::App | RCommand::Sync, QString::null, 0,0, chain);
-		if (data->cell_states[from_row] & RKVarEditData::UnsyncedInvalidState) writeInvalidField (from_row, chain);
+		if (data->cell_states[from_row] & RKVarEditData::UnsyncedInvalidState) changed_invalids.append (from_row);
 	} else {
 		QString data_string = "c (";
 		for (int row = from_row; row <= to_row; ++row) {
@@ -370,12 +402,14 @@
 			if (row != to_row) {
 				data_string.append (", ");
 			}
-			if (data->cell_states[row] & RKVarEditData::UnsyncedInvalidState) writeInvalidField (row, chain);
+			if (data->cell_states[row] & RKVarEditData::UnsyncedInvalidState) changed_invalids.append (row);
 		}
 		data_string.append (")");
 		RKGlobals::rInterface ()->issueCommand (getFullName () + '[' + QString::number (from_row + 1) + ':' + QString::number (to_row + 1) + "] <- " + data_string, RCommand::App | RCommand::Sync, QString::null, 0,0, chain);
 	}
 
+	if (!changed_invalids.isEmpty ()) writeInvalidFields (changed_invalids, chain);
+
 	ChangeSet *set = new ChangeSet (from_row, to_row);
 	RKGlobals::tracker ()->objectDataChanged (this, set);
 }
@@ -486,13 +520,13 @@
 			data->cell_strings[row] = text;
 		} else if (getDataType () == DataFactor) {
 			if (data->value_labels) {
-				QString realtext = data->value_labels->key (text);
-				if (realtext.isEmpty ()) valid = false;
-				else data->cell_doubles[row] = realtext.toInt ();
+				QString realtext = data->value_labels->key (text);	// first, attempt to set by level
+				if (!realtext.isEmpty ()) data->cell_doubles[row] = realtext.toInt ();
+				else {	// if this failed, try to set by index, instead.
+					if (data->value_labels->contains (text)) data->cell_doubles[row] = text.toInt ();
+					else valid = false;
+				}
 			} else valid = false;
-			if (!valid) {	// setting by lavel failed. Try to set a numeric value, instead
-				data->cell_doubles[row] = text.toDouble (&valid);
-			}
 		} else if (getDataType () == DataLogical) {
 			if (text == "0" || text == "F" || text == "FALSE") data->cell_doubles[row] = 0;
 			else if (text == "1" || text == "T" || text == "TRUE") data->cell_doubles[row] = 1;
@@ -631,9 +665,7 @@
 		else data->cell_doubles.removeAt (row);
 	}
 
-	for (int i = 0; i < changed_invalids.size (); ++i) {
-		writeInvalidField (changed_invalids[i], 0);
-	}
+	if (!changed_invalids.isEmpty ()) writeInvalidFields (changed_invalids);
 
 	dimensions[0] -= offset;
 }
@@ -657,9 +689,7 @@
 		}
 	}
 
-	for (int i = 0; i < changed_invalids.size (); ++i) {
-		writeInvalidField (changed_invalids[i], 0);
-	}
+	if (!changed_invalids.isEmpty ()) writeInvalidFields (changed_invalids);
 
 	dimensions[0] += count;
 }
@@ -688,21 +718,31 @@
 		*(data->value_labels) = labels;
 	}
 
+	updateValueLabels ();
+}
+
+void RKVariable::updateValueLabels () {
+	RK_TRACE (OBJECTS);
+
 	writeValueLabels (0);
 	RKGlobals::tracker ()->objectMetaChanged (this);
 
+	ValueLabels *labels = data->value_labels;
+
+	lockSyncing (true);
 	// find out which values got valid / invalid and change those
 	for (int i=0; i < getLength (); ++i) {
 		if (cellStatus (i) == ValueInvalid) {
-			if (labels.contains (getText (i))) {
+			if (labels && labels->contains (getText (i))) {
 				setText (i, getText (i));
 			}
 		} else {
-			if (!(labels.contains (getText (i)))) {
+			if (!(labels && labels->contains (getText (i)))) {
 				setText (i, getText (i));
 			}
 		}
 	}
+	lockSyncing (false);
 
 	// also update display of all values:
 	ChangeSet *set = new ChangeSet (0, getLength () - 1);
@@ -714,10 +754,11 @@
 void RKVariable::writeValueLabels (RCommandChain *chain) const {
 	RK_TRACE (OBJECTS);
 	RK_ASSERT (data);
-	
-	if (data->value_labels) {
+
+	QString level_string;
+	if (data->value_labels && (!data->value_labels->isEmpty())) {
 		int i = 1;
-		QString level_string = "c (";
+		level_string = "c (";
 		while (data->value_labels->contains (QString::number (i))) {
 			level_string.append (rQuote ((*(data->value_labels))[QString::number (i)]));
 			if (data->value_labels->contains (QString::number (++i))) {
@@ -725,13 +766,11 @@
 			}
 		}
 		level_string.append (")");
-		// using attr (..., "levels) instead of levels (...) in order to bypass checking
-		RKGlobals::rInterface ()->issueCommand ("attr (" + getFullName () + ", \"levels\") <- " + level_string, RCommand::App | RCommand::Sync, QString::null, 0, 0, chain);
 	} else {
-		QString empty = "NULL";
-		if (getDataType () == DataFactor) empty = "NA";	// never set levels to NULL on a factor!
-		RKGlobals::rInterface ()->issueCommand ("attr (" + getFullName () + ", \"levels\") <- " + empty, RCommand::App | RCommand::Sync, QString::null, 0, 0, chain);
+		level_string = "NULL";
 	}
+
+	RKGlobals::rInterface ()->issueCommand (".rk.set.levels (" + getFullName () + ", " + level_string + ")", RCommand::App | RCommand::Sync, QString::null, 0, 0, chain);
 }
 
 QString RKVariable::getValueLabelString () const {

Modified: trunk/rkward/rkward/core/rkvariable.h
===================================================================
--- trunk/rkward/rkward/core/rkvariable.h	2012-11-21 12:52:45 UTC (rev 4437)
+++ trunk/rkward/rkward/core/rkvariable.h	2012-11-21 12:55:46 UTC (rev 4438)
@@ -87,6 +87,8 @@
 	ValueLabels getValueLabels () const;
 /** assigns a new map of labels. Also takes care of syncing with the backend. */
 	void setValueLabels (const ValueLabels& labels);
+/** re-check a factor variable after editing its value labels, and sync labels to R */
+	void updateValueLabels ();
 /** get value labels as string (for display) */
 	QString getValueLabelString () const;
 /** set value labels from string (for paste operations) */
@@ -173,7 +175,7 @@
 	void cellsChanged (int from_row, int to_row);
 /** writes the given range of cells to the backend (regardless of whether syncing should be immediate) */
 	virtual void writeData (int from_row, int to_row, RCommandChain *chain=0);
-	void writeInvalidField (int row, RCommandChain *chain=0);
+	void writeInvalidFields (QList<int> rows, RCommandChain *chain=0);
 /** writes the values labels to the backend */
 	void writeValueLabels (RCommandChain *chain) const;
 /** tries to match a value-label to the value in the given cell. Returns the label, or - if there is no label - the original value in textual representation */

Modified: trunk/rkward/rkward/rbackend/rpackages/rkward/NAMESPACE
===================================================================
--- trunk/rkward/rkward/rbackend/rpackages/rkward/NAMESPACE	2012-11-21 12:52:45 UTC (rev 4437)
+++ trunk/rkward/rkward/rbackend/rpackages/rkward/NAMESPACE	2012-11-21 12:55:46 UTC (rev 4438)
@@ -82,7 +82,8 @@
 export(rk.select.CRAN.mirror)
 export(rk.select.list)
 export(rk.sessionInfo)
-export(.rk.set.invalid.field)
+export(.rk.set.levels)
+export(.rk.set.invalid.fields)
 export(rk.set.label)
 export(.rk.set.meta)
 export(rk.set.output.html.file)

Modified: trunk/rkward/rkward/rbackend/rpackages/rkward/R/internal.R
===================================================================
--- trunk/rkward/rkward/rbackend/rpackages/rkward/R/internal.R	2012-11-21 12:52:45 UTC (rev 4437)
+++ trunk/rkward/rkward/rbackend/rpackages/rkward/R/internal.R	2012-11-21 12:55:46 UTC (rev 4438)
@@ -26,7 +26,6 @@
 	eval (substitute (attr (x, ".rk.meta") <<- m))
 }
 
-#' @export
 ".rk.set.invalid.field" <- function (x, row, value) {
 	l <- attr (x, ".rk.invalid.fields");
 	if (is.null (l)) l <- list ();
@@ -35,7 +34,42 @@
 	eval (substitute (attr (x, ".rk.invalid.fields") <<- l))
 }
 
+#' Work around some peculiarities in R's handling of levels
 #' @export
+".rk.set.levels" <- function (var, levels) {
+	if (is.factor (var)) {
+		if (is.null (levels)) levels <- NA	# must never set NULL levels on a factor
+		old_l <- levels (var)
+		# using attr (..., "levels) instead of levels (...) in order to bypass checking
+		eval (substitute (attr (var, "levels") <<- levels))
+		if ((length (var) > 0) && (is.null (old_l) || ((length (old_l) == 1L) && is.na (old_l[1L]))))	{
+			# if levels were empty, this means var is all NAs. R will set all to first level, instead, in some cases
+			len <- length (var)
+			eval(substitute(var[1:len] <<- NA))
+		}
+	} else {
+		eval (substitute (attr (var, "levels") <<- levels))
+	}
+}
+
+#' @export
+".rk.set.invalid.fields" <- function (x, set, values, clear) {
+	l <- attr (x, ".rk.invalid.fields");
+	if (is.null (l)) l <- list ();
+
+	if (!missing (set)) {
+		set <- as.character (set)
+		l[set] <- as.character (values)
+	}
+	if (!missing (clear)) {
+		l[as.character (clear)] <- NULL
+	}
+
+	if (length (l) == 0) l <- NULL
+	eval (substitute (attr (x, ".rk.invalid.fields") <<- l))
+}
+
+#' @export
 ".rk.data.frame.insert.row" <- function (x, index=0) {
 	if ((index == 0) || (index > dim (x)[1])) {	# insert row at end
 		eval (substitute (x[dim(x)[1]+1,] <<- c (NA)))

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





More information about the rkward-tracker mailing list