[education/rkward] rkward: Rework (non-user) R command parse and run

Sun Sep 14 19:26:16 BST 2025

Git commit ad38d5987c738da3fa4becf09d8fdb71891bee1f by Thomas Friedrichsmeier.
Committed on 09/09/2025 at 12:17.
Pushed by tfry into branch 'master'.

Rework (non-user) R command parse and run

Besides cleanup, the main idea is to have fewer separate
try(Eval)-blocks, and entry points into R-API, which should make it
easier to clean up the command interruption business in a next step.

M  +14   -1    rkward/autotests/core_test.cpp
M  +101  -131  rkward/rbackend/rkrbackend.cpp
M  +1    -9    rkward/rbackend/rkrbackend.h

https://invent.kde.org/education/rkward/-/commit/ad38d5987c738da3fa4becf09d8fdb71891bee1f

diff --git a/rkward/autotests/core_test.cpp b/rkward/autotests/core_test.cpp
index 54b55d468..1a18131b0 100644
--- a/rkward/autotests/core_test.cpp
+++ b/rkward/autotests/core_test.cpp
@@ -1,6 +1,6 @@
 /*
 core_test - This file is part of RKWard (https://rkward.kde.org). Created: Thu Jun 09 2022
-SPDX-FileCopyrightText: 2024 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
+SPDX-FileCopyrightText: 2024-2025 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
 SPDX-FileContributor: The RKWard Team <rkward-devel at kde.org>
 SPDX-License-Identifier: GPL-2.0-or-later
 */
@@ -501,6 +501,19 @@ class RKWardCoreTest : public QObject {
 		}
 	}
 
+	void captureWarningTest() {
+		runCommandAsync(new RCommand(QStringLiteral("Sys.sleep(5)"), RCommand::User), nullptr, [](RCommand *command) {
+			QVERIFY(command->failed());
+			QVERIFY(command->wasCanceled());
+		});
+		auto priority_command = new RCommand(QStringLiteral("warning(\"mywarn\")"), RCommand::PriorityCommand | RCommand::App);
+		runCommandAsync(priority_command, nullptr, [](RCommand *command) {
+			QVERIFY(command->fullOutput().contains(u"mywarn"_s)); // warning shall (also) be associated with the innermost command
+			RInterface::instance()->cancelAll();
+		});
+		waitForAllFinished(6000);
+	}
+
 	void RKConsoleHistoryTest() {
 		QTemporaryFile oldhist;
 		QTemporaryFile emptyhist;
diff --git a/rkward/rbackend/rkrbackend.cpp b/rkward/rbackend/rkrbackend.cpp
index 080a0b6bf..afa96039d 100644
--- a/rkward/rbackend/rkrbackend.cpp
+++ b/rkward/rbackend/rkrbackend.cpp
@@ -1,6 +1,6 @@
 /*
 rkrbackend - This file is part of RKWard (https://rkward.kde.org). Created: Sun Jul 25 2004
-SPDX-FileCopyrightText: 2004-2024 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
+SPDX-FileCopyrightText: 2004-2025 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
 SPDX-FileContributor: The RKWard Team <rkward-devel at kde.org>
 SPDX-License-Identifier: GPL-2.0-or-later
 */
@@ -111,7 +111,7 @@ void RKRBackend::interruptCommand(int command_id) {
 
 	/** A request to interrupt a command may happen at very different points in the code:
 	 *  0) before it was even sent from the frontend -> frontend will not even send it to the backend code
-	 *  1) after is was sent from the frontend, but before we properly started handling it in the backend (deserializing the request, pushing it onto
+	 *  1) after it was sent from the frontend, but before we properly started handling it in the backend (deserializing the request, pushing it onto
 	 *     all_current_commands, feeding it into the REPL/eval) TODO: buggy!
 	 *  2) when it is properly running in the backend -> this is the typical case, we worry about
 	 *  2b) when it is properly running in the backend, but there is also an active sub-command, which we should allow to complete
@@ -1130,92 +1130,6 @@ void RKRBackend::enterEventLoop() {
 	RK_DEBUG(RBACKEND, DL_DEBUG, "R loop finished");
 }
 
-struct SafeParseWrap {
-	SEXP cv;
-	SEXP pr;
-	ParseStatus status;
-};
-
-void safeParseVector(void *data) {
-	SafeParseWrap *wrap = static_cast<SafeParseWrap *>(data);
-	wrap->pr = nullptr;
-	// TODO: Maybe we can use R_ParseGeneral instead. Then we could find the exact character, where parsing fails. Nope: not exported API
-	wrap->pr = RFn::R_ParseVector(wrap->cv, -1, &(wrap->status), ROb(R_NilValue));
-}
-
-SEXP parseCommand(const QString &command_qstring, RKRBackend::RKWardRError *error) {
-	RK_TRACE(RBACKEND);
-
-	SafeParseWrap wrap;
-	wrap.status = PARSE_NULL;
-
-	QByteArray localc = RKTextCodec::toNative(command_qstring); // needed so the string below does not go out of scope
-	const char *command = localc.data();
-
-	RFn::Rf_protect(wrap.cv = RFn::Rf_allocVector(STRSXP, 1));
-	RFn::SET_STRING_ELT(wrap.cv, 0, RFn::Rf_mkChar(command));
-
-	// Yes, if there is an error in the parse, R does jump back to toplevel!
-	// trying to parse list(""=1) is an example in R 3.1.1
-	RFn::R_ToplevelExec(safeParseVector, &wrap);
-	SEXP pr = wrap.pr;
-	RFn::Rf_unprotect(1);
-
-	if ((!pr) || (RFn::TYPEOF(pr) == NILSXP)) {
-		// got a null SEXP. This means parse was *not* ok, even if R_ParseVector told us otherwise
-		if (wrap.status == PARSE_OK) {
-			wrap.status = PARSE_ERROR;
-			printf("weird parse error\n");
-		}
-	}
-
-	if (wrap.status != PARSE_OK) {
-		if ((wrap.status == PARSE_INCOMPLETE) || (wrap.status == PARSE_EOF)) {
-			*error = RKRBackend::Incomplete;
-		} else if (wrap.status == PARSE_ERROR) {
-			// extern SEXP parseError (SEXP call, int linenum);
-			// parseError (ROb(R_NilValue), 0);
-			*error = RKRBackend::SyntaxError;
-		} else { // PARSE_NULL
-			*error = RKRBackend::OtherError;
-		}
-		pr = ROb(R_NilValue);
-	}
-
-	return pr;
-}
-
-SEXP runCommandInternalBase(SEXP pr, RKRBackend::RKWardRError *error) {
-	RK_TRACE(RBACKEND);
-
-	SEXP exp;
-	int r_error = 0;
-
-	exp = ROb(R_NilValue);
-
-	if (RFn::TYPEOF(pr) == EXPRSXP && RFn::LENGTH(pr) > 0) {
-		int bi = 0;
-		while (bi < RFn::LENGTH(pr)) {
-			SEXP pxp = RFn::VECTOR_ELT(pr, bi);
-			exp = RFn::R_tryEval(pxp, ROb(R_GlobalEnv), &r_error);
-			bi++;
-			if (r_error) {
-				break;
-			}
-		}
-	} else {
-		exp = RFn::R_tryEval(pr, ROb(R_GlobalEnv), &r_error);
-	}
-
-	if (r_error) {
-		*error = RKRBackend::OtherError;
-	} else {
-		*error = RKRBackend::NoError;
-	}
-
-	return exp;
-}
-
 bool RKRBackend::runDirectCommand(const QString &command) {
 	RK_TRACE(RBACKEND);
 
@@ -1233,7 +1147,7 @@ RCommandProxy *RKRBackend::runDirectCommand(const QString &command, RCommand::Co
 	return c;
 }
 
-void setWarnOption(int level) {
+static void setWarnOption(int level, bool tryeval = false) {
 	SEXP s, t;
 	RFn::Rf_protect(t = s = RFn::Rf_allocList(2));
 	RFn::SET_TYPEOF(s, LANGSXP);
@@ -1242,17 +1156,96 @@ void setWarnOption(int level) {
 	RFn::SETCAR(t, RFn::Rf_ScalarInteger(level));
 	RFn::SET_TAG(t, RFn::Rf_install("warn"));
 	// The above is rougly equivalent to parseCommand ("options(warn=" + QString::number (level) + ")", &error), but ~100 times faster
-	RKRBackend::RKWardRError error;
-	runCommandInternalBase(s, &error);
+	if (tryeval) {
+		int error;
+		RFn::R_tryEval(s, ROb(R_GlobalEnv), &error);
+	} else RFn::Rf_eval(s, ROb(R_GlobalEnv));
 	RFn::Rf_unprotect(1);
 }
 
+struct RKParseAndRunData {
+	enum ReachedState { None,
+		                Parsed,
+		                SetWarn,
+		                Evalled,
+		                ResetWarn };
+	ReachedState status = None;
+	RCommandProxy *command;
+	QByteArray commandbuf; // cannot live on stack inside parseAndRunWorker
+	int warn_level;
+};
+static void parseAndRunWorker(void *_data) {
+	RK_TRACE(RBACKEND);
+	auto data = static_cast<RKParseAndRunData *>(_data);
+
+	// WARNING: R may longjmp out of this function at several points. Therefore:
+	//          No objects with d'tors on the stack, please! (Or exceptions, or other fancy C++ stuff)
+	SEXP commandexp, parsed, exp;
+	RFn::Rf_protect(commandexp = RFn::Rf_allocVector(STRSXP, 1));
+	data->commandbuf = RKTextCodec::toNative(data->command->command);
+	RFn::SET_STRING_ELT(commandexp, 0, RFn::Rf_mkChar(data->commandbuf.data()));
+	ParseStatus parsestatus;
+	RFn::Rf_protect(parsed = RFn::R_ParseVector(commandexp, -1, &parsestatus, ROb(R_NilValue)));
+	// if we reached this point, parsing (successful or not) did not result in a longjmp - which may actually happen:
+	// trying to parse list(""=1) is an example in R 3.1.1
+	data->status = RKParseAndRunData::Parsed;
+
+	if ((parsestatus != PARSE_OK) || (!parsed) || (RFn::TYPEOF(parsed) == NILSXP)) {
+		// NOTE: according to historic code, it is possible for parsed to nullptr or R_NilValue, even though
+		//       parse_status == PARSE_OK. Not sure, if this is actually true
+		if ((parsestatus == PARSE_INCOMPLETE) || (parsestatus == PARSE_EOF)) {
+			data->command->status |= RCommand::ErrorIncomplete;
+		} else if (parsestatus == PARSE_NULL) {
+			data->status = RKParseAndRunData::ResetWarn; // we're done despite returning early
+		} else if (parsestatus == PARSE_ERROR) {
+			data->command->status |= RCommand::ErrorSyntax;
+		}
+		RFn::Rf_unprotect(2);
+		return;
+	}
+
+	// Make sure any warnings arising during the command actually get associated with it (rather than getting printed after the next user command)
+	data->warn_level = RKRSupport::SEXPToInt(RFn::Rf_GetOption1(RFn::Rf_install("warn")), 0);
+	if (data->warn_level != 1) {
+		setWarnOption(1);
+		data->status = RKParseAndRunData::SetWarn;
+	}
+
+	// evaluate the actual command - also, if it consists of multiple expressions, internally
+	if (RFn::TYPEOF(parsed) == EXPRSXP && RFn::LENGTH(parsed) > 0) {
+		const int len = RFn::LENGTH(parsed);
+		for (int bi = 0; bi < len; bi++) {
+			SEXP pxp = RFn::VECTOR_ELT(parsed, bi);
+			exp = RFn::Rf_eval(pxp, ROb(R_GlobalEnv));
+		}
+	} else {
+		exp = RFn::Rf_eval(parsed, ROb(R_GlobalEnv));
+	}
+	RFn::Rf_protect(exp);
+	data->status = RKParseAndRunData::Evalled;
+
+	if (data->warn_level != 1) setWarnOption(data->warn_level);
+	data->status = RKParseAndRunData::ResetWarn;
+
+	const auto ctype = data->command->type;
+	if (ctype & RCommand::GetStringVector) {
+		data->command->setData(RKRSupport::SEXPToStringList(exp));
+	} else if (ctype & RCommand::GetRealVector) {
+		data->command->setData(RKRSupport::SEXPToRealArray(exp));
+	} else if (ctype & RCommand::GetIntVector) {
+		data->command->setData(RKRSupport::SEXPToIntArray(exp));
+	} else if (ctype & RCommand::GetStructuredData) {
+		RData *dummy = RKRSupport::SEXPToRData(exp);
+		data->command->swallowData(*dummy);
+		delete dummy;
+	}
+	RFn::Rf_unprotect(3); // commandexp, parsed, exp
+}
+
 void RKRBackend::runCommand(RCommandProxy *command) {
 	RK_TRACE(RBACKEND);
 	RK_ASSERT(command);
 
-	RKWardRError error = NoError;
-
 	int ctype = command->type; // easier typing
 
 	// running user commands is quite different from all other commands and should have been handled by RReadConsole
@@ -1269,48 +1262,25 @@ void RKRBackend::runCommand(RCommandProxy *command) {
 		killed = ExitNow;
 	} else if (!(ctype & RCommand::EmptyCommand)) {
 		repl_status.eval_depth++;
-		SEXP parsed = parseCommand(command->command, &error);
-		if (error == NoError) {
-			RFn::Rf_protect(parsed);
-			SEXP exp;
-			// Make sure any warning arising during the command actually get assuciated with it (rather than getting printed, after the next user command)
-			int warn_level = RKRSupport::SEXPToInt(RFn::Rf_GetOption1(RFn::Rf_install("warn")), 0);
-			if (warn_level != 1) setWarnOption(1);
-			RFn::Rf_protect(exp = runCommandInternalBase(parsed, &error));
-			if (warn_level != 1) setWarnOption(warn_level);
-
-			if (error == NoError) {
-				if (ctype & RCommand::GetStringVector) {
-					command->setData(RKRSupport::SEXPToStringList(exp));
-				} else if (ctype & RCommand::GetRealVector) {
-					command->setData(RKRSupport::SEXPToRealArray(exp));
-				} else if (ctype & RCommand::GetIntVector) {
-					command->setData(RKRSupport::SEXPToIntArray(exp));
-				} else if (ctype & RCommand::GetStructuredData) {
-					RData *dummy = RKRSupport::SEXPToRData(exp);
-					command->swallowData(*dummy);
-					delete dummy;
-				}
+		RKParseAndRunData data;
+		data.command = command;
+		RFn::R_ToplevelExec(&parseAndRunWorker, &data);
+		// fix up after conditions that would have cause parseAndRunWorker() to exit via longjmp
+		command->status |= RCommand::WasTried;
+		if (data.status < RKParseAndRunData::ResetWarn) {
+			markLastWarningAsErrorMessage();
+			command->status |= RCommand::Failed;
+			if (data.status < RKParseAndRunData::Parsed) {
+				command->status |= RCommand::ErrorSyntax;
+			} else if (!(command->status & RCommand::Canceled)) {
+				command->status |= RCommand::ErrorOther;
+			}
+			if (data.status >= RKParseAndRunData::SetWarn) {
+				setWarnOption(data.warn_level, true);
 			}
-			RFn::Rf_unprotect(2); // exp, parsed
 		}
 		repl_status.eval_depth--;
 	}
-
-	// common error/status handling
-	if (error != NoError) {
-		command->status |= RCommand::WasTried | RCommand::Failed;
-		if (error == Incomplete) {
-			command->status |= RCommand::ErrorIncomplete;
-		} else if (error == SyntaxError) {
-			command->status |= RCommand::ErrorSyntax;
-		} else if (!(command->status & RCommand::Canceled)) {
-			command->status |= RCommand::ErrorOther;
-		}
-		markLastWarningAsErrorMessage();
-	} else {
-		command->status |= RCommand::WasTried;
-	}
 }
 
 void RKRBackend::setPriorityCommand(RCommandProxy *command) {
diff --git a/rkward/rbackend/rkrbackend.h b/rkward/rbackend/rkrbackend.h
index 35d0dad17..d5ca3ff4d 100644
--- a/rkward/rbackend/rkrbackend.h
+++ b/rkward/rbackend/rkrbackend.h
@@ -1,6 +1,6 @@
 /*
 rkrbackend - This file is part of the RKWard project. Created: Sun Jul 25 2004
-SPDX-FileCopyrightText: 2004-2024 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
+SPDX-FileCopyrightText: 2004-2025 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
 SPDX-FileContributor: The RKWard Team <rkward-devel at kde.org>
 SPDX-License-Identifier: GPL-2.0-or-later
 */
@@ -63,14 +63,6 @@ class RKRBackend : public RKROutputBuffer {
 	/** interrupt processing of the current command. This is much like the user pressing Ctrl+C in a terminal with R. This is probably the only non-portable function in RKRBackend, but I can't see a good way around placing it here, or to make it portable. */
 	static void interruptProcessing(bool interrupt);
 
-	/** Enum specifying types of errors that may occur while parsing/evaluating a command in R */
-	enum RKWardRError {
-		NoError = 0,     /**< No error */
-		Incomplete = 1,  /**< The command is incomplete. Command was syntactically ok up to given point, but incomplete. It may or may not be semantically correct. */
-		SyntaxError = 2, /**< Syntax error */
-		OtherError = 3   /**< Other error, usually a semantic error, e.g. object not found */
-	};
-
 	/** initializes the R-backend. Emits an RCallbackType::Started-request (with any error messages) when done.
 	Note that you should call initialize only once in a application */
 	void initialize(const QString &locale_dir, bool setup);