[education/rkward] rkward/rbackend: Remove Qt5 compatibility dependency (i.e. QTextCodec)

Thomas Friedrichsmeier null at kde.org
Thu Apr 18 17:40:15 BST 2024


Git commit 2991da575abd11d794ee32cb50d685f5da90dbab by Thomas Friedrichsmeier.
Committed on 18/04/2024 at 16:38.
Pushed by tfry into branch 'master'.

Remove Qt5 compatibility dependency (i.e. QTextCodec)

Encoding conversions are instead performed via the R iconv functions

M  +1    -1    rkward/rbackend/CMakeLists.txt
M  +20   -26   rkward/rbackend/rkrbackend.cpp
M  +0    -11   rkward/rbackend/rkrbackend.h
M  +42   -13   rkward/rbackend/rkrsupport.cpp
M  +15   -0    rkward/rbackend/rkrsupport.h
M  +8    -1    rkward/rbackend/rpackages/rkward/R/rk.filename-functions.R

https://invent.kde.org/education/rkward/-/commit/2991da575abd11d794ee32cb50d685f5da90dbab

diff --git a/rkward/rbackend/CMakeLists.txt b/rkward/rbackend/CMakeLists.txt
index 316e68ce7..e06855f67 100644
--- a/rkward/rbackend/CMakeLists.txt
+++ b/rkward/rbackend/CMakeLists.txt
@@ -48,7 +48,7 @@ ADD_DEFINITIONS (-DRKWARD_BACKEND_PATH="${KDE_INSTALL_FULL_LIBEXECDIR}")
 LINK_DIRECTORIES(${R_SHAREDLIBDIR})
 ADD_EXECUTABLE(rkward.rbackend ${rbackend_backend_SRCS} ${rbackend_shared_SRCS})
 ECM_MARK_NONGUI_EXECUTABLE(rkward.rbackend)
-TARGET_LINK_LIBRARIES(rkward.rbackend rkgraphicsdevice.backend ${R_USED_LIBS} ${CMAKE_THREAD_LIBS_INIT} Qt6::Network Qt6::Core KF6::I18n Qt6::Core5Compat)
+TARGET_LINK_LIBRARIES(rkward.rbackend rkgraphicsdevice.backend ${R_USED_LIBS} ${CMAKE_THREAD_LIBS_INIT} Qt6::Network Qt6::Core KF6::I18n)
 IF(WIN32)
 # 64MB stack size is what R itself is build with on Windows, while by default the stack size would be 1MB
 # Not sure, if 64MB is actually needed, but 1MB does cause trouble e.g. with some shiny apps.
diff --git a/rkward/rbackend/rkrbackend.cpp b/rkward/rbackend/rkrbackend.cpp
index 5b3518669..890957a86 100644
--- a/rkward/rbackend/rkrbackend.cpp
+++ b/rkward/rbackend/rkrbackend.cpp
@@ -24,7 +24,6 @@ RKRBackend::RKReplStatus RKRBackend::repl_status = { QByteArray (), 0, true, 0,
 void* RKRBackend::default_global_context = nullptr;
 
 #include <QString>
-#include <QTextCodec>
 #include <QStringList>
 #include <QThread>
 #include <QDir>
@@ -296,7 +295,7 @@ int RReadConsole (const char* prompt, unsigned char* buf, int buflen, int hist)
 					RKRBackend::repl_status.user_command_completely_transmitted = false;
 					RKRBackend::repl_status.user_command_parsed_up_to = 0;
 					RKRBackend::repl_status.user_command_successful_up_to = 0;
-					RKRBackend::repl_status.user_command_buffer = RKRBackend::fromUtf8 (command->command);
+					RKRBackend::repl_status.user_command_buffer = RKTextCodec::toNative(command->command);
 					RKTransmitNextUserCommandChunk (buf, buflen);
 					RKRBackend::repl_status.user_command_status = RKRBackend::RKReplStatus::UserCommandTransmitted;
 					return 1;
@@ -306,7 +305,7 @@ int RReadConsole (const char* prompt, unsigned char* buf, int buflen, int hist)
 					// fully transmitted, but R is still asking for more? This looks like an incomplete statement.
 					// HOWEVER: It may also have been an empty statement such as " ", so let's check whether the prompt looks like a "continue" prompt
 					bool incomplete = false;
-					if (RKRBackend::toUtf8 (prompt) == RKRSupport::SEXPToString (Rf_GetOption (Rf_install ("continue"), R_BaseEnv))) {
+					if (RKTextCodec::fromNative(prompt) == RKRSupport::SEXPToString(Rf_GetOption(Rf_install("continue"), R_BaseEnv))) {
 						incomplete = true;
 					}
 					if (incomplete) RKRBackend::this_pointer->current_command->status |= RCommand::Failed | RCommand::ErrorIncomplete;
@@ -405,7 +404,7 @@ int RReadConsole (const char* prompt, unsigned char* buf, int buflen, int hist)
 		RK_ASSERT (false);	// should not reach this point.
 	}
 
-	QByteArray localres = RKRBackend::fromUtf8 (request.params["result"].toString ());
+	QByteArray localres = RKTextCodec::toNative(request.params["result"].toString());
 	// need to append a newline, here. TODO: theoretically, RReadConsole comes back for more, if \0 was encountered before \n.
 	qstrncpy ((char *) buf, localres.left (buflen - 2).append ('\n').data (), buflen);
 	return 1;
@@ -438,7 +437,7 @@ bool RKRBackend::fetchStdoutStderr (bool forcibly) {
 		if (bytes <= 0) break;
 		buffer[bytes] = '\0';
 		// NOTE: we must not risk blocking inside handleOutput, while the stdout_stderr_mutex is locked!
-		handleOutput (RKRBackend::toUtf8 (buffer), bytes, ROutput::Warning, false);
+		handleOutput(RKTextCodec::fromNative(buffer), bytes, ROutput::Warning, false);
 	}
 
 	stdout_stderr_mutex.unlock ();
@@ -499,7 +498,7 @@ void RWriteConsoleEx (const char *buf, int buflen, int type) {
 	QByteArray str(buf, buflen);
 	QString utf8;
 	if (winutf8start.isEmpty()) {
-		utf8 = RKRBackend::toUtf8 (buf);
+		utf8 = RKTextCodec::fromNative(buf);
 	} else {
 		int pos = 0;
 		while (pos < buflen) {
@@ -522,7 +521,7 @@ void RWriteConsoleEx (const char *buf, int buflen, int type) {
 		}
 	}
 #else
-	QString utf8 = RKRBackend::toUtf8 (buf);
+	QString utf8 = RKTextCodec::fromNative(buf);
 #endif
 	RKRBackend::this_pointer->handleOutput (utf8, buflen, type == 0 ? ROutput::Output : ROutput::Warning);
 }
@@ -643,7 +642,7 @@ int RChooseFile(int isnew, char *buf, int len) {
 	params << "choosefile" << QString() /* caption */ << QString() /* initial */ << "*" /* filter */ << (isnew ? "newfile" : "file");
 	auto res = RKRBackend::this_pointer->handlePlainGenericRequest(params, true);
 
-	QByteArray localres = RKRBackend::fromUtf8(res.ret.toString());
+	QByteArray localres = RKTextCodec::toNative(res.ret.toString());
 	qstrncpy ((char *) buf, localres.data(), len);
 
 // return length of filename (strlen(buf))
@@ -784,7 +783,7 @@ void RBusy (int busy) {
 			}
 			if (RKRBackend::this_pointer->current_command->type & RCommand::CCCommand) {
 				QByteArray chunk = RKRBackend::repl_status.user_command_buffer.mid (RKRBackend::repl_status.user_command_parsed_up_to, RKRBackend::repl_status.user_command_transmitted_up_to - RKRBackend::repl_status.user_command_parsed_up_to);
-				RKRBackend::this_pointer->printCommand (RKRBackend::toUtf8 (chunk.data()));
+				RKRBackend::this_pointer->printCommand(RKTextCodec::fromNative(chunk.data()));
 			}
 			RKRBackend::repl_status.user_command_parsed_up_to = RKRBackend::repl_status.user_command_transmitted_up_to;
 			RKRBackend::repl_status.user_command_status = RKRBackend::RKReplStatus::UserCommandRunning;
@@ -938,8 +937,8 @@ SEXP doSubstackCall (SEXP _call, SEXP _args) {
 
 	// For now, for simplicity, assume args are always strings, although possibly nested in lists
 	auto ret = RKRBackend::this_pointer->handleRequestWithSubcommands(call, RKRSupport::SEXPToNestedStrings(_args));
-	if (!ret.warning.isEmpty()) Rf_warning(RKRBackend::fromUtf8(ret.warning).constData());  // print warnings, first, as errors will cause a stop
-	if (!ret.error.isEmpty()) Rf_error(RKRBackend::fromUtf8(ret.error.toLatin1()).constData());
+	if (!ret.warning.isEmpty()) Rf_warning(RKTextCodec::toNative(ret.warning).constData());  // print warnings, first, as errors will cause a stop
+	if (!ret.error.isEmpty()) Rf_error(RKTextCodec::toNative(ret.error).constData());
 
 	return RKRSupport::QVariantToSEXP(ret.ret);
 }
@@ -950,8 +949,8 @@ SEXP doPlainGenericRequest (SEXP call, SEXP synchronous) {
 	R_CheckUserInterrupt ();
 
 	auto ret = RKRBackend::this_pointer->handlePlainGenericRequest(RKRSupport::SEXPToStringList(call), RKRSupport::SEXPToInt(synchronous));
-	if (!ret.warning.isEmpty()) Rf_warning(RKRBackend::fromUtf8(ret.warning).constData());  // print warnings, first, as errors will cause a stop
-	if (!ret.error.isEmpty()) Rf_error(RKRBackend::fromUtf8(ret.error.toLatin1()).constData());
+	if (!ret.warning.isEmpty()) Rf_warning(RKTextCodec::toNative(ret.warning).constData());  // print warnings, first, as errors will cause a stop
+	if (!ret.error.isEmpty()) Rf_error(RKTextCodec::toNative(ret.error).constData());
 
 	return RKRSupport::QVariantToSEXP(ret.ret);
 }
@@ -983,9 +982,6 @@ SEXP doSimpleBackendCall (SEXP _call) {
 	} else if (call == QStringLiteral ("error")) {  // capture error message
 		doError (list.value (1));
 		return R_NilValue;
-	} else if (call == QStringLiteral ("locale.name")) {
-		RK_ASSERT (QTextCodec::codecForLocale());
-		return (RKRSupport::StringListToSEXP (QStringList (QTextCodec::codecForLocale()->name ().data ())));
 	} else if (call == QStringLiteral ("tempdir")) {
 		return (RKRSupport::StringListToSEXP (QStringList (RKRBackendProtocolBackend::dataDir ())));
 	}
@@ -998,13 +994,11 @@ void R_CheckStackWrapper (void *) {
 	R_CheckStack ();
 }
 
-SEXP doUpdateLocale () {
-	RK_TRACE (RBACKEND);
+SEXP doUpdateLocale() { // Re-initializes the text codec used for native <-> UTF-8 conversion; always returns R_NilValue
+	RK_TRACE(RBACKEND);
 
-	RK_DEBUG (RBACKEND, DL_WARNING, "Changing locale");
-	RKRBackend::this_pointer->current_locale_encoder = QStringEncoder(QStringEncoder::System);  // NOTE: shall pass non-representable characters unmodified, rather than stripping them.
-	RKRBackend::this_pointer->current_locale_decoder = QStringDecoder(QStringEncoder::System);
-	RK_DEBUG (RBACKEND, DL_WARNING, "New locale codec is %s", RKRBackend::this_pointer->current_locale_decoder.name());
+	RK_DEBUG(RBACKEND, DL_WARNING, "Changing locale");
+	RKTextCodec::reinit(); // closes any previously opened Riconv converters and opens fresh ones for the native encoding
 
 	return R_NilValue;
 }
@@ -1251,7 +1245,7 @@ SEXP parseCommand (const QString &command_qstring, RKRBackend::RKWardRError *err
 	SafeParseWrap wrap;
 	wrap.status = PARSE_NULL;
 
-	QByteArray localc = RKRBackend::fromUtf8 (command_qstring); // needed so the string below does not go out of scope
+	QByteArray localc = RKTextCodec::toNative(command_qstring); // needed so the string below does not go out of scope
 	const char *command = localc.data ();
 
 	PROTECT(wrap.cv=Rf_allocVector(STRSXP, 1));
@@ -1477,8 +1471,8 @@ void RKRBackend::catToOutputFile (const QString &out) {
 		RK_ASSERT (false);
 		return;
 	}
-	f.write (RKRBackend::fromUtf8 (out));
-	f.close ();
+	f.write(RKTextCodec::toNative(out));
+	f.close();
 }
 
 void RKRBackend::printCommand (const QString &command) {
@@ -1536,7 +1530,7 @@ void RKRBackend::commandFinished (bool check_object_updates_needed) {
 
 		// This method may look a bit over-complex, but remember that repl_status.user_command_successful_up_to works on an *encoded* buffer
 		QByteArray remainder_encoded = repl_status.user_command_buffer.mid (repl_status.user_command_successful_up_to);
-		QString remainder = current_locale_decoder(remainder_encoded);
+		QString remainder = RKTextCodec::fromNative(remainder_encoded);
 		current_command->has_been_run_up_to = current_command->command.length () - remainder.length ();
 	}
 
diff --git a/rkward/rbackend/rkrbackend.h b/rkward/rbackend/rkrbackend.h
index 80fc284c4..6957b81e2 100644
--- a/rkward/rbackend/rkrbackend.h
+++ b/rkward/rbackend/rkrbackend.h
@@ -14,8 +14,6 @@ SPDX-License-Identifier: GPL-2.0-or-later
 #include <QVariant>
 #include <QRecursiveMutex>
 #include <QStringList>
-#include <QStringEncoder>
-#include <QStringDecoder>
 #include <QEvent>
 
 #include "rcommand.h"
@@ -129,15 +127,6 @@ handleHistoricalSubstackRequest(). Exactly which requests get handled by which f
 	void kill () { killed = ExitNow; };
 	bool isKilled () { return (killed != NotKilled); };
 
-	static QString toUtf8 (const char *local_coded) {
-		return this_pointer->current_locale_decoder(local_coded);
-	}
-	static QByteArray fromUtf8 (const QString &uni_coded) {
-		return this_pointer->current_locale_encoder(uni_coded);
-	}
-	QStringEncoder current_locale_encoder;
-	QStringDecoder current_locale_decoder;
-
 	struct RKReplStatus {
 		QByteArray user_command_buffer;
 		int user_command_transmitted_up_to;
diff --git a/rkward/rbackend/rkrsupport.cpp b/rkward/rbackend/rkrsupport.cpp
index 552e53845..a847d295c 100644
--- a/rkward/rbackend/rkrsupport.cpp
+++ b/rkward/rbackend/rkrsupport.cpp
@@ -9,6 +9,7 @@ SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <Rdefines.h>
 #include <Rversion.h>
+#include <R_ext/Riconv.h>
 
 // needed to detect CHARSXP encoding
 #define IS_UTF8(x) (Rf_getCharCE(x) == CE_UTF8)
@@ -106,7 +107,7 @@ QStringList RKRSupport::SEXPToStringList (SEXP from_exp) {
 				} else if (IS_LATIN1 (dummy)) {
 					list.append (QString::fromLatin1 (CHAR (dummy)));
 				} else {
-					list.append (RKRBackend::toUtf8 (CHAR (dummy)));
+					list.append(RKTextCodec::fromNative(CHAR(dummy)));
 				}
 			}
 		}
@@ -120,12 +121,7 @@ SEXP RKRSupport::StringListToSEXP (const QStringList& list) {
 
 	SEXP ret = Rf_allocVector (STRSXP, list.size ());
 	for (int i = 0; i < list.size (); ++i) {
-#if R_VERSION >= R_Version(2,13,0)
-		SET_STRING_ELT (ret, i, Rf_mkCharCE (list[i].toUtf8 ().constData(), CE_UTF8));
-#else
-		// TODO Rf_mkCharCE _might_ have been introduced earlier. Check if still an ongoing concern.
-		SET_STRING_ELT (ret, i, Rf_mkChar (RKRBackend::fromUtf8 (list[i]).data ()));
-#endif
+		SET_STRING_ELT(ret, i, Rf_mkCharCE(list[i].toUtf8().constData(), CE_UTF8));
 	}
 	return ret;
 }
@@ -151,12 +147,7 @@ SEXP RKRSupport::QVariantToSEXP(const QVariant& var) {
 
 	SEXP ret = Rf_allocVector (STRSXP, list.size ());
 	for (int i = 0; i < list.size (); ++i) {
-#if R_VERSION >= R_Version(2,13,0)
-		SET_STRING_ELT (ret, i, Rf_mkCharCE (list[i].toUtf8 ().constData(), CE_UTF8));
-#else
-		// TODO Rf_mkCharCE _might_ have been introduced earlier. Check if still an ongoing concern.
-		SET_STRING_ELT (ret, i, Rf_mkChar (RKRBackend::fromUtf8 (list[i]).data ()));
-#endif
+		SET_STRING_ELT(ret, i, Rf_mkCharCE(list[i].toUtf8().constData(), CE_UTF8));
 	}
 	return ret;
 }
@@ -400,3 +391,41 @@ RKRShadowEnvironment::Result RKRShadowEnvironment::diffAndUpdate() {
 	RK_DEBUG(RBACKEND, DL_DEBUG, "removed %s\n", qPrintable(res.removed.join(", ")));
 	return res;
 }
+
+QByteArray RKTextCodec::doConv(void *cd, const QByteArray &inp) { // Converts inp using the Riconv descriptor cd; bytes that cannot be converted are copied through verbatim
+	if (!cd || cd == reinterpret_cast<void *>(-1)) return inp; // no usable converter (never opened, or Riconv_open() failed): pass the input through rather than crash
+	const char *inbufpos = inp.constData();
+	size_t inbytesleft = inp.size();
+	char outbuf[8192]; // the conversion proceeds in rounds of at most this many output bytes
+	QByteArray ret;
+	while (inbytesleft) {
+		char *outbufpos = outbuf;
+		size_t outbytesleft = sizeof(outbuf);
+		Riconv(cd, nullptr, nullptr, &outbufpos, &outbytesleft); // init
+
+		Riconv(cd, &inbufpos, &inbytesleft, &outbufpos, &outbytesleft);
+		ret.append(outbuf, sizeof(outbuf) - outbytesleft); // keep whatever was converted in this round
+// Do we need 0 termination?
+		if (!inbytesleft) return ret; // done
+
+		if (outbytesleft > 100) {
+			// conversion stopped although the output buffer still has plenty of room ->
+			// we must have encountered an invalid / incomplete multibyte char in inbuf. Copy that byte as is, and try the next one.
+			ret.append(*inbufpos);
+			inbufpos++;
+			inbytesleft--;
+		} // NOTE else: output buffer wasn't large enough: we just loop
+	}
+	return ret;
+}
+
+void *RKTextCodec::from_native = nullptr; // Riconv descriptor: native encoding -> UTF-8 (nullptr until reinit() has run)
+void *RKTextCodec::to_native = nullptr; // Riconv descriptor: UTF-8 -> native encoding (nullptr until reinit() has run)
+void RKTextCodec::reinit() { // Closes any previously opened converters, then opens fresh ones
+	const auto isOpen = [](void *cd) { return cd && cd != reinterpret_cast<void *>(-1); }; // Riconv_open() signals failure with (void*)-1
+	if (isOpen(from_native)) Riconv_close(from_native);
+	if (isOpen(to_native)) Riconv_close(to_native);
+	// Riconv_open() takes (tocode, fromcode); "" stands for the current native encoding
+	from_native = Riconv_open("UTF-8", "");
+	to_native = Riconv_open("", "UTF-8");
+}
diff --git a/rkward/rbackend/rkrsupport.h b/rkward/rbackend/rkrsupport.h
index 5385ec6e2..ae1fe3ee1 100644
--- a/rkward/rbackend/rkrsupport.h
+++ b/rkward/rbackend/rkrsupport.h
@@ -58,4 +58,19 @@ private:
 	static SEXP shadowenvbase;
 };
 
+class RKTextCodec { // Converts text between the native (locale) encoding and Qt strings, using the R iconv functions
+public:
+	static QString fromNative(const QByteArray& buf) { // natively encoded bytes -> QString
+		return QString::fromUtf8(doConv(from_native, buf));
+	}
+	static QByteArray toNative(const QString& buf) { // QString -> natively encoded bytes
+		return doConv(to_native, buf.toUtf8()); // the input is UTF-8 here, so the UTF-8 -> native converter is the one to use
+	}
+	static void reinit(); // (re-)opens both converters; defined in rkrsupport.cpp
+private:
+	static QByteArray doConv(void *cd, const QByteArray &inp); // runs inp through the Riconv descriptor cd
+	static void *from_native; // Riconv descriptor converting native encoding -> UTF-8
+	static void *to_native; // Riconv descriptor converting UTF-8 -> native encoding
+};
+
 #endif
diff --git a/rkward/rbackend/rpackages/rkward/R/rk.filename-functions.R b/rkward/rbackend/rpackages/rkward/R/rk.filename-functions.R
index 30ceed1a3..1edee5b88 100644
--- a/rkward/rbackend/rpackages/rkward/R/rk.filename-functions.R
+++ b/rkward/rbackend/rpackages/rkward/R/rk.filename-functions.R
@@ -126,7 +126,14 @@
 	assign (".rk.output.html.file", x, .rk.variables)
 
 	if (!file.exists (x)) {
-		.rk.cat.output (paste ("<?xml version=\"1.0\" encoding=\"", .rk.do.simple.call ("locale.name"), "\"?>\n", sep=""))
+		encoding.name <- function() {  # best guess at the name of the current native encoding, for the XML declaration written below
+			li <- l10n_info();
+			if(isTRUE(li$'UTF-8')) return("UTF-8")  # hopefully the most common case, these days
+			if(!is.null(li$codeset)) return(li$codeset)
+			if(!is.null(li$codepage)) return(paste0("windows-", li$codepage))
+			return(tail(strsplit(Sys.getlocale("LC_CTYPE"), ".", fixed=TRUE)[[1]], 1))  # last resort: whatever follows the last "." in the LC_CTYPE locale name
+		}
+		.rk.cat.output (paste ("<?xml version=\"1.0\" encoding=\"", encoding.name(), "\"?>\n", sep=""))
 		.rk.cat.output ("<html><head>\n<title>RKWard Output</title>\n")
 		if (!is.null (css)) {
 			cssfilename <- paste (sub ("\\.[^.]*$", "", basename (x)), ".css", sep="")



More information about the rkward-tracker mailing list