[education/rkward] rkward: Add .Rmd parsing (navigation not functional, yet)

Thomas Friedrichsmeier null at kde.org
Mon May 26 15:06:38 BST 2025


Git commit 39163be97a6fa3bcf6ad9387ea7a618b920c3295 by Thomas Friedrichsmeier.
Committed on 26/05/2025 at 14:06.
Pushed by tfry into branch 'master'.

Add .Rmd parsing (navigation not functional, yet)

A  +26   -0    rkward/autotests/data/script1.Rmd
M  +31   -19   rkward/autotests/rkparsedscript_test.cpp
M  +54   -4    rkward/misc/rkparsedscript.cpp
M  +2    -1    rkward/misc/rkparsedscript.h
M  +1    -3    rkward/windows/rkcommandeditorwindow.cpp

https://invent.kde.org/education/rkward/-/commit/39163be97a6fa3bcf6ad9387ea7a618b920c3295

diff --git a/rkward/autotests/data/script1.Rmd b/rkward/autotests/data/script1.Rmd
new file mode 100644
index 000000000..370d57432
--- /dev/null
+++ b/rkward/autotests/data/script1.Rmd
@@ -0,0 +1,26 @@
+<!--
+- This file is part of the RKWard project (https://rkward.kde.org).
+SPDX-FileCopyrightText: by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
+SPDX-FileContributor: The RKWard Team <rkward-devel at kde.org>
+SPDX-License-Identifier: GPL-2.0-or-later
+-->
+
+This is markdown ``{r} inline <- `markdown` `` and this is too, with some `inline(code)`.
+
+Usually, there would be a lot of markdown (and this may contain many relevant markers, such as {}, and []).
+
+```{r}
+symb01 <-symb02()
+symb03 <- symb04
+symb05(symb06); symb07
+```
+
+Usually, code chunks shall contain {r}, but we're relaxed about that.
+
+```
+symb11 <-symb12()
+symb13 <- symb14
+symb15(symb16); symb17
+```
+
+Some more text
diff --git a/rkward/autotests/rkparsedscript_test.cpp b/rkward/autotests/rkparsedscript_test.cpp
index 57f4816e0..80cdf823f 100644
--- a/rkward/autotests/rkparsedscript_test.cpp
+++ b/rkward/autotests/rkparsedscript_test.cpp
@@ -41,12 +41,12 @@ class RKParsedScriptTest : public QObject {
 	QString script;
 	RKParsedScript ps;
 
-	void loadScript(const QString &relname) {
+	void loadScript(const QString &relname, bool rmd=false) {
 		QFile f(QStringLiteral(TEST_DATA_DIR) + relname);
 		bool ok = f.open(QIODevice::ReadOnly | QIODevice::Text);
 		QVERIFY(ok);
 		script = QString::fromUtf8(f.readAll());
-		ps = RKParsedScript(script);
+		ps = RKParsedScript(script, rmd);
 	}
 
 	void compareScript(RKParsedScript::ContextIndex pos, const QString &expected) {
@@ -58,25 +58,11 @@ class RKParsedScriptTest : public QObject {
 		compareScript(newpos, expected);
 		return newpos;
 	}
-  private Q_SLOTS:
-	void init() {
-		testLog("Starting next test");
-	}
-
-	void cleanup() {
-		testLog("Cleanup");
-	}
-
-	void initTestCase() {
-		QStandardPaths::setTestModeEnabled(true);
-		RK_Debug::RK_Debug_Level = DL_DEBUG;
-	}
 
-	void sanityTest() {
-		// no matter where we go, and for how long, we shall not crash or hang!
-		loadScript(u"script1.R"_s);
+	void sanityTestHelper() {
 		for (int startpos = 0; startpos < script.length(); ++startpos) {
-			const auto ctx0 = ps.contextAtPos(script.length() / 2);
+			const auto ctx0 = ps.contextAtPos(startpos);
+			testLog("%d", startpos);
 			auto ctx = ctx0;
 			while (ctx.valid())
 				ctx = ps.nextContext(ctx);
@@ -106,6 +92,25 @@ class RKParsedScriptTest : public QObject {
 				ctx = ps.parentRegion(ctx);
 		}
 	}
+  private Q_SLOTS:
+	void init() {
+		testLog("Starting next test");
+	}
+
+	void cleanup() {
+		testLog("Cleanup");
+	}
+
+	void initTestCase() {
+		QStandardPaths::setTestModeEnabled(true);
+		RK_Debug::RK_Debug_Level = DL_DEBUG;
+	}
+
+	void sanityTest() {
+		// no matter where we go, and for how long, we shall not crash or hang!
+		loadScript(u"script1.R"_s);
+		sanityTestHelper();
+	}
 
 	void nextPrevStatement() {
 		loadScript(u"script1.R"_s);
@@ -244,6 +249,13 @@ class RKParsedScriptTest : public QObject {
 		ctx = ps.contextAtPos(script.indexOf(u"Symbol08"));
 		QCOMPARE(script.at(ps.lastPositionInStatement(ctx)), u']');
 	}
+
+	void rmdTest() {
+		loadScript(u"script1.Rmd"_s, true);
+		sanityTestHelper();
+		testLog("%s", qPrintable(ps.serialize()));
+		// TODO real test
+	}
 };
 
 QTEST_MAIN(RKParsedScriptTest)
diff --git a/rkward/misc/rkparsedscript.cpp b/rkward/misc/rkparsedscript.cpp
index 9aa19be9b..9e1446a4d 100644
--- a/rkward/misc/rkparsedscript.cpp
+++ b/rkward/misc/rkparsedscript.cpp
@@ -12,14 +12,64 @@ SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "../debug.h"
 
-RKParsedScript::RKParsedScript(const QString &content) : prevtype(None), allow_merge(true) {
+RKParsedScript::RKParsedScript(const QString &content, bool rmd) : prevtype(None), allow_merge(true) {
 	RK_TRACE(MISC);
 
 	context_list.reserve(200); // just a very wild guess.
-	addContext(Top, -1, content);
-	RK_ASSERT(context_list.front().end == content.size());
+	if (rmd) {
+		context_list.emplace_back(Top, -1, content.size());
+		int i = -1;
+		while (i < content.size()) {
+			i = addNextMarkdownChunk(i, content);
+		}
+	} else {
+		addContext(Top, -1, content);
+		RK_ASSERT(context_list.front().end == content.size());
+	}
 };
 
+int RKParsedScript::addNextMarkdownChunk(int start, const QString &content) {
+	int pos = start;
+	int chunkstart = -1;
+	QString chunk_barrier;
+	while (++pos < content.length()) {
+		QChar c = content.at(pos);
+		if (c == u'\\') ++pos;
+		else if (c == u'`') {
+			chunk_barrier = c;
+			while (++pos < content.length() && (content.at(pos) == u'`')) {
+				chunk_barrier.append(u'`');
+			}
+			// if there is a leading parameter block, skip that
+			if ((pos < content.length()) && content.at(pos) == u'{') {
+				while (++pos < content.length()) {
+					c = content.at(pos);
+					if (c == u'\\') ++pos;
+					else if (c == u'}') {
+						++pos;
+						break;
+					}
+				}
+			}
+			chunkstart = pos;
+			break;
+		}
+	}
+	if (chunkstart < 0 || chunkstart >= content.size()) {
+		context_list.emplace_back(Comment, start, content.size());
+		return content.size();
+	}
+	int chunkend = content.indexOf(chunk_barrier, chunkstart);
+	if (chunkend < 0) chunkend = content.size();
+
+	context_list.emplace_back(Comment, start, chunkstart-1);
+	addContext(Delimiter, chunkstart-1, content);
+	addContext(Top, chunkstart, content.left(chunkend)); // in case markdown region has incomplete syntax
+	                                                     // limit parsing to the actual markdown region
+	addContext(Delimiter, chunkend, content);
+	return chunkend + chunk_barrier.length();
+}
+
 int RKParsedScript::addContext(ContextType type, int start, const QString &content) {
 	RK_TRACE(MISC);
 	int index = context_list.size();
@@ -239,7 +289,7 @@ RKParsedScript::ContextIndex RKParsedScript::nextStatement(const ContextIndex fr
 	// forward past end of current statement
 	auto ni = nextContext(lastContextInStatement(from));
 	// consider advancing from "b" in "a = (b + c) + d; e" -> should be e, not "+ d"
-	while (getContext(ni).type != Delimiter) ni = nextContext(lastContextInStatement(ni));
+	while (ni.valid() && getContext(ni).type != Delimiter) ni = nextContext(lastContextInStatement(ni));
 	// skip over any following non-interesting contexts
 	while (true) {
 		auto type = getContext(ni).type;
diff --git a/rkward/misc/rkparsedscript.h b/rkward/misc/rkparsedscript.h
index efb80a94c..d25c1c6ca 100644
--- a/rkward/misc/rkparsedscript.h
+++ b/rkward/misc/rkparsedscript.h
@@ -51,7 +51,7 @@ class RKParsedScript {
 		int end;
 	};
 
-	explicit RKParsedScript(const QString &content = QString());
+	explicit RKParsedScript(const QString &content = QString(), bool rmd=false);
 
 	enum SearchFlags {
 		NoFlags,
@@ -111,6 +111,7 @@ class RKParsedScript {
   private:
 	// add and parse a context. This is where the actual parsing takes place
 	int addContext(ContextType type, int start, const QString &content);
+	int addNextMarkdownChunk(int start, const QString &content);
 
 	friend class RKParsedScriptTest;
 	// NOTE: used in debugging, only
diff --git a/rkward/windows/rkcommandeditorwindow.cpp b/rkward/windows/rkcommandeditorwindow.cpp
index 3b93b1055..5d4edb1e6 100644
--- a/rkward/windows/rkcommandeditorwindow.cpp
+++ b/rkward/windows/rkcommandeditorwindow.cpp
@@ -86,14 +86,12 @@ class RKCodeNavigation : public QWidget {
 		box->addWidget(input);
 		connect(input, &QLineEdit::textChanged, this, &RKCodeNavigation::navigate);
 
-		ps = RKParsedScript(doc->text());
+		ps = RKParsedScript(doc->text(), doc->highlightingMode() == u"R Markdown"_s);
 		StoredPosition initial;
 		// translate cursor position to string index
 		initial.pos = cursorToPosition(view->cursorPosition());
 		initial.selection = view->selectionRange();
 		stored_positions.append(initial);
-
-		bool multilanguage = doc->embeddedHighlightingModes().size() > 1;
 	}
 
 	void updatePos() {



More information about the rkward-tracker mailing list