[education/rkward] rkward/windows: Change parse data representation to flat list

Thomas Friedrichsmeier null at kde.org
Mon May 26 15:06:38 BST 2025


Git commit ab4e3772b06c593b99c4dac1cae8dc3a59d6ee51 by Thomas Friedrichsmeier.
Committed on 26/05/2025 at 14:06.
Pushed by tfry into branch 'master'.

Change parse data representation to flat list

M  +141  -85   rkward/windows/rkcommandeditorwindow.cpp

https://invent.kde.org/education/rkward/-/commit/ab4e3772b06c593b99c4dac1cae8dc3a59d6ee51

diff --git a/rkward/windows/rkcommandeditorwindow.cpp b/rkward/windows/rkcommandeditorwindow.cpp
index 14a94b0bd..0f2b68673 100644
--- a/rkward/windows/rkcommandeditorwindow.cpp
+++ b/rkward/windows/rkcommandeditorwindow.cpp
@@ -1,6 +1,6 @@
 /*
 rkcommandeditorwindow - This file is part of RKWard (https://rkward.kde.org). Created: Mon Aug 30 2004
-SPDX-FileCopyrightText: 2004-2022 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
+SPDX-FileCopyrightText: 2004-2025 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
 SPDX-FileContributor: The RKWard Team <rkward-devel at kde.org>
 SPDX-License-Identifier: GPL-2.0-or-later
 */
@@ -67,27 +67,79 @@ SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "../debug.h"
 
-/** Very crude, but very fast R parser. Parses the basic structure, only */
-class RKScriptContext {
+/** Very crude, but very fast R parser. Parses the basic structure, only
+
+Technical note on data structure: While, logically, contexts form a nested hierarchy, a nested data layout does not really lend itself
+to our purpose, which is to navigate the underlying code, sequentially. So rather, we keep a flat list of contexts, sorted (inherently, during parsing)
+by start position.
+
+Inside this flat list, a child context is defined by starting after (or at) the parent's start, and ending before (or at) the parent's end. Child
+contexts are always found after their parent in the list.
+*/
+class RKParsedScript {
 public:
 	enum ContextType {
+		None,
 		Top,
 		Parenthesis,
 		Brace,
 		Bracket,
-		Comment,
+		Comment, // 5
 		SingleQuoted,
 		DoubleQuoted,
 		BackQuoted,
 		SubsetOperator,
-		OtherOperator,
+		OtherOperator, // 10
 		Delimiter,
 		AnySymbol
-	} type;
+	};
+
+	struct Context {
+		Context(ContextType type, int start) : type(type), start(start) {};
+		Context(ContextType type, int start, int end) : type(type), start(start), end(end) {};
+		ContextType type;
+		int start;
+		int end;
+	};
+
+	std::vector<Context> context_list;
+
+	int contextAtPos(int pos) const {
+		// Context 0 is Top, not really of interest
+		for (int i = 1; i < context_list.size(); ++i) {
+			if (context_list.at(i).start > pos) {
+				return i - 1;
+			}
+		}
+		return 0;
+	}
+	
+	const Context &getContext(int index) const {
+		return context_list.at(index);
+	}
+
+	RKParsedScript(const QString &content) {
+		context_list.reserve(200); // just a very wild guess
+		addContext(Top, -1, content);
+	};
+
+	int addContext(ContextType type, int start, const QString &content) {
+		ContextType prevtype = context_list.empty() ? None : context_list.back().type;
 
-	RKScriptContext(ContextType type, int start, const QStringView &content) : type(type), start(start) {
-		int pos = start-1;
+		int index = context_list.size();
+		// some contexts need (or benefit from) special handling depending on the preceding context
+		if (type == OtherOperator && prevtype == OtherOperator) {
+			// Merge any two subsequent operators into one token
+			// i.e. do not add a context, we'll reuse the previous one.
+			--index;
+		} else if (type == Delimiter && content.at(start) == u'\n' && (prevtype == OtherOperator || prevtype == SubsetOperator)) {
+			// newlines do not count as delimiter on operator RHS, so skip ahead, instead of really adding this
+			return start;
+		} else {
+			context_list.emplace_back(type, start); // end will be filled in, later
+		}
 
+		int pos = start;
 		if (type == SingleQuoted || type == DoubleQuoted || type == BackQuoted) {
 			while (++pos < content.length()) {
 				const QChar c = content.at(pos);
@@ -130,65 +182,65 @@ public:
 			}
 		}
 
-		end = pos;
-	}
+		// NOTE: we can't just keep a reference to the context at the start of this function, as the vector
+		//       may re-allocate during nested parsing
+		context_list.at(index).end = pos;
+		return pos;
+	};
 
-	int addContext(ContextType type, int start, const QStringView &content) {
-		auto ctx = RKScriptContext(type, start+1, content);
-		// post cleanups (all these depend on the previous context at this level:
-		if (!children.isEmpty()) {
-			auto &prev = children.last();
-			auto prevtype = prev.type;
-			// Merge any two subsequent operators into one token
-			if (type == OtherOperator && prevtype == OtherOperator) {
-				prev.end = ctx.end;
-				return ctx.end;
-			}
-			// newlines do not count as delimiter on operator RHS
-			if (type == Delimiter && content.at(start) == '\n' && (prevtype == OtherOperator || prevtype == SubsetOperator)) {
-				return ctx.end;
-			}
-			// TODO: post cleanup (possibly incomplete list):
-			//       - special treatment for subsetting operators: Treat as continuation of symbol -> but maybe not here?
-		}
+	// NOTE: used in debugging, only
+	QString serialize() {
+		QString ret;
+		std::vector<Context> stack;
+		stack.push_back(Context(None, -1, INT_MAX)); // dummy context, to avoid empty stack
 
-		children.append(ctx);
-		return ctx.end;
-	}
+		for (unsigned int i = 0; i < context_list.size(); ++i) {
+			const auto ctx = context_list.at(i);
 
-	// purely for debugging:
-	QString serialize(int level=0) const {
-		QString ret;
+			// end any finished contexts
+			while (ctx.start >= stack.back().end) {
+				ret += endContext(stack.back(), stack.size());
+				stack.pop_back();
+			}
 
-		if (type == Parenthesis) ret += u'(';
-		if (type == Brace) ret += u'{';
-		if (type == Bracket) ret += u'[';
-		if (type == SingleQuoted) ret += u'\'';
-		if (type == DoubleQuoted) ret += u'"';
-		if (type == BackQuoted) ret += u'`';
-		if (type == Comment) ret += u'#';
-		if (type == SubsetOperator) ret += u'$';
-		if (type == OtherOperator) ret += u'+';
-		if (type == AnySymbol) ret += u'x';
-
-		for(const auto &c : std::as_const(children)) ret += c.serialize(level+4);
-
-		if (type == Parenthesis) ret += u')';
-		if (type == Brace) ret += u'}';
-		if (type == Bracket) ret += u']';
-		if (type == SingleQuoted) ret += u'\'';
-		if (type == DoubleQuoted) ret += u'"';
-		if (type == BackQuoted) ret += u'`';
-		if (type == Comment || type == Delimiter) {
-			ret += u'\n';
-			for (int i = 0; i < level; ++i) ret.append(u' ');
+			// now deal with the current context
+			stack.push_back(ctx);
+			const auto type = ctx.type;
+			if (type == Parenthesis) ret += u'(';
+			if (type == Brace) ret += u'{';
+			if (type == Bracket) ret += u'[';
+			if (type == SingleQuoted) ret += u'\'';
+			if (type == DoubleQuoted) ret += u'"';
+			if (type == BackQuoted) ret += u'`';
+			if (type == Comment) ret += u'#';
+			if (type == SubsetOperator) ret += u'$';
+			if (type == OtherOperator) ret += u'+';
+			if (type == AnySymbol) ret += u'x';
 		}
+		while (!stack.empty()) {
+			ret += endContext(stack.back(), stack.size());
+			stack.pop_back();
+		}
+
 		return ret;
 	}
 
-	int start;
-	int end;
-	QList<RKScriptContext> children;
+	QString endContext(const Context &ctx, int level) {
+		const auto ptype = ctx.type;
+
+		if (ptype == Parenthesis) return u")"_s;
+		if (ptype == Brace) return u"}"_s;
+		if (ptype == Bracket) return u"]"_s;
+		if (ptype == SingleQuoted) return u"'"_s;
+		if (ptype == DoubleQuoted) return u"\""_s;
+		if (ptype == BackQuoted) return u"`"_s;
+		if (ptype == Comment || ptype == Delimiter) {
+			QString ret = u"\n"_s;
+			for (int j = 0; j < (level-1) * 4; ++j) ret += u" "_s;
+			return ret;
+		}
+		return QString();
+	}
 };
 
 class RKCodeNavigation : public QWidget {
@@ -226,33 +278,37 @@ class RKCodeNavigation : public QWidget {
 	}
 
 	void navigate(const QString &current) {
-		qDebug("%s", qPrintable(RKScriptContext(RKScriptContext::Top, 0, doc->text()).serialize()));
-/*		auto cursor = view->cursorPosition();
-		enum { Seek, FoundStart, FoundEnd, eof } status = Seek;
-		for (; cursor.advance(); status != Seek) {
-			auto s = doc->defaultStyleAt(cursor);
-			// Skip over all comments (any easier to type logic for this?)
-			if (s == KSyntaxHighlighting::Theme::Comment ||
-			s == KSyntaxHighlighting::Theme::Documentation ||
-			s == KSyntaxHighlighting::Theme::Annotation ||
-			s == KSyntaxHighlighting::Theme::CommentVar ||
-			s == KSyntaxHighlighting::Theme::RegionMarker ||
-			s == KSyntaxHighlighting::Theme::Information ||
-			s == KSyntaxHighlighting::Theme::Warning ||
-			s == KSyntaxHighlighting::Theme::Alert) {
-				continue;
-			}
-			// Skip over all strings (any easier to type logic for this?)
-			if (s == KSyntaxHighlighting::Theme::Char ||
-			    s == KSyntaxHighlighting::Theme::SpecialChar ||
-			    s == KSyntaxHighlighting::Theme::String ||
-			    s == KSyntaxHighlighting::Theme::VerbatimString ||
-			    s == KSyntaxHighlighting::Theme::SpecialString) {
-				continue;
-			}
-			if (s == KSyntaxHighlighting::Theme::Operator ) {
+		// TODO: cache the parse tree. But for testing, it's not so bad to have it all parsed per keypress
+		RKParsedScript tree(doc->text());
+//		qDebug("%s", qPrintable(tree.serialize()));
+
+		// translate cursor position to string index
+		const auto cursor = view->cursorPosition();
+		int pos = cursor.column();
+		for (int l = 0; l < cursor.line(); ++l) {
+			pos += doc->lineLength(l) + 1; 
+		}
+
+		// then find out, where that is in the parse tree
+		auto ci = tree.contextAtPos(pos);
+
+		// apply navigation command
+		QChar command = current.back();
+		int newpos = pos;
+		if (command == u'n') {
+#warning Will overflow-crash and is not yet correct, either
+			 newpos = tree.getContext(ci+1).start;
+		}
+		RK_DEBUG(COMMANDEDITOR, DL_WARNING, "navigate %d to %d", pos, newpos);
+
+		// translate new position back to cursor coordinates
+		for (int l = 0; l < doc->lines(); ++l) {
+			newpos -= (doc->lineLength(l) + 1);
+			if (newpos < 0) {
+				view->setCursorPosition(KTextEditor::Cursor(l, newpos + doc->lineLength(l) + 1));
+				break;
 			}
-		} */
+		}
 	}
 
 	bool eventFilter(QObject *, QEvent *event) override {



More information about the rkward-tracker mailing list