[education/rkward] rkward: Split out RKParsedScript-class into separate files
Thomas Friedrichsmeier
null at kde.org
Mon May 26 15:06:38 BST 2025
Git commit 730cc38a87681ebd788e8df87238d10ee3e470e8 by Thomas Friedrichsmeier.
Committed on 26/05/2025 at 14:06.
Pushed by tfry into branch 'master'.
Split out RKParsedScript-class into separate files
M +1 -0 rkward/misc/CMakeLists.txt
A +149 -0 rkward/misc/rkparsedscript.cpp [License: GPL(v2.0+)]
A +77 -0 rkward/misc/rkparsedscript.h [License: GPL(v2.0+)]
M +0 -178 rkward/windows/rkcommandeditorwindow.cpp
https://invent.kde.org/education/rkward/-/commit/730cc38a87681ebd788e8df87238d10ee3e470e8
diff --git a/rkward/misc/CMakeLists.txt b/rkward/misc/CMakeLists.txt
index ff0da775d..af1ee9f76 100644
--- a/rkward/misc/CMakeLists.txt
+++ b/rkward/misc/CMakeLists.txt
@@ -33,6 +33,7 @@ SET(misc_STAT_SRCS
rkdialogbuttonbox.cpp
rkoutputdirectory.cpp
rkstyle.cpp
+ rkparsedscript.cpp
rkparsedversion.cpp
rkradiogroup.cpp
rkrapimenu.cpp
diff --git a/rkward/misc/rkparsedscript.cpp b/rkward/misc/rkparsedscript.cpp
new file mode 100644
index 000000000..96b1c89d4
--- /dev/null
+++ b/rkward/misc/rkparsedscript.cpp
@@ -0,0 +1,149 @@
+/*
+rkparsedscript - This file is part of the RKWard project. Created: Sat May 17 2025
+SPDX-FileCopyrightText: 2025 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
+SPDX-FileContributor: The RKWard Team <rkward-devel at kde.org>
+SPDX-License-Identifier: GPL-2.0-or-later
+*/
+#include "rkparsedscript.h"
+
+#include <QChar>
+
+#include <limits.h>
+
+#include "../debug.h"
+
+/** Parse @p content right away; the resulting contexts are stored in context_list.
+ *
+ * The whole script is wrapped in a single Top context. That context is given the start position -1, because
+ * the scanning loop in addContext() increments the position before it looks at a character, i.e. the first
+ * character examined is the one at index 0. */
+RKParsedScript::RKParsedScript(const QString &content) {
+	// Pre-allocate room for the context list. The figure is a guess, not a limit: the vector grows as needed.
+	context_list.reserve(200);
+	addContext(Top, -1, content);
+}
+
+/** Append a context of type @p type, opened by the character at @p start, and scan @p content up to the end of that context.
+ *
+ * How far the scan goes depends on the type:
+ * - quoted contexts run to the matching quote character; a backslash escapes the character following it
+ * - AnySymbol runs for as long as symbol characters (letters, digits, '.', '_') follow
+ * - Comment runs up to and including the next newline
+ * - operators and delimiters consist of the single character at @p start
+ * - all other types (Top, Parenthesis, Brace, Bracket) are scanned character by character, recursing into a
+ *   child context for every token found, until the matching closing character is met
+ * Each of these also ends where @p content ends.
+ *
+ * Two cases do not add a new entry to context_list: An OtherOperator whose preceding entry in the list is an
+ * OtherOperator, too, extends that entry instead. And a newline following an operator is dropped, entirely.
+ *
+ * @param type    type of the context to add
+ * @param start   position of the character opening the context (-1 for the Top context)
+ * @param content the complete script text
+ * @return the position of the last character consumed; callers increment before reading on. The value is at or
+ *         beyond content.length(), if the input ended before the context was closed.
+ */
+int RKParsedScript::addContext(ContextType type, int start, const QString &content) {
+	// Characters that make up a symbol. Besides letters, digits, and '.', R allows '_' inside identifiers
+	// (e.g. my_var). A free-standing '_' (the pipe placeholder) is also treated as a symbol, rather than as an operator.
+	const auto isSymbolChar = [](const QChar c) {
+		return c.isLetterOrNumber() || c == u'.' || c == u'_';
+	};
+
+	const ContextType prevtype = context_list.empty() ? None : context_list.back().type;
+
+	int index = context_list.size();
+	// some contexts need (or benefit from) special handling depending on the preceding context
+	if (type == OtherOperator && prevtype == OtherOperator) {
+		// Merge any two subsequent operators into one token
+		// i.e. do not add a context, we'll reuse the previous one (its end gets moved to the new position, below).
+		--index;
+	} else if (type == Delimiter && content.at(start) == u'\n' && (prevtype == OtherOperator || prevtype == SubsetOperator)) {
+		// newlines do not count as delimiter on operator RHS, so skip ahead, instead of really adding this
+		return start;
+	} else {
+		context_list.emplace_back(type, start); // end will be filled in, later
+	}
+
+	int pos = start;
+	if (type == SingleQuoted || type == DoubleQuoted || type == BackQuoted) {
+		while (++pos < content.length()) {
+			const QChar c = content.at(pos);
+			if (c == u'\\') ++pos; // skip the escaped character, so that an escaped quote does not end the context
+			else if (c == u'\'' && type == SingleQuoted) break;
+			else if (c == u'"' && type == DoubleQuoted) break;
+			else if (c == u'`' && type == BackQuoted) break;
+		}
+	} else if (type == AnySymbol) {
+		while (++pos < content.length()) {
+			if (!isSymbolChar(content.at(pos))) {
+				// this character is not part of the symbol: step back, so the caller gets to see it
+				--pos;
+				break;
+			}
+		}
+	} else if (type == Comment) {
+		while (++pos < content.length()) {
+			if (content.at(pos) == u'\n') break;
+		}
+	} else if (type == OtherOperator || type == SubsetOperator || type == Delimiter) {
+		// leave context, immediately
+	} else {
+		// Nesting context: The order of the checks matters. E.g. a closing character that does not match the current
+		// context is not an end, and falls through to the final (operator) case.
+		while (++pos < content.length()) {
+			const QChar c = content.at(pos);
+			if (c == u'\'') pos = addContext(SingleQuoted, pos, content);
+			else if (c == u'"') pos = addContext(DoubleQuoted, pos, content);
+			else if (c == u'`') pos = addContext(BackQuoted, pos, content);
+			else if (c == u'#') pos = addContext(Comment, pos, content);
+			else if (c == u'(') pos = addContext(Parenthesis, pos, content);
+			else if (c == u')' && type == Parenthesis) break;
+			else if (c == u'{') pos = addContext(Brace, pos, content);
+			else if (c == u'}' && type == Brace) break;
+			else if (c == u'[') pos = addContext(Bracket, pos, content);
+			else if (c == u']' && type == Bracket) break;
+			else if (isSymbolChar(c)) pos = addContext(AnySymbol, pos, content);
+			else if (c == u'\n' || c == u',' || c == u';') pos = addContext(Delimiter, pos, content);
+			else if (c == u'$' || c == u'@') pos = addContext(SubsetOperator, pos, content);
+			else if (!c.isSpace()) pos = addContext(OtherOperator, pos, content);
+		}
+	}
+
+	// NOTE: we can't just keep a reference to the context at the start of this function, as the vector
+	// may re-allocate during nested parsing
+	context_list.at(index).end = pos;
+	return pos;
+}
+
+/** Find the index of the last context that starts at or before @p pos.
+ *
+ * This relies on context_list being ordered by start position. Where @p pos is not inside any context (e.g. on
+ * a space), the result is the context preceding that position.
+ *
+ * @param pos position in the parsed text
+ * @return index into context_list; 0 (the Top context), if no other context starts at or before @p pos
+ */
+int RKParsedScript::contextAtPos(int pos) const {
+	const int count = static_cast<int>(context_list.size());
+	// Context 0 is Top, not really of interest
+	for (int i = 1; i < count; ++i) {
+		if (context_list.at(i).start > pos) return i - 1;
+	}
+	// No context starts behind pos, so pos is in or behind the last context. (That is Top itself, if nothing else was parsed.)
+	return (count > 1) ? count - 1 : 0;
+}
+
+// NOTE: used in debugging, only
+/** Render the parsed structure as a compact string: one marker character per context, in list order.
+ *
+ * Brackets and quotes are written as themselves, comments as '#', subset operators as '$', other operators
+ * as '+', and symbols as 'x'. The closing text of each context is produced by serializeContextEnd(). */
+QString RKParsedScript::serialize() const {
+	QString out;
+	// Contexts that are currently open, innermost last. The sentinel at the bottom has the largest possible end,
+	// so that open.back() remains valid for as long as the loop below is running.
+	std::vector<Context> open;
+	open.push_back(Context(None, -1, INT_MAX));
+
+	for (const Context &current : context_list) {
+		// close every open context that has ended by the time the current one starts
+		while (current.start >= open.back().end) {
+			out += serializeContextEnd(open.back(), open.size());
+			open.pop_back();
+		}
+
+		// then open the current one
+		open.push_back(current);
+		switch (current.type) {
+		case Parenthesis: out += u'('; break;
+		case Brace: out += u'{'; break;
+		case Bracket: out += u'['; break;
+		case SingleQuoted: out += u'\''; break;
+		case DoubleQuoted: out += u'"'; break;
+		case BackQuoted: out += u'`'; break;
+		case Comment: out += u'#'; break;
+		case SubsetOperator: out += u'$'; break;
+		case OtherOperator: out += u'+'; break;
+		case AnySymbol: out += u'x'; break;
+		default: break; // None, Top, and Delimiter have no opening marker
+		}
+	}
+
+	// close whatever is still open, including the sentinel
+	while (!open.empty()) {
+		out += serializeContextEnd(open.back(), open.size());
+		open.pop_back();
+	}
+
+	return out;
+}
+
+using namespace Qt::Literals::StringLiterals;
+
+/** Produce the text that closes @p ctx in the debug output of serialize().
+ *
+ * @param ctx   the context being closed
+ * @param level nesting depth, counted from 1; controls the indentation following a line break
+ * @return the closing bracket or quote for bracket and quote contexts; a line break followed by four spaces per
+ *         level above the first for comments and delimiters; an empty string for every other type
+ */
+QString RKParsedScript::serializeContextEnd(const Context &ctx, int level) const {
+	switch (ctx.type) {
+	case Parenthesis:
+		return u")"_s;
+	case Brace:
+		return u"}"_s;
+	case Bracket:
+		return u"]"_s;
+	case SingleQuoted:
+		return u"'"_s;
+	case DoubleQuoted:
+		return u"\""_s;
+	case BackQuoted:
+		return u"`"_s;
+	case Comment:
+	case Delimiter: {
+		QString ret = u"\n"_s;
+		const int indent = (level - 1) * 4;
+		if (indent > 0) ret += QString(indent, QChar(u' '));
+		return ret;
+	}
+	default:
+		return QString();
+	}
+}
diff --git a/rkward/misc/rkparsedscript.h b/rkward/misc/rkparsedscript.h
new file mode 100644
index 000000000..f08b5aa36
--- /dev/null
+++ b/rkward/misc/rkparsedscript.h
@@ -0,0 +1,77 @@
+/*
+rkparsedscript - This file is part of the RKWard project. Created: Sat May 17 2025
+SPDX-FileCopyrightText: 2025 by Thomas Friedrichsmeier <thomas.friedrichsmeier at kdemail.net>
+SPDX-FileContributor: The RKWard Team <rkward-devel at kde.org>
+SPDX-License-Identifier: GPL-2.0-or-later
+*/
+#ifndef RKPARSEDSCRIPT_H
+#define RKPARSEDSCRIPT_H
+
+#include <QString>
+
+#include <vector>
+
+#include "../debug.h"
+
+/** Very crude, but very fast R parser, with some helper functions for code navigation. Parses the basic structure, only.
+
+Technical note on data structure: While, logically, contexts form a nested hierarchy, a nested data layout does not really lend itself
+to our purpose, which is to navigate the underlying code, sequentially. So rather, we keep a flat list of contexts, sorted (inherently, during parsing)
+by start position.
+
+Inside this flat list, a child context is defined by starting after (or at) the parent's start, and ending before (or at) the parent's end. Child
+contexts are always found after their parent in the list.
+*/
+class RKParsedScript {
+  public:
+	/** Type of context. Besides the all-enclosing Top, Parenthesis, Brace, and Bracket are the only ContextTypes that we actually
+	 *  consider as nested. */
+	enum ContextType {
+		None,           ///< no context; stands in for "previous type" while the list is empty, and for the sentinel in serialize()
+		Top,            ///< the script as a whole; always the first entry of the list
+		Parenthesis,    ///< ( ... )
+		Brace,          ///< { ... }
+		Bracket,        ///< [ ... ]
+		Comment,        ///< from # up to the next newline
+		SingleQuoted,   ///< ' ... '
+		DoubleQuoted,   ///< " ... "
+		BackQuoted,     ///< ` ... `
+		SubsetOperator, ///< $ or @
+		OtherOperator,  ///< any other non-space character not covered above; subsequent ones are merged into one context
+		Delimiter,      ///< newline, comma, or semicolon
+		AnySymbol       ///< identifier or number; see addContext() for the exact set of characters
+	};
+
+	/** A single entry of the flat context list. */
+	struct Context {
+		/** Create a context whose end is not known, yet. end starts out equal to start (rather than uninitialized); the parser
+		 *  fills in the real value once the context has been scanned. */
+		Context(ContextType type, int start) : type(type), start(start), end(start) {}
+		Context(ContextType type, int start, int end) : type(type), start(start), end(end) {}
+		ContextType type;
+		int start; ///< position of the character opening the context (-1 for Top)
+		int end;   ///< position of the last character consumed for this context; may lie at or beyond the end of the text, if the input ended first
+	};
+
+	/** Parses @p content, immediately. */
+	RKParsedScript(const QString &content);
+
+	/** Find the (index of the) innermost context containing pos.
+	 *  returns the previous context, if no context actually contains this position (e.g. on a space) */
+	int contextAtPos(int pos) const;
+
+	/** Access the context at @p index of the flat list. Uses std::vector::at(), i.e. throws std::out_of_range for an invalid index. */
+	const Context &getContext(int index) const {
+		return context_list.at(index);
+	}
+
+  private:
+	// add and parse a context. This is where the actual parsing takes place
+	int addContext(ContextType type, int start, const QString &content);
+
+	friend class RKCodeNavigation;
+	// NOTE: used in debugging, only
+	QString serialize() const;
+	QString serializeContextEnd(const Context &ctx, int level) const;
+
+	// I want to modify some objects in place during parsing, without triggering copy-on-write
+	// hence no Qt container
+	std::vector<Context> context_list;
+};
+
+#endif
diff --git a/rkward/windows/rkcommandeditorwindow.cpp b/rkward/windows/rkcommandeditorwindow.cpp
index 0f2b68673..902b5666c 100644
--- a/rkward/windows/rkcommandeditorwindow.cpp
+++ b/rkward/windows/rkcommandeditorwindow.cpp
@@ -65,184 +65,6 @@ SPDX-License-Identifier: GPL-2.0-or-later
#include "rktexthints.h"
#include "rkworkplace.h"
-#include "../debug.h"
-
-/** Very crude, but very fast R parser. Parses the basic structure, only
-
-Technical note on data structure: While, logically, contexts form a nested hierarchy, a nested data layout does not really lend itself
-to our purpose, which is to navigate the underlying code, sequentially. So rather, we keep a flat list of contexts, sorted (inherently, during parsing)
-by start position.
-
-Inside this flat list, a child context is defined by starting after (or at) the parent's start, and ending before (or at) the parent's end. Child
-contexts are always found after their parent in the list.
-*/
-class RKParsedScript {
-public:
- enum ContextType {
- None,
- Top,
- Parenthesis,
- Brace,
- Bracket,
- Comment, // 5
- SingleQuoted,
- DoubleQuoted,
- BackQuoted,
- SubsetOperator,
- OtherOperator, // 10
- Delimiter,
- AnySymbol
- };
-
- struct Context {
- Context(ContextType type, int start) : type(type), start(start) {};
- Context(ContextType type, int start, int end) : type(type), start(start), end(end) {};
- ContextType type;
- int start;
- int end;
- };
-
- std::vector<Context> context_list;
-
- int contextAtPos(int pos) const {
- // Context 0 is Top, not really of interest
- for (int i = 1; i < context_list.size(); ++i) {
- if (context_list.at(i).start > pos) {
- return i - 1;
- }
- }
- return 0;
- }
-
- const Context &getContext(int index) const {
- return context_list.at(index);
- }
-
- RKParsedScript(const QString &content) {
- context_list.reserve(200); // just a very wild guess
- addContext(Top, -1, content);
- };
-
- int addContext(ContextType type, int start, const QString &content) {
- ContextType prevtype = context_list.empty() ? None : context_list.back().type;
-
- int index = context_list.size();
- // some contexts need (or benefit from) special handling depending on the preceding context
- if (type == OtherOperator && prevtype == OtherOperator) {
- // Merge any two subsequent operators into one token
- // i.e. do not add a context, we'll reuse the previous one.
- --index;
- } else if (type == Delimiter && content.at(start) == u'\n' && (prevtype == OtherOperator || prevtype == SubsetOperator)) {
- // newlines do not count as delimiter on operator RHS, so skip ahead, instead of really adding this
- return start;
- } else {
- context_list.emplace_back(type, start); // end will be filled in, later
- }
-
- int pos = start;
- if (type == SingleQuoted || type == DoubleQuoted || type == BackQuoted) {
- while (++pos < content.length()) {
- const QChar c = content.at(pos);
- if (c == u'\\') ++pos;
- else if (c == u'\'' && type == SingleQuoted) break;
- else if (c == u'"' && type == DoubleQuoted) break;
- else if (c == u'`' && type == BackQuoted) break;
- }
- } else if (type == AnySymbol) {
- while (++pos < content.length()) {
- const QChar c = content.at(pos);
- if (!c.isLetterOrNumber() && c != u'.') {
- --pos;
- break;
- }
- }
- } else if (type == Comment) {
- while (++pos < content.length()) {
- if (content.at(pos) == u'\n') break;
- }
- } else if (type == OtherOperator || type == SubsetOperator || type == Delimiter) {
- // leave context, immediately
- } else {
- while (++pos < content.length()) {
- QChar c = content.at(pos);
- if (c == u'\'') pos = addContext(SingleQuoted, pos, content);
- else if (c == u'"') pos = addContext(DoubleQuoted, pos, content);
- else if (c == u'`') pos = addContext(BackQuoted, pos, content);
- else if (c == u'#') pos = addContext(Comment, pos, content);
- else if (c == u'(') pos = addContext(Parenthesis, pos, content);
- else if (c == u')' && type == Parenthesis) break;
- else if (c == u'{') pos = addContext(Brace, pos, content);
- else if (c == u'}' && type == Brace) break;
- else if (c == u'[') pos = addContext(Bracket, pos, content);
- else if (c == u']' && type == Bracket) break;
- else if (c.isLetterOrNumber() || c == u'.') pos = addContext(AnySymbol, pos, content);
- else if (c == u'\n' || c == u',' || c == u';') pos = addContext(Delimiter, pos, content);
- else if (c == u'$' || c == u'@') pos = addContext(SubsetOperator, pos, content);
- else if (!c.isSpace()) pos = addContext(OtherOperator, pos, content);
- }
- }
-
- // NOTE: we can't just keep a reference to the context at the start of this function, as the vector
- // may re-allocate during nested parsing
- context_list.at(index).end = pos;
- return pos;
- };
-
- // NOTE: used in debugging, only
- QString serialize() {
- QString ret;
- std::vector<Context> stack;
- stack.push_back(Context(None, -1, INT_MAX)); // dummy context, to avoid empty stack
-
- for (unsigned int i = 0; i < context_list.size(); ++i) {
- const auto ctx = context_list.at(i);
-
- // end any finished contexts
- while (ctx.start >= stack.back().end) {
- ret += endContext(stack.back(), stack.size());
- stack.pop_back();
- }
-
- // now deal with the current context
- stack.push_back(ctx);
- const auto type = ctx.type;
- if (type == Parenthesis) ret += u'(';
- if (type == Brace) ret += u'{';
- if (type == Bracket) ret += u'[';
- if (type == SingleQuoted) ret += u'\'';
- if (type == DoubleQuoted) ret += u'"';
- if (type == BackQuoted) ret += u'`';
- if (type == Comment) ret += u'#';
- if (type == SubsetOperator) ret += u'$';
- if (type == OtherOperator) ret += u'+';
- if (type == AnySymbol) ret += u'x';
- }
- while (!stack.empty()) {
- ret += endContext(stack.back(), stack.size());
- stack.pop_back();
- }
-
- return ret;
- }
-
- QString endContext(const Context &ctx, int level) {
- const auto ptype = ctx.type;
-
- if (ptype == Parenthesis) return u")"_s;
- if (ptype == Brace) return u"}"_s;
- if (ptype == Bracket) return u"]"_s;
- if (ptype == SingleQuoted) return u"'"_s;
- if (ptype == DoubleQuoted) return u"\""_s;
- if (ptype == BackQuoted) return u"`"_s;
- if (ptype == Comment || ptype == Delimiter) {
- QString ret = u"\n"_s;
- for (int j = 0; j < (level-1) * 4; ++j) ret += u" "_s;
- return ret;
- }
- return QString();
- }
-};
-
class RKCodeNavigation : public QWidget {
private:
RKCodeNavigation(KTextEditor::View *view) : QWidget(view, Qt::Popup | Qt::FramelessWindowHint | Qt::BypassWindowManagerHint), view(view), doc(view->document()) {
More information about the rkward-tracker
mailing list