[Kst] branches/work/kst/portto4/kst/src/datasources/ascii
Peter Kümmel
syntheticpp at gmx.net
Sun Oct 21 15:22:36 UTC 2012
SVN commit 1321751 by kuemmel:
Don't split whitespace-separated columns with QRegExp
M +41 -0 asciidatareader.cpp
M +7 -0 asciidatareader.h
M +22 -12 asciisource.cpp
M +1 -1 asciisource.h
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciidatareader.cpp #1321750:1321751
@@ -22,6 +22,7 @@
#include <QFile>
#include <QDebug>
#include <QMutexLocker>
+#include <QStringList>
#include <ctype.h>
#include <stdlib.h>
@@ -228,6 +229,7 @@
}
return 0;
}
+#undef constData
//
// template instantiation chain to generate optimal code for all possible data configurations
@@ -333,5 +335,44 @@
return n;
}
+//-------------------------------------------------------------------------------------------
+template<>
+int AsciiDataReader::splitColumns<IsWhiteSpace>(const QByteArray& line, const IsWhiteSpace& isWhitespace, QStringList* cols)
+{
+ int colstart = 0;
+ const int size = line.size();
+ //ignore whitespace at the beginning
+ for (; colstart < size && isWhitespace(line[colstart]); colstart++) {}
+ int count = 0;
+ int incol = true;
+ for (int i = colstart; i < size; i++) {
+ // entering column
+ if (!incol && !isWhitespace(line[i])) {
+ incol = true;
+ colstart = i;
+ continue;
+ }
+ // leaving column
+ if (incol && isWhitespace(line[i])) {
+ count++;
+ if (cols) {
+ const QByteArray col(line.constData() + colstart, i - colstart);
+ cols->push_back(QString(col));
+ }
+ incol = false;
+ }
+ }
+ if (incol) {
+ const QByteArray col(line.begin() + colstart, size - 1 - colstart);
+ QString lastCol = QString(col).simplified();
+ if (!lastCol.isEmpty()) {
+ count++;
+ if (cols)
+ cols->push_back(lastCol);
+ }
+ }
+ return count;
+}
+
// vim: ts=2 sw=2 et
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciidatareader.h #1321750:1321751
@@ -44,6 +44,9 @@
int readField(const AsciiFileData &buf, int col, double *v, const QString& field, int s, int n);
int readFieldFromChunk(const AsciiFileData& chunk, int col, double *v, const QString& field);
+ template<typename ColumnDelimiter>
+ static int splitColumns(const QByteArray& line, const ColumnDelimiter& column_del, QStringList* cols = 0);
+
private:
int _numFrames;
AsciiFileBuffer::RowIndex _rowIndex;
@@ -76,5 +79,9 @@
mutable QMutex _localeMutex;
};
+
+template<>
+int AsciiDataReader::splitColumns<AsciiCharacterTraits::IsWhiteSpace>(const QByteArray& line, const AsciiCharacterTraits::IsWhiteSpace& column_del, QStringList* cols);
+
#endif
// vim: ts=2 sw=2 et
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1321750:1321751
@@ -405,22 +405,33 @@
//-------------------------------------------------------------------------------------------
-int AsciiSource::splitHeaderLine(const QByteArray& line, AsciiSourceConfig* cfg, QStringList& parts)
+int AsciiSource::splitHeaderLine(const QByteArray& line, const AsciiSourceConfig& cfg, QStringList* stringList)
{
+ QStringList dummy;
+ QStringList& parts(stringList ? *stringList : dummy);
parts.clear();
- const QRegExp regexColumnDelimiter(QString("[%1]").arg(QRegExp::escape(cfg->_columnDelimiter.value())));
+ const QRegExp regexColumnDelimiter(QString("[%1]").arg(QRegExp::escape(cfg._columnDelimiter.value())));
- if (cfg->_columnType == AsciiSourceConfig::Custom && !cfg->_columnDelimiter.value().isEmpty()) {
+ if (cfg._columnType == AsciiSourceConfig::Custom && !cfg._columnDelimiter.value().isEmpty()) {
parts += QString(line).trimmed().split(regexColumnDelimiter, QString::SkipEmptyParts);
- } else if (cfg->_columnType == AsciiSourceConfig::Fixed) {
- int cnt = line.length() / cfg->_columnWidth;
+ } else if (cfg._columnType == AsciiSourceConfig::Fixed) {
+ int cnt = line.length() / cfg._columnWidth;
for (int i = 0; i < cnt; ++i) {
- QString sub = line.mid(i * cfg->_columnWidth).left(cfg->_columnWidth);
+ QString sub = line.mid(i * cfg._columnWidth).left(cfg._columnWidth);
parts += sub.trimmed();
}
} else {
- parts += QString(line).trimmed().split(QRegExp("\\s"), QString::SkipEmptyParts);
+ if (!stringList) {
+ //MeasureTime t("AsciiDataReader::countColumns()");
+ int columns = AsciiDataReader::splitColumns(line, AsciiCharacterTraits::IsWhiteSpace());
+ Q_ASSERT(columns == QString(line).trimmed().split(QRegExp("\\s"), QString::SkipEmptyParts).size());
+ return columns;
+ } else {
+ //MeasureTime t("AsciiDataReader::countColumns(parts)");
+ AsciiDataReader::splitColumns(line, AsciiCharacterTraits::IsWhiteSpace(), &parts);
+ Q_ASSERT(parts == QString(line).trimmed().split(QRegExp("\\s"), QString::SkipEmptyParts));
}
+ }
return parts.count();
}
@@ -444,7 +455,7 @@
int r = line.size();
if (currentLine == fieldsLine && r >= 0) {
QStringList parts;
- AsciiSource::splitHeaderLine(line, cfg, parts);
+ AsciiSource::splitHeaderLine(line, *cfg, &parts);
fields += parts;
break;
}
@@ -480,7 +491,6 @@
int cnt;
int nextscan = 0;
int curscan = 0;
- QStringList parts;
while (!file.atEnd() && !done && (nextscan < 200)) {
QByteArray line = file.readLine();
int r = line.size();
@@ -494,7 +504,7 @@
if (maxcnt >= 0) { //original skip value == 0, so scan some lines
if (curscan >= nextscan) {
if (r > 1 && !regex.exactMatch(line)) {
- cnt = splitHeaderLine(line, cfg, parts);
+ cnt = splitHeaderLine(line, *cfg);
if (cnt > maxcnt) {
maxcnt = cnt;
}
@@ -507,7 +517,7 @@
continue;
}
if (r > 1 && !regex.exactMatch(line)) { //at desired line, find count
- maxcnt = splitHeaderLine(line, cfg, parts);
+ maxcnt = splitHeaderLine(line, *cfg);
done = true;
} else if (r < 0) {
return fields;
@@ -540,7 +550,7 @@
int r = line.size();
if (currentLine == unitsLine && r >= 0) {
QStringList parts;
- AsciiSource::splitHeaderLine(line, cfg, parts);
+ AsciiSource::splitHeaderLine(line, *cfg, &parts);
units += parts;
break;
}
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.h #1321750:1321751
@@ -92,7 +92,7 @@
int columnOfField(const QString& field) const;
- static int splitHeaderLine(const QByteArray& line, AsciiSourceConfig* cfg, QStringList& result);
+ static int splitHeaderLine(const QByteArray& line, const AsciiSourceConfig& cfg, QStringList* parts = 0);
DataInterfaceAsciiString* is;
DataInterfaceAsciiVector* iv;
More information about the Kst mailing list