[Kst] branches/work/kst/portto4/kst/src/datasources/ascii

Peter Kümmel syntheticpp at gmx.net
Sun Oct 21 15:22:36 UTC 2012


SVN commit 1321751 by kuemmel:

don't split whitespace-separated columns with QRegExp

 M  +41 -0     asciidatareader.cpp  
 M  +7 -0      asciidatareader.h  
 M  +22 -12    asciisource.cpp  
 M  +1 -1      asciisource.h  


--- branches/work/kst/portto4/kst/src/datasources/ascii/asciidatareader.cpp #1321750:1321751
@@ -22,6 +22,7 @@
 #include <QFile>
 #include <QDebug>
 #include <QMutexLocker>
+#include <QStringList>
 #include <ctype.h>
 #include <stdlib.h>
 
@@ -228,6 +229,7 @@
   }
   return 0;
 }
+#undef constData
 
 //
 // template instantiation chain to generate optimal code for all possible data configurations
@@ -333,5 +335,44 @@
   return n;
 }
 
+//-------------------------------------------------------------------------------------------
+template<>
+int AsciiDataReader::splitColumns<IsWhiteSpace>(const QByteArray& line, const IsWhiteSpace& isWhitespace, QStringList* cols)
+{
+  int colstart = 0;
+  const int size =  line.size();
+  //ignore whitespace at the beginning
+  for (; colstart < size && isWhitespace(line[colstart]); colstart++) {}
+  int count = 0;
+  int incol = true;
+  for (int i = colstart; i < size; i++) {
+    // entering column
+    if (!incol && !isWhitespace(line[i])) {
+      incol = true;
+      colstart = i;
+      continue;
+    }
+    // leaving column
+    if (incol && isWhitespace(line[i])) {
+      count++;
+      if (cols) {
+        const QByteArray col(line.constData() + colstart, i - colstart);
+        cols->push_back(QString(col));
+      }
+      incol = false;
+    }
+  }
+  if (incol) {
+    const QByteArray col(line.begin() + colstart, size - 1 - colstart);
+    QString lastCol = QString(col).simplified();
+    if (!lastCol.isEmpty()) {
+      count++;
+      if (cols)
+        cols->push_back(lastCol);
+    }
+  }
+  return count;
+}
 
+
 // vim: ts=2 sw=2 et
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciidatareader.h #1321750:1321751
@@ -44,6 +44,9 @@
     int readField(const AsciiFileData &buf, int col, double *v, const QString& field, int s, int n);
     int readFieldFromChunk(const AsciiFileData& chunk, int col, double *v, const QString& field);
 
+    template<typename ColumnDelimiter>
+    static int splitColumns(const QByteArray& line, const ColumnDelimiter& column_del, QStringList* cols = 0);
+
   private:
     int _numFrames;
     AsciiFileBuffer::RowIndex _rowIndex;
@@ -76,5 +79,9 @@
     mutable QMutex _localeMutex;
 };
 
+
+template<>
+int AsciiDataReader::splitColumns<AsciiCharacterTraits::IsWhiteSpace>(const QByteArray& line, const AsciiCharacterTraits::IsWhiteSpace& column_del, QStringList* cols);
+
 #endif
 // vim: ts=2 sw=2 et
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1321750:1321751
@@ -405,22 +405,33 @@
 
 
 //-------------------------------------------------------------------------------------------
-int AsciiSource::splitHeaderLine(const QByteArray& line, AsciiSourceConfig* cfg, QStringList& parts)
+int AsciiSource::splitHeaderLine(const QByteArray& line, const AsciiSourceConfig& cfg, QStringList* stringList)
 {
+  QStringList dummy;
+  QStringList& parts(stringList ? *stringList : dummy);
   parts.clear();
-  const QRegExp regexColumnDelimiter(QString("[%1]").arg(QRegExp::escape(cfg->_columnDelimiter.value())));
+  const QRegExp regexColumnDelimiter(QString("[%1]").arg(QRegExp::escape(cfg._columnDelimiter.value())));
   
-  if (cfg->_columnType == AsciiSourceConfig::Custom && !cfg->_columnDelimiter.value().isEmpty()) {
+  if (cfg._columnType == AsciiSourceConfig::Custom && !cfg._columnDelimiter.value().isEmpty()) {
     parts += QString(line).trimmed().split(regexColumnDelimiter, QString::SkipEmptyParts);
-  } else if (cfg->_columnType == AsciiSourceConfig::Fixed) {
-    int cnt = line.length() / cfg->_columnWidth;
+  } else if (cfg._columnType == AsciiSourceConfig::Fixed) {
+    int cnt = line.length() / cfg._columnWidth;
     for (int i = 0; i < cnt; ++i) {
-      QString sub = line.mid(i * cfg->_columnWidth).left(cfg->_columnWidth);
+      QString sub = line.mid(i * cfg._columnWidth).left(cfg._columnWidth);
       parts += sub.trimmed();
     }
   } else {
-    parts += QString(line).trimmed().split(QRegExp("\\s"), QString::SkipEmptyParts);
+    if (!stringList) {
+      //MeasureTime t("AsciiDataReader::countColumns()");
+      int columns = AsciiDataReader::splitColumns(line, AsciiCharacterTraits::IsWhiteSpace());
+      Q_ASSERT(columns == QString(line).trimmed().split(QRegExp("\\s"), QString::SkipEmptyParts).size());
+      return columns;
+    } else {
+      //MeasureTime t("AsciiDataReader::countColumns(parts)");
+      AsciiDataReader::splitColumns(line, AsciiCharacterTraits::IsWhiteSpace(), &parts);
+      Q_ASSERT(parts == QString(line).trimmed().split(QRegExp("\\s"), QString::SkipEmptyParts));
   }
+  }
   return parts.count();
 }
 
@@ -444,7 +455,7 @@
       int r = line.size();
       if (currentLine == fieldsLine && r >= 0) {
         QStringList parts;
-        AsciiSource::splitHeaderLine(line, cfg, parts);
+        AsciiSource::splitHeaderLine(line, *cfg, &parts);
         fields += parts;
         break;
       }
@@ -480,7 +491,6 @@
   int cnt;
   int nextscan = 0;
   int curscan = 0;
-  QStringList parts;
   while (!file.atEnd() && !done && (nextscan < 200)) {
     QByteArray line = file.readLine();
     int r = line.size();
@@ -494,7 +504,7 @@
     if (maxcnt >= 0) { //original skip value == 0, so scan some lines
       if (curscan >= nextscan) {
         if (r > 1 && !regex.exactMatch(line)) {
-          cnt = splitHeaderLine(line, cfg, parts);
+          cnt = splitHeaderLine(line, *cfg);
           if (cnt > maxcnt) {
             maxcnt = cnt;
           }
@@ -507,7 +517,7 @@
       continue;
     }
     if (r > 1 && !regex.exactMatch(line)) { //at desired line, find count
-      maxcnt = splitHeaderLine(line, cfg, parts);
+      maxcnt = splitHeaderLine(line, *cfg);
       done = true;
     } else if (r < 0) {
       return fields;
@@ -540,7 +550,7 @@
     int r = line.size();
     if (currentLine == unitsLine && r >= 0) {
       QStringList parts;
-      AsciiSource::splitHeaderLine(line, cfg, parts);
+      AsciiSource::splitHeaderLine(line, *cfg, &parts);
       units += parts;
       break;
     }
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.h #1321750:1321751
@@ -92,7 +92,7 @@
     
 
     int columnOfField(const QString& field) const;
-    static int splitHeaderLine(const QByteArray& line, AsciiSourceConfig* cfg, QStringList& result);
+    static int splitHeaderLine(const QByteArray& line, const AsciiSourceConfig& cfg, QStringList* parts = 0);
 
     DataInterfaceAsciiString* is;
     DataInterfaceAsciiVector* iv;


More information about the Kst mailing list