[Kst] branches/work/kst/portto4/kst/src

Peter Kümmel syntheticpp at gmx.net
Fri Jan 21 18:17:32 CET 2011


SVN commit 1216165 by kuemmel:

Some ASCII reading optimizations:

Here are some measurements: gyrodata copied and pasted to 8 million rows

Delimiters (comment, column)      : time in readColumns
-----------------------------------------------------------
No comment, custom: one space     : 1.69102 seconds
No comment, whitespace            : 2.11194 seconds
default comment, custom: one space: 4.01525 seconds (default comment with 4 characters)
default comment, whitespace       : 4.34039 seconds
default comment, custom ,;|       : 6.49383 seconds (space at the end of 4 characters)





 M  +19 -3     datasources/ascii/asciisource.cpp  
 M  +22 -1     datasources/ascii/asciisource.h  
 M  +2 -2      libkst/measuretime.cpp  


--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1216164:1216165
@@ -432,13 +432,16 @@
     return n;
   } else if (_config._columnType == AsciiSourceConfig::Custom) {
     if (_config._columnDelimiter.value().size() == 1) {
+      //MeasureTime t("character");
       _columnDelimiterCharacter = _config._columnDelimiter.value()[0].toAscii();
       return readColumns(v, buffer, bufstart, bufread, col, s, n, &AsciiSource::isColumnDelimiter);
     } if (_config._columnDelimiter.value().size() > 1) {
+      //MeasureTime t("string");
       _columnDelimiterString = _config._columnDelimiter.value();
       return readColumns(v, buffer, bufstart, bufread, col, s, n, &AsciiSource::isInColumnDelimiterString);
     }
   } else if (_config._columnType == AsciiSourceConfig::Whitespace) {
+    //MeasureTime t("whitespace");
     return readColumns(v, buffer, bufstart, bufread, col, s, n, &AsciiSource::isWhiteSpace);
   }
 
@@ -447,11 +450,24 @@
 
 
 //-------------------------------------------------------------------------------------------
-int AsciiSource::readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n, bool (AsciiSource::*isColumnDelemiterFunction)(char))
+int AsciiSource::readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n, DelimiterFunction columnDelemiterFunction)
 {
   LexicalCast lexc;
   lexc.setDecimalSeparator(_config._useDot, _config._localSeparator);
   const QString delimiters = _config._delimiters.value();
+
+  DelimiterFunction commentDelemiterFunction;
+
+  if (_config._delimiters.value().size() == 0) {
+    commentDelemiterFunction = &AsciiSource::noCommentDelimiter;
+  } else if (_config._delimiters.value().size() == 1) {
+    _commentDelimiterCharacter = _config._delimiters.value()[0].toAscii();
+    commentDelemiterFunction = &AsciiSource::isCommentDelimiter;
+  } else if (_config._delimiters.value().size() > 1) {
+    _commentDelimiterString = _config._delimiters.value();
+    commentDelemiterFunction = &AsciiSource::isInCommentDelimiterString;
+  }
+
   for (int i = 0; i < n; i++, s++) {
     bool incol = false;
     int i_col = 0;
@@ -461,9 +477,9 @@
     for (ch = _rowIndex[s] - bufstart; ch < bufread; ++ch) {
       if (buffer[ch] == '\n' || buffer[ch] == '\r') {
         break;
-      } else if ((this->*isColumnDelemiterFunction)(buffer[ch])) { //<- check for column start
+      } else if ((this->*columnDelemiterFunction)(buffer[ch])) { //<- check for column start
         incol = false;
-      } else if (delimiters.contains(buffer[ch])) {
+      } else if ((this->*commentDelemiterFunction)(buffer[ch])) {
         break;
       } else {
         if (!incol) {
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.h #1216164:1216165
@@ -102,6 +102,9 @@
     template<class T>
     int readFromFile(QFile&, T& buffer, int start, int numberOfBytes, int maximalBytes = -1);
 
+
+    // column delimiter functions
+
     inline bool isWhiteSpace(char c) { 
       return isspace((unsigned char)c); 
     }
@@ -116,7 +119,25 @@
       return _columnDelimiterString.contains(c);
     }
 
-    int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n, bool (AsciiSource::*isColumnDelemiterFunction)(char));
+
+    // comment delimiter functions
+
+    inline bool noCommentDelimiter(char) {
+      return false;
+    }
+    char _commentDelimiterCharacter;
+    inline bool isCommentDelimiter(char c) {
+      return _commentDelimiterCharacter == c;
+    }
+
+    QString _commentDelimiterString;
+    inline bool isInCommentDelimiterString(char c) {
+      return _commentDelimiterString.contains(c);
+    }
+
+    typedef bool (AsciiSource::*DelimiterFunction)(char);
+
+    int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n, DelimiterFunction);
     void toDouble(const LexicalCast& lexc, const char* buffer, int bufread, int ch, double* v, int row);
 
     // TODO remove
--- branches/work/kst/portto4/kst/src/libkst/measuretime.cpp #1216164:1216165
@@ -12,13 +12,13 @@
 
 #include "measuretime.h"
 
- 
 #ifdef Q_OS_WIN
 #include <windows.h>
 #else
 #include <time.h>
 #endif
 
+#include <iostream>
  
 MeasureTime::MeasureTime(const QString& n) :
     started(0),
@@ -101,7 +101,7 @@
 void MeasureTime::print()
 {
   measure();
-  qDebug("%s: %f sec", qPrintable(name), interval);
+  std::cout << qPrintable(name) << ": " << interval << " seconds\n";
 }
 
 


More information about the Kst mailing list