[Kst] branches/work/kst/portto4/kst/src/datasources/ascii

Peter Kümmel syntheticpp at gmx.net
Tue Jan 25 17:39:41 CET 2011


SVN commit 1217076 by kuemmel:

Optimize internalDataSourceUpdate:
measured with a MinGW build: 17.6s before, 2.1s after (using '#' as the comment delimiter)

 M  +40 -35    asciisource.cpp  
 M  +4 -3      asciisource.h  


--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1217075:1217076
@@ -280,7 +280,7 @@
 
 
 //-------------------------------------------------------------------------------------------
-#define MAXBUFREADLEN 32768
+#define MAXBUFREADLEN KST_PREALLOC
 Kst::Object::UpdateType AsciiSource::internalDataSourceUpdate() 
 {
   MeasureTime t("internalDataSourceUpdate");
@@ -304,25 +304,19 @@
     return NoChange;
   }
 
-  bool forceUpdate;
+  bool new_data = false;
+  bool force_update = true;
   if (_byteLength == file.size()) {
-    forceUpdate = false;
-  } else {
-    forceUpdate = true;
+    force_update = false;
+  }
     _byteLength = file.size();
-  }
 
-  int bufread;
-  bool new_data = false;
-  //bool first_read = (_numFrames == 0);
-
-  QByteArray delbytes = _config._delimiters.value().toLatin1();
-  const char *del = delbytes.constData();
-
+  int bufread = 0;
   do {
     // Read the tmpbuffer, starting at row_index[_numFrames]
     QVarLengthArray<char, MAXBUFREADLEN + 1> varBuffer;
     varBuffer.resize(varBuffer.capacity());
+
     int bufstart = _rowIndex[_numFrames];
     bufread = readFromFile(file, varBuffer, bufstart, _byteLength - bufstart, MAXBUFREADLEN);    
 
@@ -334,45 +328,56 @@
     const char* bufferData = buffer.data();
 #endif
 
-    findDataRows(buffer, bufstart, bufread, del, new_data);
+    if (_config._delimiters.value().size() == 0) {
+      const NoDelimiter comment_del;
+      new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+    } else if (_config._delimiters.value().size() == 1) {
+      const IsCharacter comment_del(_config._delimiters.value()[0].toAscii());
+      new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+    } else if (_config._delimiters.value().size() > 1) {
+      const IsInString comment_del(_config._delimiters.value());
+      new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+    }
     
-  } while ((bufread == MAXBUFREADLEN)); // && (!first_read));
+  } while ((bufread == MAXBUFREADLEN));
 
-  return (forceUpdate ? Updated : (new_data ? Updated : NoChange));
+  _rowIndex.resize(_numFrames);
+
+  return (!new_data && !force_update ? NoChange : Updated);
 }
 
 
-int AsciiSource::findDataRows(const char* buffer, int bufstart, int bufread, const char *del, bool& new_data)
+template<typename CommentDelimiter>
+bool AsciiSource::findDataRows(const char* buffer, int bufstart, int bufread, const CommentDelimiter& comment_del)
 {
-  bool is_comment = false, has_dat = false;
-  char *comment = strpbrk(const_cast<char*>(buffer), del);
+  const IsLineBreak isLineBreak;
+  const IsWhiteSpace isWhiteSpace;
+
+  bool new_data = false;
+
+  bool is_data = false;
+  bool is_comment = false;
+
   for (int i = 0; i < bufread; i++) {
-      if (comment == &(buffer[i])) {
+      if (comment_del(buffer[i])) {
         is_comment = true;
-      } else if (buffer[i] == '\n' || buffer[i] == '\r') {
-        if (has_dat) {
+      } else if (isLineBreak(buffer[i])) {
+        is_comment = false;
+        if (is_data) {
+          is_data = false;
           ++_numFrames;
           if (_numFrames >= _rowIndex.size()) {
             _rowIndex.resize(_rowIndex.size() + MAXBUFREADLEN);
-            if (_numFrames >= _rowIndex.size()) {
-              // TODO where could we report an error;
-              return NoChange;
             }
-          }
+          _rowIndex[_numFrames] = bufstart + i + 1;
           new_data = true;
         }
-        _rowIndex[_numFrames] = bufstart + i + 1;
-        has_dat = is_comment = false;
-        if (comment && comment < &(buffer[i])) {
-          comment = strpbrk(const_cast<char*>(&(buffer[i])), del);
+      } else if (!is_data && !isWhiteSpace(buffer[i]) && !comment_del(buffer[i])) {
+        is_data = is_comment ? false : true;
         }
-      } else if (!is_comment && !isspace((unsigned char)buffer[i])) {
-        // FIXME: this breaks custom delimiters
-        has_dat = true;
       }
+  return new_data;
   }
-  return 0;
-}
 
 
 //-------------------------------------------------------------------------------------------
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.h #1217075:1217076
@@ -77,7 +77,7 @@
     // TODO Is this too big or should we use even more: 1MB on the stack?
 #define KST_PREALLOC 1 * 1024 * 1024
     QVarLengthArray<char, KST_PREALLOC> _tmpBuffer;
-    QVarLengthArray<int, KST_PREALLOC / 4> _rowIndex;
+    QVarLengthArray<int, KST_PREALLOC> _rowIndex;
 
     friend class ConfigWidgetAscii;
     mutable AsciiSourceConfig _config;
@@ -103,8 +103,6 @@
     int readFromFile(QFile&, T& buffer, int start, int numberOfBytes, int maximalBytes = -1);
 
 
-    int findDataRows(const char* buffer, int bufstart, int bufread, const char *del, bool& new_data);
-
     // column and comment delimiter functions
 
     struct AlwaysTrue {
@@ -178,6 +176,9 @@
     int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
                     const ColumnDelimiter&, const CommentDelimiter&, const ColumnWidthsAreConst&);
 
+    template<typename CommentDelimiter>
+    bool findDataRows(const char* buffer, int bufstart, int bufread, const CommentDelimiter&);
+
     void toDouble(const LexicalCast& lexc, const char* buffer, int bufread, int ch, double* v, int row);
 
     // TODO remove


More information about the Kst mailing list