[Kst] branches/work/kst/portto4/kst/src/datasources/ascii

Barth Netterfield netterfield at astro.utoronto.ca
Sun Jun 17 02:53:22 UTC 2012


SVN commit 1301214 by netterfield:

Ascii source optimization:
Don't re-open and re-read the file for every field.

This is about a 10x speedup for wide files.  Now all fields from files
with 10k+ columns are easily readable (~3s on my laptop) in fixed width
per column mode.

The read time grows a little faster than linearly in the number of columns (n), but not as n^2.


 M  +19 -4     asciisource.cpp  
 M  +4 -0      asciisource.h  


--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1301213:1301214
@@ -187,6 +187,8 @@
 AsciiSource::AsciiSource(Kst::ObjectStore *store, QSettings *cfg, const QString& filename, const QString& type, const QDomElement& e) :
   Kst::DataSource(store, cfg, filename, type),  
   _tmpBuffer(),
+  _bufferedS(-10),
+  _bufferedN(-10),
   _rowIndex(),
   is(new DataInterfaceAsciiString(*this)),
   iv(new DataInterfaceAsciiVector(*this))
@@ -224,6 +226,10 @@
 //-------------------------------------------------------------------------------------------
 void AsciiSource::reset() 
 {
+  // forget about cached data
+  _bufferedN = -10;
+  _bufferedS = -10;
+
   _tmpBuffer.clear();
   _rowIndex.clear();
 
@@ -330,6 +336,10 @@
 {
   MeasureTime t("AsciiSource::internalDataSourceUpdate: " + _filename);
 
+  // forget about cached data
+  _bufferedN = -10;
+  _bufferedS = -10;
+
   if (!_haveHeader) {
     _haveHeader = initRowIndex();
     if (!_haveHeader) {
@@ -498,14 +508,19 @@
     return 0;
   }
 
+  if ((s != _bufferedS) || (n != _bufferedN)) {
   QFile file(_filename);
   if (!openValidFile(file)) {
     return 0;
   }
   
-  LineEndingType lineending = detectLineEndingType(file);
+    _lineending = detectLineEndingType(file);
 
+
   bufread = readFromFile(file, _tmpBuffer, bufstart, bufread);
+    _bufferedS = s;
+    _bufferedN = n;
+  }
 
 #ifdef KST_DONT_CHECK_INDEX_IN_DEBUG
   const char* buffer = _tmpBuffer.constData();
@@ -528,16 +543,16 @@
     if (_config._columnDelimiter.value().size() == 1) {
       MeasureTime t("AsciiSource::readField: 1 custom column delimiter");
       const IsCharacter column_del(_config._columnDelimiter.value()[0].toAscii());
-      return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
+      return readColumns(v, buffer, bufstart, bufread, col, s, n, _lineending, column_del);
     } if (_config._columnDelimiter.value().size() > 1) {
       MeasureTime t(QString("AsciiSource::readField: %1 custom column delimiters").arg(_config._columnDelimiter.value().size()));
       const IsInString column_del(_config._columnDelimiter.value());
-      return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
+      return readColumns(v, buffer, bufstart, bufread, col, s, n, _lineending, column_del);
     }
   } else if (_config._columnType == AsciiSourceConfig::Whitespace) {
     MeasureTime t("AsciiSource::readField: whitespace separated columns");
     const IsWhiteSpace column_del;
-    return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
+    return readColumns(v, buffer, bufstart, bufread, col, s, n, _lineending, column_del);
   }
 
   return 0;
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.h #1301213:1301214
@@ -82,6 +82,8 @@
     // TODO Is this too big or should we use even more: 1MB on the stack?
 #define KST_PREALLOC 1 * 1024 * 1024
     QVarLengthArray<char, KST_PREALLOC> _tmpBuffer;
+    int _bufferedS;
+    int _bufferedN;
     QVarLengthArray<int, KST_PREALLOC> _rowIndex;
 
     friend class ConfigWidgetAscii;
@@ -93,6 +95,7 @@
     bool _haveHeader;
     bool _fieldListComplete;
 
+
     QStringList _scalarList;
     QMap<QString, QString> _strings;
     QStringList _fieldList;
@@ -117,6 +120,7 @@
       bool isLF() const { return character == '\n'; }
     };
     LineEndingType detectLineEndingType(QFile& file) const;
+    LineEndingType _lineending;
 
     // column and comment delimiter functions
 


More information about the Kst mailing list