[Kst] branches/work/kst/portto4/kst/src/datasources/ascii
Barth Netterfield
netterfield at astro.utoronto.ca
Sun Jun 17 02:53:22 UTC 2012
SVN commit 1301214 by netterfield:
Ascii source optimization:
Don't re-open and re-read the file for every field.
This is about a 10x speedup for wide files. Now all fields from files
with 10k+ columns are easily readable (~3s on my laptop) in fixed width
per column mode.
The read time grows a little faster than linearly in n (the number of columns), but not as n^2.
M +19 -4 asciisource.cpp
M +4 -0 asciisource.h
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1301213:1301214
@@ -187,6 +187,8 @@
AsciiSource::AsciiSource(Kst::ObjectStore *store, QSettings *cfg, const QString& filename, const QString& type, const QDomElement& e) :
Kst::DataSource(store, cfg, filename, type),
_tmpBuffer(),
+ _bufferedS(-10),
+ _bufferedN(-10),
_rowIndex(),
is(new DataInterfaceAsciiString(*this)),
iv(new DataInterfaceAsciiVector(*this))
@@ -224,6 +226,10 @@
//-------------------------------------------------------------------------------------------
void AsciiSource::reset()
{
+ // forget about cached data
+ _bufferedN = -10;
+ _bufferedS = -10;
+
_tmpBuffer.clear();
_rowIndex.clear();
@@ -330,6 +336,10 @@
{
MeasureTime t("AsciiSource::internalDataSourceUpdate: " + _filename);
+ // forget about cached data
+ _bufferedN = -10;
+ _bufferedS = -10;
+
if (!_haveHeader) {
_haveHeader = initRowIndex();
if (!_haveHeader) {
@@ -498,14 +508,19 @@
return 0;
}
+ if ((s != _bufferedS) || (n != _bufferedN)) {
QFile file(_filename);
if (!openValidFile(file)) {
return 0;
}
- LineEndingType lineending = detectLineEndingType(file);
+ _lineending = detectLineEndingType(file);
+
bufread = readFromFile(file, _tmpBuffer, bufstart, bufread);
+ _bufferedS = s;
+ _bufferedN = n;
+ }
#ifdef KST_DONT_CHECK_INDEX_IN_DEBUG
const char* buffer = _tmpBuffer.constData();
@@ -528,16 +543,16 @@
if (_config._columnDelimiter.value().size() == 1) {
MeasureTime t("AsciiSource::readField: 1 custom column delimiter");
const IsCharacter column_del(_config._columnDelimiter.value()[0].toAscii());
- return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, _lineending, column_del);
} if (_config._columnDelimiter.value().size() > 1) {
MeasureTime t(QString("AsciiSource::readField: %1 custom column delimiters").arg(_config._columnDelimiter.value().size()));
const IsInString column_del(_config._columnDelimiter.value());
- return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, _lineending, column_del);
}
} else if (_config._columnType == AsciiSourceConfig::Whitespace) {
MeasureTime t("AsciiSource::readField: whitespace separated columns");
const IsWhiteSpace column_del;
- return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, _lineending, column_del);
}
return 0;
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.h #1301213:1301214
@@ -82,6 +82,8 @@
// TODO Is this too big or should we use even more: 1MB on the stack?
#define KST_PREALLOC 1 * 1024 * 1024
QVarLengthArray<char, KST_PREALLOC> _tmpBuffer;
+ int _bufferedS;
+ int _bufferedN;
QVarLengthArray<int, KST_PREALLOC> _rowIndex;
friend class ConfigWidgetAscii;
@@ -93,6 +95,7 @@
bool _haveHeader;
bool _fieldListComplete;
+
QStringList _scalarList;
QMap<QString, QString> _strings;
QStringList _fieldList;
@@ -117,6 +120,7 @@
bool isLF() const { return character == '\n'; }
};
LineEndingType detectLineEndingType(QFile& file) const;
+ LineEndingType _lineending;
// column and comment delimiter functions
More information about the Kst
mailing list