[Kst] branches/work/kst/portto4/kst/src/datasources/ascii
Peter Kümmel
syntheticpp at gmx.net
Tue Oct 16 14:47:48 UTC 2012
SVN commit 1320704 by kuemmel:
read big files into multiple chunks
M +6 -5 asciidatareader.cpp
M +5 -4 asciidatareader.h
M +144 -20 asciifilebuffer.cpp
M +39 -5 asciifilebuffer.h
M +11 -2 asciisource.cpp
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciidatareader.cpp #1320703:1320704
@@ -20,6 +20,7 @@
#include "measuretime.h"
#include <QFile>
+#include <QDebug>
#include <ctype.h>
#include <stdlib.h>
@@ -114,13 +115,13 @@
detectLineEndingType(file);
bool new_data = false;
- AsciiFileBuffer buf;
+ AsciiFileData buf;
do {
// Read the tmpbuffer, starting at row_index[_numFrames]
buf.clear();
// always read from the start of a line
- buf.read(file, _rowIndex[_numFrames], _byteLength - buf.begin(), AsciiFileBuffer::Prealloc - 1);
+ buf.read(file, _rowIndex[_numFrames], _byteLength - buf.begin(), AsciiFileData::Prealloc - 1);
if (buf.bytesRead() == 0) {
return false;
}
@@ -147,7 +148,7 @@
new_data = findDataRows(buf.constData(), buf.begin(), buf.bytesRead(), IsLineBreakCR(_lineending), comment_del);
}
}
- } while (buf.bytesRead() == AsciiFileBuffer::Prealloc - 1 && read_completely);
+ } while (buf.bytesRead() == AsciiFileData::Prealloc - 1 && read_completely);
_rowIndex.resize(_numFrames + 1);
@@ -173,7 +174,7 @@
_rowIndex[_numFrames] = row_start;
++_numFrames;
if (_numFrames >= _rowIndex.size()) {
- _rowIndex.resize(_rowIndex.size() + AsciiFileBuffer::Prealloc - 1);
+ _rowIndex.resize(_rowIndex.size() + AsciiFileData::Prealloc - 1);
}
new_data = true;
row_start = row_offset+i;
@@ -191,7 +192,7 @@
}
//-------------------------------------------------------------------------------------------
-int AsciiDataReader::readField(const AsciiFileBuffer& buf, int col, double *v, const QString& field, int s, int n)
+int AsciiDataReader::readField(const AsciiFileData& buf, int col, double *v, const QString& field, int s, int n)
{
if (_config._columnType == AsciiSourceConfig::Fixed) {
MeasureTime t("AsciiSource::readField: same width for all columns");
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciidatareader.h #1320703:1320704
@@ -29,21 +29,22 @@
AsciiDataReader(AsciiSourceConfig& config);
~AsciiDataReader();
- typedef QVarLengthArray<int, AsciiFileBuffer::Prealloc> RowIndex;
-
void clear();
void setRow0Begin(int begin);
inline int beginOfRow(int row) const { return _rowIndex[row]; }
inline int numberOfFrames() const { return _numFrames; }
+ // where
+ const AsciiFileBuffer::RowIndex& rowIndex() const { return _rowIndex; }
+
void detectLineEndingType(QFile& file);
bool findDataRows(bool read_completely, QFile& file, int _byteLength);
- int readField(const AsciiFileBuffer &buf, int col, double *v, const QString& field, int s, int n);
+ int readField(const AsciiFileData &buf, int col, double *v, const QString& field, int s, int n);
private:
int _numFrames;
- RowIndex _rowIndex;
+ AsciiFileBuffer::RowIndex _rowIndex;
AsciiSourceConfig& _config;
AsciiCharacterTraits::LineEndingType _lineending;
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciifilebuffer.cpp #1320703:1320704
@@ -26,21 +26,21 @@
#undef qFree
#include "asciifilebuffer.h"
-
#include "debug.h"
#include <QFile>
#include <QDebug>
-#include <QMap>
+
static int MB = 1024*1024;
// Simulate out of memory scenario
//#define KST_TEST_OOM
+
#ifdef KST_TEST_OOM
-static const size_t maxMB = 50;
+static size_t maxAllocate = 2 * MB;
#else
-static const size_t maxMB = 0;
+static size_t maxAllocate = (size_t) -1;
#endif
#define KST_MEMORY_DEBUG if(1)
@@ -57,25 +57,26 @@
it.next();
sum += it.value();
}
- Kst::Debug::self()->log(QString("AsciiFileBuffer: %1 MB used").arg(sum/MB), Kst::Debug::Warning);
- KST_MEMORY_DEBUG qDebug() << "AsciiFileBuffer: " << sum/MB<< "MB used";
+ Kst::Debug::self()->log(QString("AsciiFileData: %1 MB used").arg(sum / MB), Kst::Debug::Warning);
+ KST_MEMORY_DEBUG qDebug() << "AsciiFileData: " << sum / MB<< "MB used";
}
//-------------------------------------------------------------------------------------------
void* fileBufferMalloc(size_t bytes)
{
void* ptr = 0;
- if (maxMB == 0 || bytes < maxMB*MB) {
+#ifdef KST_TEST_OOM
+ if (bytes <= maxAllocate)
+#endif
ptr = malloc(bytes);
- }
if (ptr) {
allocatedMBs[ptr] = bytes;
KST_MEMORY_DEBUG qDebug() << "AsciiFileBuffer: " << bytes/MB << "MB allocated";
KST_MEMORY_DEBUG logMemoryUsed();
} else {
- Kst::Debug::self()->log(QString("AsciiFileBuffer: failed to allocate %1 MBs").arg(bytes/MB), Kst::Debug::Warning);
+ Kst::Debug::self()->log(QString("AsciiFileData: failed to allocate %1 MBs").arg(bytes / MB), Kst::Debug::Warning);
logMemoryUsed();
- KST_MEMORY_DEBUG qDebug() << "AsciiFileBuffer: error when allocating " << bytes/MB << "MB";
+ KST_MEMORY_DEBUG qDebug() << "AsciiFileData: error when allocating " << bytes / MB << "MB";
}
return ptr;
}
@@ -84,7 +85,7 @@
void fileBufferFree(void* ptr)
{
if (allocatedMBs.contains(ptr)) {
- KST_MEMORY_DEBUG qDebug() << "AsciiFileBuffer: " << allocatedMBs[ptr]/MB << "MB freed";
+ KST_MEMORY_DEBUG qDebug() << "AsciiFileData: " << allocatedMBs[ptr] / MB << "MB freed";
allocatedMBs.remove(ptr);
}
KST_MEMORY_DEBUG logMemoryUsed();
@@ -92,31 +93,29 @@
}
//-------------------------------------------------------------------------------------------
-AsciiFileBuffer::AsciiFileBuffer() : _array(new Array), _begin(-1), _bytesRead(0)
+AsciiFileData::AsciiFileData() : _array(new Array), _begin(-1), _bytesRead(0), _rowBegin(-1), _rowsRead(0)
{
}
//-------------------------------------------------------------------------------------------
-AsciiFileBuffer::~AsciiFileBuffer()
+AsciiFileData::~AsciiFileData()
{
- delete _array;
}
-
//-------------------------------------------------------------------------------------------
-char* AsciiFileBuffer::data()
+char* AsciiFileData::data()
{
return _array->data();
}
//-------------------------------------------------------------------------------------------
-const char* const AsciiFileBuffer::constPointer() const
+const char* const AsciiFileData::constPointer() const
{
return _array->data();
}
//-------------------------------------------------------------------------------------------
-bool AsciiFileBuffer::resize(int bytes)
+bool AsciiFileData::resize(int bytes)
{
try {
_array->resize(bytes);
@@ -129,7 +128,7 @@
}
//-------------------------------------------------------------------------------------------
-void AsciiFileBuffer::clear(bool forceDeletingArray)
+void AsciiFileData::clear(bool forceDeletingArray)
{
// force deletion of heap allocated memory if any
if (forceDeletingArray || _array->capacity() > Prealloc) {
@@ -141,11 +140,20 @@
}
//-------------------------------------------------------------------------------------------
-void AsciiFileBuffer::read(QFile& file, int start, int bytesToRead, int maximalBytes)
+void AsciiFileData::release()
{
+ delete _array;
+ _array = 0;
_begin = -1;
_bytesRead = 0;
+}
+//-------------------------------------------------------------------------------------------
+void AsciiFileData::read(QFile& file, int start, int bytesToRead, int maximalBytes)
+{
+ _begin = -1;
+ _bytesRead = 0;
+
if (bytesToRead <= 0)
return;
@@ -167,3 +175,119 @@
_bytesRead = bytesRead;
}
+
+//-------------------------------------------------------------------------------------------
+AsciiFileBuffer::AsciiFileBuffer()
+{
+}
+
+//-------------------------------------------------------------------------------------------
+AsciiFileBuffer::~AsciiFileBuffer()
+{
+ clear();
+}
+
+//-------------------------------------------------------------------------------------------
+void AsciiFileBuffer::clear(bool forceDeletingArray)
+{
+ foreach (AsciiFileData chunk, _fileData) {
+ chunk.release();
+ }
+ _fileData.clear();
+}
+
+//-------------------------------------------------------------------------------------------
+const QVector<AsciiFileData>& AsciiFileBuffer::data() const
+{
+ return _fileData;
+}
+
+//-------------------------------------------------------------------------------------------
+void AsciiFileBuffer::logData() const
+{
+ int i = 0;
+ foreach (const AsciiFileData& chunk, _fileData) {
+ qDebug() << "_fileData: " << i << ". " << chunk.rowBegin() << " ... " << chunk.rowBegin() + chunk.rowsRead();
+ i++;
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+static int findRowOfPosition(const AsciiFileBuffer::RowIndex& rowIndex, int searchStart, int pos)
+{
+ //TODO too expensive
+ const int size = rowIndex.size();
+ for (int row = searchStart; row != size; row++) {
+ if (rowIndex[row] > pos)
+ return row - 1;
+ }
+ // must be the last row
+ return size - 1;
+}
+
+//-------------------------------------------------------------------------------------------
+void AsciiFileBuffer::read(QFile& file, const RowIndex& rowIndex, int start, int bytesToRead, int maximalBytes)
+{
+ _begin = -1;
+ _bytesRead = 0;
+ _fileData.clear();
+
+ // first try to read the whole file into one array
+ AsciiFileData wholeFile;
+ wholeFile.read(file, start, bytesToRead, maximalBytes);
+ if (bytesToRead == wholeFile.bytesRead()) {
+ wholeFile.setRowBegin(0);
+ wholeFile.setRowsRead(rowIndex.size());
+ _begin = start;
+ _bytesRead = bytesToRead;
+ _fileData << wholeFile;
+ return;
+ } else {
+ wholeFile.release();
+ }
+
+
+ // reading whole file into one array failed, try to read into smaller arrays
+ int chunkSize = qMin((size_t) 10 * MB, maxAllocate);
+ int end = start + bytesToRead;
+ int chunkRead = 0;
+ int row = 0;
+ for (int pos = start; pos < end; pos += chunkRead) {
+ AsciiFileData chunk;
+ // remember first row index
+ chunk.setRowBegin(row);
+ // read complete chunk or to end of file
+ chunkRead = (pos + chunkSize < end ? chunkSize : end - pos);
+ // adjust to row end: pos + chunkRead is in the middle of a row, find index of this row
+ row = findRowOfPosition(rowIndex, row, pos + chunkRead);
+ // read until the beginning of this row
+ chunkRead = (rowIndex[row] - 1);
+ // check if it is the last row, and read remaining bytes from pos
+ chunkRead = (row == rowIndex.size() - 1) ? end - pos : chunkRead - pos;
+ // read the rows
+ chunk.read(file, pos, chunkRead);
+ if (chunkRead != chunk.bytesRead()) {
+ Kst::Debug::self()->log(QString("AsciiFileBuffer: error when reading into chunk"));
+ chunk.release();
+ break;
+ }
+ // remember number of read rows
+ chunk.setRowsRead(row - chunk.rowBegin());
+ _fileData << chunk;
+ _bytesRead += chunk.bytesRead();
+ }
+ if (_bytesRead == bytesToRead) {
+ _begin = start;
+ return;
+ } else {
+ _bytesRead = 0;
+ _fileData.clear();
+ Kst::Debug::self()->log(QString("AsciiFileBuffer: error while reading %1 chunks").arg(_fileData.size()));
+ }
+
+ // sliding window
+ // TODO
+}
+
+
+
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciifilebuffer.h #1320703:1320704
@@ -13,12 +13,14 @@
#ifndef ASCII_FILE_BUFFER_H
#define ASCII_FILE_BUFFER_H
+#include <QVector>
+
template<class T, int Prealloc>
class QVarLengthArray;
class QFile;
-class AsciiFileBuffer
+class AsciiFileData
{
public:
@@ -34,14 +36,12 @@
typedef QVarLengthArray<char, Prealloc> Array;
- AsciiFileBuffer();
- ~AsciiFileBuffer();
+ AsciiFileData();
+ ~AsciiFileData();
inline int begin() const { return _begin; }
inline int bytesRead() const { return _bytesRead; }
-
void read(QFile&, int start, int numberOfBytes, int maximalBytes = -1);
-
char* data();
const char* const constPointer() const;
@@ -49,13 +49,47 @@
bool resize(int size);
void clear(bool forceDeletingArray = false);
+ void release();
+ inline int rowBegin() const { return _rowBegin; }
+ inline int rowsRead() const { return _rowsRead; }
+ inline void setRowBegin(int begin) { _rowBegin = begin; }
+ inline void setRowsRead(int read) { _rowsRead = read; }
+
private:
Array* _array;
int _begin;
int _bytesRead;
+ int _rowBegin;
+ int _rowsRead;
};
+Q_DECLARE_TYPEINFO(AsciiFileData, Q_MOVABLE_TYPE);
+
+class AsciiFileBuffer
+{
+public:
+ AsciiFileBuffer();
+ ~AsciiFileBuffer();
+
+ typedef QVarLengthArray<int, AsciiFileData::Prealloc> RowIndex;
+
+ inline int begin() const { return _begin; }
+ inline int bytesRead() const { return _bytesRead; }
+
+ void clear(bool forceDeletingArray = false);
+
+ void read(QFile&, const RowIndex& rowIndex, int start, int numberOfBytes, int maximalBytes = -1);
+
+ const QVector<AsciiFileData>& data() const;
+
+private:
+ QVector<AsciiFileData> _fileData;
+ int _begin;
+ int _bytesRead;
+ void logData() const;
+};
+
#endif
// vim: ts=2 sw=2 et
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1320703:1320704
@@ -242,6 +242,7 @@
// reading whole file into memory failed
+ /*
// find a smaller allocatable size
_fileBuffer.clear();
int realloc_size = n / 4;
@@ -271,6 +272,8 @@
// don't buffer partial files
_fileBuffer.clear();
return n_read;
+ */
+ return 0;
}
@@ -302,7 +305,7 @@
if (!openValidFile(file)) {
return 0;
}
- _fileBuffer.read(file, begin, bytesToRead);
+ _fileBuffer.read(file, _reader.rowIndex(), begin, bytesToRead);
if (_fileBuffer.bytesRead() == 0) {
success = false;
return 0;
@@ -310,10 +313,16 @@
_reader.detectLineEndingType(file);
}
- return _reader.readField(_fileBuffer, col, v, field, s, n);
+ int sRead = 0;
+ const QVector<AsciiFileData> data = _fileBuffer.data();
+ foreach (const AsciiFileData& chunk, data) {
+ sRead += _reader.readField(chunk, col, v + sRead, field, chunk.rowBegin(), chunk.rowsRead());
}
+ return sRead;
+}
+
//-------------------------------------------------------------------------------------------
QString AsciiSource::fileType() const
{
More information about the Kst mailing list