[Kst] branches/work/kst/portto4/kst/src/datasources/ascii

Peter Kümmel syntheticpp at gmx.net
Tue Oct 16 14:47:53 UTC 2012


SVN commit 1320706 by kuemmel:

add lazyRead and splitFile function

 M  +38 -24    asciifilebuffer.cpp  
 M  +2 -0      asciifilebuffer.h  
 M  +27 -1     asciifiledata.cpp  
 M  +10 -0     asciifiledata.h  


--- branches/work/kst/portto4/kst/src/datasources/ascii/asciifilebuffer.cpp #1320705:1320706
@@ -51,10 +51,8 @@
 //-------------------------------------------------------------------------------------------
 void AsciiFileBuffer::logData() const
 {
-  int i = 0;
   foreach (const AsciiFileData& chunk, _fileData) {
-    qDebug() << "_fileData: " << i << ". " << chunk.rowBegin() << " ... " << chunk.rowBegin() + chunk.rowsRead();
-    i++;
+    chunk.logData(); // formatting is delegated to the chunk, which logs its own byte and row range
   }
 }
 
@@ -72,6 +70,38 @@
 }
 
 //-------------------------------------------------------------------------------------------
+const QVector<AsciiFileData> AsciiFileBuffer::splitFile(int chunkSize, const RowIndex& rowIndex, int start, int bytesToRead) const
+{
+  // plan how the byte range [start, start + bytesToRead) is cut into chunks that end at row boundaries; no file is touched here
+  const int end = start + bytesToRead;
+  int chunkRead = 0;
+  int lastRow = 0; // NOTE(review): row search always starts at index 0, whatever 'start' is - confirm this is intended
+  QVector<AsciiFileData> chunks;
+  for (int pos = start; pos < end; pos += chunkRead) {
+    // this object only carries the planned positions (begin, size, rows); its buffer is not filled here
+    AsciiFileData chunk;
+    // tentative size: a full chunkSize, or whatever is left up to 'end'
+    chunkRead = (pos + chunkSize < end ? chunkSize : end - pos);
+    // pos + chunkRead is probably in the middle of a row: look up the index of that row, searching from the previous result
+    const int rowBegin = lastRow;
+    lastRow = findRowOfPosition(rowIndex, lastRow, pos + chunkRead);
+    // read until the beginning of this row (chunkRead temporarily holds rowIndex[lastRow] - 1, not a size)
+    chunkRead = (rowIndex[lastRow] - 1);
+    // last index entry reached: take all remaining bytes from pos; otherwise turn the value above into a size relative to pos
+    chunkRead = (lastRow == rowIndex.size() - 1) ? end - pos : chunkRead - pos; // NOTE(review): if this is <= 0 the loop does not advance - confirm against findRowOfPosition
+    // store where the chunk starts and how many bytes it spans
+    chunk.setBegin(pos);
+    chunk.setBytesRead(chunkRead);
+    // store the first row of the chunk and the number of rows it covers
+    chunk.setRowBegin(rowBegin);
+    chunk.setRowsRead(lastRow - rowBegin);
+    chunks << chunk;
+  }
+  //qDebug() << "File split into " << chunks.size() << " chunks:"; logData();
+  return chunks;
+}
+
+//-------------------------------------------------------------------------------------------
 void AsciiFileBuffer::read(QFile& file, const RowIndex& rowIndex, int start, int bytesToRead, int maximalBytes)
 {
   _begin = -1;
@@ -94,31 +124,15 @@
 
   // reading whole file into one array failed, try to read into smaller arrays
   int chunkSize = qMin((size_t) 10 * MB, maxAllocate);
-  int end = start + bytesToRead;
-  int chunkRead = 0;
-  int row = 0;
-  for (int pos = start; pos < end; pos += chunkRead) {
-    AsciiFileData chunk;
-    // remember first row index
-    chunk.setRowBegin(row);
-    // read complete chunk or to end of file
-    chunkRead = (pos + chunkSize < end ? chunkSize : end - pos);  
-    // adjust to row end: pos + chunkRead is in the middle of a row, find index of this row
-    row = findRowOfPosition(rowIndex, row, pos + chunkRead);
-    // read until the beginning of this row
-    chunkRead = (rowIndex[row] - 1);
-    // check if it is the last row, and read remaining bytes from pos
-    chunkRead = (row == rowIndex.size() - 1) ? end - pos : chunkRead - pos;
-    // read the rows
-    chunk.read(file, pos, chunkRead);
-    if (chunkRead != chunk.bytesRead()) {
+  _fileData = splitFile(chunkSize, rowIndex, start, bytesToRead);
+  _bytesRead = 0;
+  foreach (AsciiFileData chunk, _fileData) {
+    // use the begin and size already set on the chunk by splitFile
+    if (!chunk.lazyRead(file)) {
       Kst::Debug::self()->log(QString("AsciiFileBuffer: error when reading into chunk"));
       chunk.release();
       break;
     }
-    // remember number of read rows
-    chunk.setRowsRead(row - chunk.rowBegin());
-    _fileData << chunk;
     _bytesRead += chunk.bytesRead();
   }
   if (_bytesRead == bytesToRead) {
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciifilebuffer.h #1320705:1320706
@@ -40,6 +40,8 @@
   int _begin;
   int _bytesRead;
   void logData() const;
+
+  const QVector<AsciiFileData> splitFile(int chunkSize, const RowIndex& rowIndex, int start, int bytesToRead) const;
 };
 
 #endif
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciifiledata.cpp #1320705:1320706
@@ -30,6 +30,7 @@
 
 #include <QFile>
 #include <QDebug>
+#include <QByteArray>
 
 
 int MB = 1024*1024;
@@ -93,7 +94,7 @@
 }
 
 //-------------------------------------------------------------------------------------------
-AsciiFileData::AsciiFileData() : _array(new Array), _begin(-1), _bytesRead(0), _rowBegin(-1), _rowsRead(0)
+AsciiFileData::AsciiFileData() : _array(new Array), _lazyRead(false), _begin(-1), _bytesRead(0), _rowBegin(-1), _rowsRead(0) // new flag _lazyRead starts out false
 {
 }
 
@@ -175,3 +176,28 @@
   _bytesRead = bytesRead;
 }
 
+//-------------------------------------------------------------------------------------------
+bool AsciiFileData::lazyRead(QFile& file) // reads the range previously stored in _begin/_bytesRead; returns false if that exact range was not read
+{
+  int start = _begin; // keep copies of the planned range; read() presumably overwrites _begin/_bytesRead - TODO confirm
+  int bytesToRead = _bytesRead;
+  read(file, start, bytesToRead);
+  if (begin() != start || bytesRead() != bytesToRead) { // what was read differs from what was planned
+    clear(true); // NOTE(review): meaning of the 'true' argument is not visible in this patch
+    return false;
+  }
+  return true;
+}
+
+//-------------------------------------------------------------------------------------------
+void AsciiFileData::logData() const
+{
+  // Debug helper: logs object address, buffer address, byte range and row range. The pointer VALUES are formatted (zero-padded hex), not the memory behind them.
+  const int digits = 2 * sizeof(void*);
+  const QString This = QString("%1").arg(reinterpret_cast<quintptr>(this), digits, 16, QLatin1Char('0')).toUpper();
+  const QString array = QString("%1").arg(reinterpret_cast<quintptr>(_array), digits, 16, QLatin1Char('0')).toUpper();
+  qDebug() << QString("%1 array %2, byte %3 ... %4, row %5 ... %6").arg(This).arg(array)
+    .arg(begin(), 8).arg(begin() + bytesRead(), 8).arg(rowBegin(), 8).arg(rowBegin() + rowsRead(), 8);
+}
+
+
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciifiledata.h #1320705:1320706
@@ -39,9 +39,16 @@
   AsciiFileData();
   ~AsciiFileData();
 
+  inline bool lazyRead() const { return _lazyRead; } // flag getter; note the overload lazyRead(QFile&) in this patch does not consult the flag
+  inline void setLazyRead(bool value) { _lazyRead = value; } // only stores the flag; no read is triggered
+
   inline int begin() const { return _begin; }
   inline int bytesRead() const { return _bytesRead; }
+  inline void setBegin(int begin) { _begin = begin; } // records a start position without reading; splitFile uses it to plan a chunk
+  inline void setBytesRead(int read) { _bytesRead = read; } // records a byte count without reading; lazyRead(QFile&) later uses it as the size to read
+
   void read(QFile&, int start, int numberOfBytes, int maximalBytes = -1);
+  bool lazyRead(QFile&);
   char* data();
 
   const char* const constPointer() const;
@@ -56,8 +63,11 @@
   inline void setRowBegin(int begin) { _rowBegin = begin; }
   inline void setRowsRead(int read) { _rowsRead = read; }
 
+  void logData() const;
+
 private:
   Array* _array;
+  bool _lazyRead;
   int _begin;
   int _bytesRead;
   int _rowBegin;


More information about the Kst mailing list