[Kst] branches/work/kst/portto4/kst/src/datasources/ascii

Peter Kümmel syntheticpp at gmx.net
Wed Jan 26 12:37:40 CET 2011


SVN commit 1217211 by kuemmel:

Also read data with Windows line endings.

 M  +61 -23    asciisource.cpp  
 M  +27 -9     asciisource.h  


--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1217210:1217211
@@ -280,6 +280,19 @@
 
 
 //-------------------------------------------------------------------------------------------
+AsciiSource::LineEndingType AsciiSource::detectLineEndingType(QFile& file) const
+{
+  LineEndingType end;
+  QByteArray line = file.readLine();
+  file.seek(0);
+  int lsize = line.size();
+  end.is_crlf = line[lsize - 2] == '\r' && line[lsize - 1] == '\n' ;
+  end.character =  end.is_crlf ? line[lsize - 2] : line[lsize - 1];
+  return end;
+}
+
+
+//-------------------------------------------------------------------------------------------
 #define MAXBUFREADLEN KST_PREALLOC
 Kst::Object::UpdateType AsciiSource::internalDataSourceUpdate() 
 {
@@ -304,6 +317,8 @@
     return NoChange;
   }
 
+  LineEndingType lineending = detectLineEndingType(file);
+
   bool new_data = false;
   bool force_update = true;
   if (_byteLength == file.size()) {
@@ -328,16 +343,29 @@
     const char* bufferData = buffer.data();
 #endif
 
+
     if (_config._delimiters.value().size() == 0) {
       const NoDelimiter comment_del;
-      new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+      if (lineending.isLF()) {
+        new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakLF(lineending), comment_del);
+      } else {
+        new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakCR(lineending), comment_del);
+      }
     } else if (_config._delimiters.value().size() == 1) {
       const IsCharacter comment_del(_config._delimiters.value()[0].toAscii());
-      new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+      if (lineending.isLF()) {
+        new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakLF(lineending), comment_del);
+      } else {
+        new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakCR(lineending), comment_del);
+      }
     } else if (_config._delimiters.value().size() > 1) {
       const IsInString comment_del(_config._delimiters.value());
-      new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+      if (lineending.isLF()) {
+        new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakLF(lineending), comment_del);
+      } else {
+        new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakCR(lineending), comment_del);
     }
+    }
 
   } while ((bufread == MAXBUFREADLEN));
 
@@ -347,17 +375,17 @@
 }
 
 
-template<typename CommentDelimiter>
-bool AsciiSource::findDataRows(const char* buffer, int bufstart, int bufread, const CommentDelimiter& comment_del)
+template<typename IsLineBreak, typename CommentDelimiter>
+bool AsciiSource::findDataRows(const char* buffer, int bufstart, int bufread, const IsLineBreak& isLineBreak, const CommentDelimiter& comment_del)
 {
-  const IsLineBreak isLineBreak;
   const IsWhiteSpace isWhiteSpace;
 
   bool new_data = false;
-
   bool is_data = false;
   bool is_comment = false;
 
+  const int row_offset = bufstart + isLineBreak.size;
+
   for (int i = 0; i < bufread; i++) {
       if (comment_del(buffer[i])) {
         is_comment = true;
@@ -369,7 +397,7 @@
           if (_numFrames >= _rowIndex.size()) {
             _rowIndex.resize(_rowIndex.size() + MAXBUFREADLEN);
           }
-          _rowIndex[_numFrames] = bufstart + i + 1;
+          _rowIndex[_numFrames] = row_offset + i;
           new_data = true;
         }
       } else if (!is_data && !isWhiteSpace(buffer[i]) && !comment_del(buffer[i])) {
@@ -430,6 +458,9 @@
   if (!openValidFile(file)) {
     return 0;
   }
+  
+  LineEndingType lineending = detectLineEndingType(file);
+
   bufread = readFromFile(file, _tmpBuffer, bufstart, bufread);
 
 #ifdef KST_DONT_CHECK_INDEX_IN_DEBUG
@@ -450,16 +481,16 @@
     if (_config._columnDelimiter.value().size() == 1) {
       MeasureTime t("character");
       const IsCharacter column_del(_config._columnDelimiter.value()[0].toAscii());
-      return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del);
+      return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
     } if (_config._columnDelimiter.value().size() > 1) {
       MeasureTime t("string");
       const IsInString column_del(_config._columnDelimiter.value());
-      return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del);
+      return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
     }
   } else if (_config._columnType == AsciiSourceConfig::Whitespace) {
     MeasureTime t("whitespace");
     const IsWhiteSpace column_del;
-    return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del);
+    return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
   }
 
   return 0;
@@ -469,18 +500,18 @@
 //-------------------------------------------------------------------------------------------
 template<typename ColumnDelimiter>
 int AsciiSource::readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
-                              const ColumnDelimiter& column_del)
+                              const LineEndingType& lineending, const ColumnDelimiter& column_del)
 {
 
   if (_config._delimiters.value().size() == 0) {
     const NoDelimiter comment_del;
-    return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del);
+    return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del, comment_del);
   } else if (_config._delimiters.value().size() == 1) {
     const IsCharacter comment_del(_config._delimiters.value()[0].toAscii());
-    return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del);
+    return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del, comment_del);
   } else if (_config._delimiters.value().size() > 1) {
     const IsInString comment_del(_config._delimiters.value());
-    return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del);
+    return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del, comment_del);
   }
 
   return 0;
@@ -488,20 +519,29 @@
 
 template<typename ColumnDelimiter, typename CommentDelimiter>
 int AsciiSource::readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
-                              const ColumnDelimiter& column_del, const CommentDelimiter& comment_del)
+                              const LineEndingType& lineending, const ColumnDelimiter& column_del, const CommentDelimiter& comment_del)
 {
   if (_config._columnWidthIsConst) {
-    const AlwaysTrue column_withs_are_const;
-    return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del, column_withs_are_const);
+    const AlwaysTrue column_withs_const;
+    if (lineending.isLF()) {
+      return readColumns(v, buffer, bufstart, bufread, col, s, n, IsLineBreakLF(lineending), column_del, comment_del, column_withs_const);
   } else {
-    const AlwaysFalse column_withs_are_not_const;
-    return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del, column_withs_are_not_const);
+      return readColumns(v, buffer, bufstart, bufread, col, s, n, IsLineBreakCR(lineending), column_del, comment_del, column_withs_const);
   }
+  } else {
+    const AlwaysFalse column_withs_const;
+    if (lineending.isLF()) {
+      return readColumns(v, buffer, bufstart, bufread, col, s, n, IsLineBreakLF(lineending), column_del, comment_del, column_withs_const);
+    } else {
+      return readColumns(v, buffer, bufstart, bufread, col, s, n, IsLineBreakCR(lineending), column_del, comment_del, column_withs_const);
 }
+  }
+}
 
 
-template<typename ColumnDelimiter, typename CommentDelimiter, typename ColumnWidthsAreConst>
+template<typename IsLineBreak, typename ColumnDelimiter, typename CommentDelimiter, typename ColumnWidthsAreConst>
 int AsciiSource::readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
+                              const IsLineBreak& isLineBreak,
                               const ColumnDelimiter& column_del, const CommentDelimiter& comment_del,
                               const ColumnWidthsAreConst& are_column_widths_const)
 {
@@ -509,8 +549,6 @@
   lexc.setDecimalSeparator(_config._useDot, _config._localSeparator);
   const QString delimiters = _config._delimiters.value();
 
-  const IsLineBreak isLineBreak;
-
   int col_start = -1;
   for (int i = 0; i < n; i++, s++) {
     bool incol = false;
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.h #1217210:1217211
@@ -99,9 +99,17 @@
 
     bool openValidFile(QFile &file);
     static bool openFile(QFile &file);
+
     template<class T>
     int readFromFile(QFile&, T& buffer, int start, int numberOfBytes, int maximalBytes = -1);
 
+    struct LineEndingType {
+      bool is_crlf;
+      char character;
+      bool isCR() const { return character == '\r'; }
+      bool isLF() const { return character == '\n'; }
+    };
+    LineEndingType detectLineEndingType(QFile& file) const;
 
     // column and comment delimiter functions
 
@@ -178,29 +186,39 @@
       }
     };
 
-    struct IsLineBreak {
-      IsLineBreak() {
+    struct IsLineBreakLF {
+      IsLineBreakLF(const LineEndingType& t) : size(1) {
       }
+      const int size;
       inline bool operator()(const char c) const {
-        return c == '\n' || c == '\r';
+        return c == '\n';
       }
     };
 
+    struct IsLineBreakCR {
+      IsLineBreakCR(const LineEndingType& t) : size( t.is_crlf ? 2 : 1 ) {
+      }
+      const int size;
+      inline bool operator()(const char c) const {
+        return c == '\r';
+      }
+    };
 
+
     template<typename ColumnDelimiter>
     int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
-                    const ColumnDelimiter&);
+                    const LineEndingType&, const ColumnDelimiter&);
 
     template<typename ColumnDelimiter, typename CommentDelimiter>
     int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
-                    const ColumnDelimiter&, const CommentDelimiter&);
+                    const LineEndingType&, const ColumnDelimiter&, const CommentDelimiter&);
 
-    template<typename ColumnDelimiter, typename CommentDelimiter, typename ColumnWidthsAreConst>
+    template<typename IsLineBreak, typename ColumnDelimiter, typename CommentDelimiter, typename ColumnWidthsAreConst>
     int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
-                    const ColumnDelimiter&, const CommentDelimiter&, const ColumnWidthsAreConst&);
+                    const IsLineBreak&, const ColumnDelimiter&, const CommentDelimiter&, const ColumnWidthsAreConst&);
 
-    template<typename CommentDelimiter>
-    bool findDataRows(const char* buffer, int bufstart, int bufread, const CommentDelimiter&);
+    template<typename IsLineBreak, typename CommentDelimiter>
+    bool findDataRows(const char* buffer, int bufstart, int bufread, const IsLineBreak&, const CommentDelimiter&);
 
     void toDouble(const LexicalCast& lexc, const char* buffer, int bufread, int ch, double* v, int row);
 


More information about the Kst mailing list