[Kst] branches/work/kst/portto4/kst/src/datasources/ascii
Peter Kümmel
syntheticpp at gmx.net
Wed Jan 26 12:37:40 CET 2011
SVN commit 1217211 by kuemmel:
Also read data with Windows line endings
M +61 -23 asciisource.cpp
M +27 -9 asciisource.h
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.cpp #1217210:1217211
@@ -280,6 +280,19 @@
//-------------------------------------------------------------------------------------------
+AsciiSource::LineEndingType AsciiSource::detectLineEndingType(QFile& file) const
+{
+ LineEndingType end;
+ QByteArray line = file.readLine();
+ file.seek(0);
+ int lsize = line.size();
+ end.is_crlf = line[lsize - 2] == '\r' && line[lsize - 1] == '\n' ;
+ end.character = end.is_crlf ? line[lsize - 2] : line[lsize - 1];
+ return end;
+}
+
+
+//-------------------------------------------------------------------------------------------
#define MAXBUFREADLEN KST_PREALLOC
Kst::Object::UpdateType AsciiSource::internalDataSourceUpdate()
{
@@ -304,6 +317,8 @@
return NoChange;
}
+ LineEndingType lineending = detectLineEndingType(file);
+
bool new_data = false;
bool force_update = true;
if (_byteLength == file.size()) {
@@ -328,16 +343,29 @@
const char* bufferData = buffer.data();
#endif
+
if (_config._delimiters.value().size() == 0) {
const NoDelimiter comment_del;
- new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+ if (lineending.isLF()) {
+ new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakLF(lineending), comment_del);
+ } else {
+ new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakCR(lineending), comment_del);
+ }
} else if (_config._delimiters.value().size() == 1) {
const IsCharacter comment_del(_config._delimiters.value()[0].toAscii());
- new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+ if (lineending.isLF()) {
+ new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakLF(lineending), comment_del);
+ } else {
+ new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakCR(lineending), comment_del);
+ }
} else if (_config._delimiters.value().size() > 1) {
const IsInString comment_del(_config._delimiters.value());
- new_data = findDataRows(buffer, bufstart, bufread, comment_del);
+ if (lineending.isLF()) {
+ new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakLF(lineending), comment_del);
+ } else {
+ new_data = findDataRows(buffer, bufstart, bufread, IsLineBreakCR(lineending), comment_del);
}
+ }
} while ((bufread == MAXBUFREADLEN));
@@ -347,17 +375,17 @@
}
-template<typename CommentDelimiter>
-bool AsciiSource::findDataRows(const char* buffer, int bufstart, int bufread, const CommentDelimiter& comment_del)
+template<typename IsLineBreak, typename CommentDelimiter>
+bool AsciiSource::findDataRows(const char* buffer, int bufstart, int bufread, const IsLineBreak& isLineBreak, const CommentDelimiter& comment_del)
{
- const IsLineBreak isLineBreak;
const IsWhiteSpace isWhiteSpace;
bool new_data = false;
-
bool is_data = false;
bool is_comment = false;
+ const int row_offset = bufstart + isLineBreak.size;
+
for (int i = 0; i < bufread; i++) {
if (comment_del(buffer[i])) {
is_comment = true;
@@ -369,7 +397,7 @@
if (_numFrames >= _rowIndex.size()) {
_rowIndex.resize(_rowIndex.size() + MAXBUFREADLEN);
}
- _rowIndex[_numFrames] = bufstart + i + 1;
+ _rowIndex[_numFrames] = row_offset + i;
new_data = true;
}
} else if (!is_data && !isWhiteSpace(buffer[i]) && !comment_del(buffer[i])) {
@@ -430,6 +458,9 @@
if (!openValidFile(file)) {
return 0;
}
+
+ LineEndingType lineending = detectLineEndingType(file);
+
bufread = readFromFile(file, _tmpBuffer, bufstart, bufread);
#ifdef KST_DONT_CHECK_INDEX_IN_DEBUG
@@ -450,16 +481,16 @@
if (_config._columnDelimiter.value().size() == 1) {
MeasureTime t("character");
const IsCharacter column_del(_config._columnDelimiter.value()[0].toAscii());
- return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
} if (_config._columnDelimiter.value().size() > 1) {
MeasureTime t("string");
const IsInString column_del(_config._columnDelimiter.value());
- return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
}
} else if (_config._columnType == AsciiSourceConfig::Whitespace) {
MeasureTime t("whitespace");
const IsWhiteSpace column_del;
- return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del);
}
return 0;
@@ -469,18 +500,18 @@
//-------------------------------------------------------------------------------------------
template<typename ColumnDelimiter>
int AsciiSource::readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
- const ColumnDelimiter& column_del)
+ const LineEndingType& lineending, const ColumnDelimiter& column_del)
{
if (_config._delimiters.value().size() == 0) {
const NoDelimiter comment_del;
- return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del, comment_del);
} else if (_config._delimiters.value().size() == 1) {
const IsCharacter comment_del(_config._delimiters.value()[0].toAscii());
- return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del, comment_del);
} else if (_config._delimiters.value().size() > 1) {
const IsInString comment_del(_config._delimiters.value());
- return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, lineending, column_del, comment_del);
}
return 0;
@@ -488,20 +519,29 @@
template<typename ColumnDelimiter, typename CommentDelimiter>
int AsciiSource::readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
- const ColumnDelimiter& column_del, const CommentDelimiter& comment_del)
+ const LineEndingType& lineending, const ColumnDelimiter& column_del, const CommentDelimiter& comment_del)
{
if (_config._columnWidthIsConst) {
- const AlwaysTrue column_withs_are_const;
- return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del, column_withs_are_const);
+ const AlwaysTrue column_withs_const;
+ if (lineending.isLF()) {
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, IsLineBreakLF(lineending), column_del, comment_del, column_withs_const);
} else {
- const AlwaysFalse column_withs_are_not_const;
- return readColumns(v, buffer, bufstart, bufread, col, s, n, column_del, comment_del, column_withs_are_not_const);
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, IsLineBreakCR(lineending), column_del, comment_del, column_withs_const);
}
+ } else {
+ const AlwaysFalse column_withs_const;
+ if (lineending.isLF()) {
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, IsLineBreakLF(lineending), column_del, comment_del, column_withs_const);
+ } else {
+ return readColumns(v, buffer, bufstart, bufread, col, s, n, IsLineBreakCR(lineending), column_del, comment_del, column_withs_const);
}
+ }
+}
-template<typename ColumnDelimiter, typename CommentDelimiter, typename ColumnWidthsAreConst>
+template<typename IsLineBreak, typename ColumnDelimiter, typename CommentDelimiter, typename ColumnWidthsAreConst>
int AsciiSource::readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
+ const IsLineBreak& isLineBreak,
const ColumnDelimiter& column_del, const CommentDelimiter& comment_del,
const ColumnWidthsAreConst& are_column_widths_const)
{
@@ -509,8 +549,6 @@
lexc.setDecimalSeparator(_config._useDot, _config._localSeparator);
const QString delimiters = _config._delimiters.value();
- const IsLineBreak isLineBreak;
-
int col_start = -1;
for (int i = 0; i < n; i++, s++) {
bool incol = false;
--- branches/work/kst/portto4/kst/src/datasources/ascii/asciisource.h #1217210:1217211
@@ -99,9 +99,17 @@
bool openValidFile(QFile &file);
static bool openFile(QFile &file);
+
template<class T>
int readFromFile(QFile&, T& buffer, int start, int numberOfBytes, int maximalBytes = -1);
+ struct LineEndingType {
+ bool is_crlf;
+ char character;
+ bool isCR() const { return character == '\r'; }
+ bool isLF() const { return character == '\n'; }
+ };
+ LineEndingType detectLineEndingType(QFile& file) const;
// column and comment delimiter functions
@@ -178,29 +186,39 @@
}
};
- struct IsLineBreak {
- IsLineBreak() {
+ struct IsLineBreakLF {
+ IsLineBreakLF(const LineEndingType& t) : size(1) {
}
+ const int size;
inline bool operator()(const char c) const {
- return c == '\n' || c == '\r';
+ return c == '\n';
}
};
+ struct IsLineBreakCR {
+ IsLineBreakCR(const LineEndingType& t) : size( t.is_crlf ? 2 : 1 ) {
+ }
+ const int size;
+ inline bool operator()(const char c) const {
+ return c == '\r';
+ }
+ };
+
template<typename ColumnDelimiter>
int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
- const ColumnDelimiter&);
+ const LineEndingType&, const ColumnDelimiter&);
template<typename ColumnDelimiter, typename CommentDelimiter>
int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
- const ColumnDelimiter&, const CommentDelimiter&);
+ const LineEndingType&, const ColumnDelimiter&, const CommentDelimiter&);
- template<typename ColumnDelimiter, typename CommentDelimiter, typename ColumnWidthsAreConst>
+ template<typename IsLineBreak, typename ColumnDelimiter, typename CommentDelimiter, typename ColumnWidthsAreConst>
int readColumns(double* v, const char* buffer, int bufstart, int bufread, int col, int s, int n,
- const ColumnDelimiter&, const CommentDelimiter&, const ColumnWidthsAreConst&);
+ const IsLineBreak&, const ColumnDelimiter&, const CommentDelimiter&, const ColumnWidthsAreConst&);
- template<typename CommentDelimiter>
- bool findDataRows(const char* buffer, int bufstart, int bufread, const CommentDelimiter&);
+ template<typename IsLineBreak, typename CommentDelimiter>
+ bool findDataRows(const char* buffer, int bufstart, int bufread, const IsLineBreak&, const CommentDelimiter&);
void toDouble(const LexicalCast& lexc, const char* buffer, int bufread, int ch, double* v, int row);
More information about the Kst mailing list