patch: keditbookmarks html import

Simon Hausmann hausmann at kde.org
Sat Sep 21 11:57:51 BST 2002


Hi,

Please take a quick look at

        http://bugs.kde.org/show_bug.cgi?id=46962

The bug is about &quot; not getting resolved when importing HTML
bookmarks.

While looking into fixing it I noticed that the resolving of amp, lt
and gt is hardcoded. I took a two-step approach: I wrote a method that
resolves arbitrary entities in a string using KCharsets, and then made
the bookmark code use it. See the attached patch.

Ok to commit?

Simon

P.S.: I think the resolveEntities method is generally useful and
might be worth adding to KCharsets. But that's for later :)
-------------- next part --------------
Index: kbookmarkimporter.cc
===================================================================
RCS file: /home/kde/kdelibs/kio/bookmarks/kbookmarkimporter.cc,v
retrieving revision 1.16
diff -u -p -r1.16 kbookmarkimporter.cc
--- kbookmarkimporter.cc	2002/03/09 00:53:27	1.16
+++ kbookmarkimporter.cc	2002/09/21 10:47:22
@@ -21,12 +21,14 @@
 #include <kstringhandler.h>
 #include <klocale.h>
 #include <kdebug.h>
+#include <kcharsets.h>
 #include <qtextcodec.h>
 
 #include <sys/types.h>
 #include <stddef.h>
 #include <dirent.h>
 #include <sys/stat.h>
+#include <assert.h>
 
 ////////////////////
 
@@ -150,9 +152,6 @@ void KBookmarkImporter::parseBookmark( Q
 void KNSBookmarkImporter::parseNSBookmarks( bool utf8 )
 {
     QFile f(m_fileName);
-    QRegExp amp("&amp;");
-    QRegExp lt("&lt;");
-    QRegExp gt("&gt;");
     QTextCodec * codec = utf8 ? QTextCodec::codecForName("UTF-8") : QTextCodec::codecForLocale();
     Q_ASSERT(codec);
     if (!codec)
@@ -184,10 +183,10 @@ void KNSBookmarkImporter::parseNSBookmar
                 name = name.left(name.findRev('<'));
                 if ( name.right(4) == "</A>" )
                     name = name.left( name.length() - 4 );
-                name.replace( amp, "&" ).replace( lt, "<" ).replace( gt, ">" );
+                QString qname = resolveEntities( codec->toUnicode( name ) );
                 QCString additionnalInfo = t.mid( secondQuotes+1, endTag-secondQuotes-1 );
 
-                emit newBookmark( KStringHandler::csqueeze(codec->toUnicode(name)),
+                emit newBookmark( KStringHandler::csqueeze(qname),
                                   link, codec->toUnicode(additionnalInfo) );
               }
             }
@@ -195,12 +194,12 @@ void KNSBookmarkImporter::parseNSBookmar
                 int endTag = t.find('>', 7);
                 QCString name = t.mid(endTag+1);
                 name = name.left(name.findRev('<'));
-                name.replace( amp, "&" ).replace( lt, "<" ).replace( gt, ">" );
+                QString qname = resolveEntities( codec->toUnicode( name ) );
                 QCString additionnalInfo = t.mid( 8, endTag-8 );
                 bool folded = (additionnalInfo.left(6) == "FOLDED");
                 if (folded) additionnalInfo.remove(0,7);
 
-                emit newFolder( KStringHandler::csqueeze(codec->toUnicode(name)),
+                emit newFolder( KStringHandler::csqueeze(qname),
                                 !folded,
                                 codec->toUnicode(additionnalInfo) );
             }
@@ -228,6 +227,52 @@ QString KNSBookmarkImporter::mozillaBook
     else
         return KFileDialog::getOpenFileName( QDir::homeDirPath() + "/.mozilla",
                                              i18n("*.html|HTML files (*.html)") );
+}
+
+QString KNSBookmarkImporter::resolveEntities( const QString &input )
+{
+    QString text = input;
+    const QChar *p = text.unicode();
+    const QChar *end = p + text.length();
+    const QChar *ampersand = 0;
+    bool scanForSemicolon = false;
+
+    for ( ; p < end; ++p ) {
+        QChar ch = *p;
+
+        if ( ch == '&' ) {
+            ampersand = p;
+            scanForSemicolon = true;
+            continue;
+        }
+
+        if ( ch != ';' || scanForSemicolon == false )
+            continue;
+
+        assert( ampersand );
+
+        scanForSemicolon = false;
+
+        const QChar *entityBegin = ampersand + 1;
+
+        uint entityLength = p - entityBegin;
+        if ( entityLength == 0 )
+            continue;
+
+        QChar entityValue = KGlobal::charsets()->fromEntity( QConstString( entityBegin, entityLength ).string() );
+        if ( entityValue.isNull() )
+            continue;
+
+        uint ampersandPos = ( entityBegin - 1 ) - text.unicode();
+
+        text[ ampersandPos ] = entityValue;
+        text.remove( ampersandPos + 1, entityLength + 1 );
+        p = text.unicode() + ampersandPos;
+        end = text.unicode() + text.length();
+        ampersand = 0;
+    }
+
+    return text;
 }
 
 #include "kbookmarkimporter.moc"
Index: kbookmarkimporter.h
===================================================================
RCS file: /home/kde/kdelibs/kio/bookmarks/kbookmarkimporter.h,v
retrieving revision 1.9
diff -u -p -r1.9 kbookmarkimporter.h
--- kbookmarkimporter.h	2002/03/09 00:53:27	1.9
+++ kbookmarkimporter.h	2002/09/21 10:47:22
@@ -93,6 +93,9 @@ signals:
 
 protected:
     QString m_fileName;
+
+private:
+    QString resolveEntities( const QString &input );
 };
 
 #endif


More information about the kfm-devel mailing list