[office/tellico] /: Update MultiFetcher to chain data sources in order

Robby Stephenson null at kde.org
Mon Feb 15 16:37:45 GMT 2021


Git commit 646b960b2c410bb6f6e9877d8d9a4caa1694047e by Robby Stephenson.
Committed on 15/02/2021 at 16:37.
Pushed by rstephenson into branch 'master'.

Update MultiFetcher to chain data sources in order

Every result from the first data source is taken as input to an update
request for each subsequent data source. Previously, each data source
was queried independently with results showing up in any order.

M  +4    -0    ChangeLog
M  +2    -2    doc/configuration.docbook
M  +1    -0    src/fetch/fetcher.cpp
M  +2    -0    src/fetch/fetchmanager.h
M  +71   -37   src/fetch/multifetcher.cpp
M  +3    -1    src/fetch/multifetcher.h
M  +14   -0    src/tests/CMakeLists.txt
M  +3    -3    src/tests/data/cat_mods.spec
A  +104  -0    src/tests/multifetchertest.cpp     [License: GPL (v2/3)]
A  +41   -0    src/tests/multifetchertest.h     [License: GPL (v2/3)]

https://invent.kde.org/office/tellico/commit/646b960b2c410bb6f6e9877d8d9a4caa1694047e

diff --git a/ChangeLog b/ChangeLog
index f5d7e9e7..9e1f2c50 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2021-02-15  Robby Stephenson  <robby at periapsis.org>
+
+	* Improved MultiFetcher to chain data source searches in order.
+
 2021-02-07  Robby Stephenson  <robby at periapsis.org>
 
 	* Added UPCItemDB.com data source.
diff --git a/doc/configuration.docbook b/doc/configuration.docbook
index 800d38d2..d850ce69 100644
--- a/doc/configuration.docbook
+++ b/doc/configuration.docbook
@@ -536,7 +536,7 @@ For updating entries already in the collection, the final check box and edit box
 <sect2 id="multiple-sources">
 <title>Multiple Combined Data Sources</title>
 <para>
-Combinations of up to eight existing data sources can be used as a single source, where the search results from all the sources are merged. The collection type to be used must be set before adding sources.
+Combinations of up to eight existing data sources can be used as a single source, where each search result from the first source is updated from the subsequent sources. The collection type to be used must be set before adding sources.
 </para>
 
 <screenshot>
@@ -546,7 +546,7 @@ Combinations of up to eight existing data sources can be used as a single source
 </screenshot>
 
 <para>
-Only existing data sources can be used in combination. The search request is sent to each source, and the results are combined. Since the merged results depend on the order of the search results, the combined set may be different depending on network and source speed.
+Only existing data sources can be used in combination. Only the search type for the first source can be used in this source since the results come from the first data source. For example, a UPCitemDb search may first be done, with each result then updated from the TheMovieDB.
 </para>
 </sect2>
 
diff --git a/src/fetch/fetcher.cpp b/src/fetch/fetcher.cpp
index 94281e78..02b213b6 100644
--- a/src/fetch/fetcher.cpp
+++ b/src/fetch/fetcher.cpp
@@ -76,6 +76,7 @@ const Tellico::Fetch::FetchRequest& Fetcher::request() const {
 void Fetcher::startSearch(const FetchRequest& request_) {
   m_request = request_;
   if(!canFetch(m_request.collectionType)) {
+    myDebug() << "Bad collection type:" << source() << m_request.collectionType;
     message(i18n("%1 does not allow searching for this collection type.", source()),
             MessageHandler::Warning);
     emit signalDone(this);
diff --git a/src/fetch/fetchmanager.h b/src/fetch/fetchmanager.h
index eba3aba3..3710cf76 100644
--- a/src/fetch/fetchmanager.h
+++ b/src/fetch/fetchmanager.h
@@ -36,6 +36,7 @@
 #include <QPixmap>
 
 class QUrl;
+class MultiFetcherTest;
 
 namespace Tellico {
   namespace Fetch {
@@ -121,6 +122,7 @@ private Q_SLOTS:
 private:
   friend class ManagerMessage;
   friend class FetcherInitializer;
+  friend class ::MultiFetcherTest;
   static Manager* s_self;
 
   Manager();
diff --git a/src/fetch/multifetcher.cpp b/src/fetch/multifetcher.cpp
index cf540433..d410ff53 100644
--- a/src/fetch/multifetcher.cpp
+++ b/src/fetch/multifetcher.cpp
@@ -40,7 +40,7 @@ using namespace Tellico;
 using Tellico::Fetch::MultiFetcher;
 
 MultiFetcher::MultiFetcher(QObject* parent_)
-    : Fetcher(parent_), m_collType(0), m_started(false) {
+    : Fetcher(parent_), m_collType(0), m_fetcherIndex(0), m_started(false) {
 }
 
 MultiFetcher::~MultiFetcher() {
@@ -55,8 +55,8 @@ bool MultiFetcher::canFetch(int type) const {
 }
 
 bool MultiFetcher::canSearch(FetchKey k) const {
-  return k == Title ||
-         (k == ISBN && m_collType == Data::Collection::Book);
+  // can fetch anything supported by the first data source
+  return m_fetchers.isEmpty() || m_fetchers.front()->canSearch(k);;
 }
 
 void MultiFetcher::readConfigHook(const KConfigGroup& config_) {
@@ -71,11 +71,15 @@ void MultiFetcher::readSources() const {
   foreach(const QString& uuid, m_uuids) {
     Fetcher::Ptr fetcher = Manager::self()->fetcherByUuid(uuid);
     if(fetcher) {
+//      myDebug() << "Adding fetcher:" << fetcher->source();
       m_fetchers.append(fetcher);
-      connect(fetcher.data(), &Fetcher::signalResultFound,
-              this, &MultiFetcher::slotResult);
-      connect(fetcher.data(), &Fetcher::signalDone,
-              this, &MultiFetcher::slotDone);
+      // in the event we have multiple instances of same fetcher, only need to connect once
+      if(m_fetchers.count(fetcher) == 1) {
+        connect(fetcher.data(), &Fetcher::signalResultFound,
+                this, &MultiFetcher::slotResult);
+        connect(fetcher.data(), &Fetcher::signalDone,
+                this, &MultiFetcher::slotDone);
+      }
     }
   }
 }
@@ -83,17 +87,14 @@ void MultiFetcher::readSources() const {
 void MultiFetcher::search() {
   m_started = true;
   readSources();
-  foreach(Fetcher::Ptr fetcher, m_fetchers) {
-    fetcher->startSearch(request());
-  }
-}
-
-void MultiFetcher::continueSearch() {
-  m_started = true;
-  readSources();
-  foreach(Fetcher::Ptr fetcher, m_fetchers) {
-    fetcher->continueSearch();
+  if(m_fetchers.isEmpty()) {
+//    myDebug() << source() << "has no sources";
+    slotDone();
+    return;
   }
+  m_matches.clear();
+//  myDebug() << "Starting" << m_fetchers.front()->source();
+  m_fetchers.front()->startSearch(request());
 }
 
 void MultiFetcher::stop() {
@@ -109,30 +110,64 @@ void MultiFetcher::stop() {
 
 void MultiFetcher::slotResult(Tellico::Fetch::FetchResult* result) {
   Data::EntryPtr newEntry = result->fetchEntry();
-  // first check if we've already received this entry result from another fetcher
-  bool alreadyFound = false;
-  foreach(Data::EntryPtr entry, m_entries) {
-    if(entry->collection()->sameEntry(entry, newEntry) > EntryComparison::ENTRY_GOOD_MATCH) {
-      // same entry, so instead of adding a new result, just merge it
-      Data::Document::mergeEntry(entry, newEntry);
-      alreadyFound = true;
-      break;
-    }
-  }
-
-  if(!alreadyFound) {
+  // if fetcher index is 0, this is the first set of results, save them all
+  if(m_fetcherIndex == 0) {
+//    myDebug() << "...found new result:" << newEntry->title();
     m_entries.append(newEntry);
+    return;
   }
+
+  // otherwise, keep the entry to compare later
+  m_matches.append(newEntry);
 }
 
 void MultiFetcher::slotDone() {
-  //keep it simple, if a little slow
-  // iterate over all fetchers, if they are still running, do not emit done
-  foreach(Fetcher::Ptr fetcher, m_fetchers) {
-    if(fetcher->isSearching())  {
-      return;
+  if(m_fetcherIndex == 0) {
+    m_fetcherIndex++;
+    m_resultIndex = 0;
+  } else {
+    // iterate over all the matches from this data source and figure out which one is the best match to the existing result
+    Data::EntryPtr entry = m_entries.at(m_resultIndex);
+    int bestScore = -1;
+    int bestIndex = -1;
+    for(int idx = 0; idx < m_matches.count(); ++idx) {
+      auto match = m_matches.at(idx);
+      const int score = entry->collection()->sameEntry(entry, match);
+      if(score > bestScore) {
+        bestScore = score;
+        bestIndex = idx;
+      }
+      if(score > EntryComparison::ENTRY_PERFECT_MATCH) {
+        // no need to compare further
+        break;
+      }
+    }
+//    myDebug() << "best score" << bestScore  << "; index:" << bestIndex;
+    if(bestIndex > -1 && bestScore > EntryComparison::ENTRY_GOOD_MATCH) {
+      auto newEntry = m_matches.at(bestIndex);
+//      myDebug() << "...merging from" << newEntry->title() << "into" << entry->title();
+      Data::Document::mergeEntry(entry, newEntry);
+    } else {
+//      myDebug() << "___no match for" << entry->title();
     }
+
+    // now, bump to next result and continue trying to match
+    m_resultIndex++;
+    if(m_resultIndex >= m_entries.count()) {
+      m_fetcherIndex++;
+      m_resultIndex = 0;
+    }
+  }
+
+  if(m_fetcherIndex < m_fetchers.count() && m_resultIndex < m_entries.count()) {
+    Fetcher::Ptr fetcher = m_fetchers.at(m_fetcherIndex);
+    Q_ASSERT(fetcher);
+//    myDebug() << "updating entry#" << m_resultIndex << "from" << fetcher->source();
+    fetcher->startUpdate(m_entries.at(m_resultIndex));
+    return;
   }
+
+  // at this point, all the fetchers have run through all the results, so we're
   // done so emit all results
   foreach(Data::EntryPtr entry, m_entries) {
     FetchResult* r = new FetchResult(Fetcher::Ptr(this), entry);
@@ -152,12 +187,11 @@ Tellico::Data::EntryPtr MultiFetcher::fetchEntryHook(uint uid_) {
 }
 
 Tellico::Fetch::FetchRequest MultiFetcher::updateRequest(Data::EntryPtr entry_) {
-//  myDebug();
-  QString isbn = entry_->field(QStringLiteral("isbn"));
+  const QString isbn = entry_->field(QStringLiteral("isbn"));
   if(!isbn.isEmpty()) {
     return FetchRequest(ISBN, isbn);
   }
-  QString title = entry_->field(QStringLiteral("title"));
+  const QString title = entry_->field(QStringLiteral("title"));
   if(!title.isEmpty()) {
     return FetchRequest(Title, title);
   }
diff --git a/src/fetch/multifetcher.h b/src/fetch/multifetcher.h
index ca658d33..c7495739 100644
--- a/src/fetch/multifetcher.h
+++ b/src/fetch/multifetcher.h
@@ -61,7 +61,6 @@ public:
    */
   virtual QString source() const Q_DECL_OVERRIDE;
   virtual bool isSearching() const Q_DECL_OVERRIDE { return m_started; }
-  virtual void continueSearch() Q_DECL_OVERRIDE;
   virtual bool canSearch(FetchKey k) const Q_DECL_OVERRIDE;
   virtual void stop() Q_DECL_OVERRIDE;
   virtual Data::EntryPtr fetchEntryHook(uint uid) Q_DECL_OVERRIDE;
@@ -93,10 +92,13 @@ private:
   void readSources() const;
 
   Data::EntryList m_entries;
+  Data::EntryList m_matches;
   QHash<uint, Data::EntryPtr> m_entryHash;
   int m_collType;
   QStringList m_uuids;
   mutable QList<Fetcher::Ptr> m_fetchers;
+  int m_fetcherIndex;
+  int m_resultIndex;
 
   bool m_started;
 };
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 57793661..b0c38ee0 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -794,6 +794,20 @@ add_test(mrlookupfetchertest mrlookupfetchertest)
 ecm_mark_as_test(mrlookupfetchertest)
 TARGET_LINK_LIBRARIES(mrlookupfetchertest fetcherstest ${TELLICO_BTPARSE_LIBS} ${TELLICO_TEST_LIBS})
 
+add_executable(multifetchertest multifetchertest.cpp abstractfetchertest.cpp
+  ../fetch/multifetcher.cpp
+  ../fetch/execexternalfetcher.cpp
+  ../fetch/messagelogger.cpp
+  ../translators/bibteximporter.cpp
+  ../translators/risimporter.cpp
+  ../gui/collectiontypecombo.cpp
+  ../gui/kwidgetlister.cpp
+)
+ecm_mark_nongui_executable(multifetchertest)
+add_test(multifetchertest multifetchertest)
+ecm_mark_as_test(multifetchertest)
+TARGET_LINK_LIBRARIES(multifetchertest fetcherstest newstuff ${TELLICO_BTPARSE_LIBS} ${TELLICO_TEST_LIBS})
+
 add_executable(musicbrainzfetchertest musicbrainzfetchertest.cpp abstractfetchertest.cpp
   ../fetch/musicbrainzfetcher.cpp
 )
diff --git a/src/tests/data/cat_mods.spec b/src/tests/data/cat_mods.spec
index 9561b8fe..cb8060e3 100644
--- a/src/tests/data/cat_mods.spec
+++ b/src/tests/data/cat_mods.spec
@@ -1,8 +1,8 @@
-ArgumentKeys=1
-Arguments=%1
+ArgumentKeys=1,3
+Arguments=%1,%1
 CollectionType=2
 FormatType=6
 Name=Cat File
 Type=data-source
-UpdateArgs=%{title}
+UpdateArgs=%{title} %{isbn}
 Uuid={d8eeb500-0333-4c29-a390-618c97e060cb}
diff --git a/src/tests/multifetchertest.cpp b/src/tests/multifetchertest.cpp
new file mode 100644
index 00000000..a966df48
--- /dev/null
+++ b/src/tests/multifetchertest.cpp
@@ -0,0 +1,104 @@
+/***************************************************************************
+    Copyright (C) 2021 Robby Stephenson <robby at periapsis.org>
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or         *
+ *   modify it under the terms of the GNU General Public License as        *
+ *   published by the Free Software Foundation; either version 2 of        *
+ *   the License or (at your option) version 3 or any later version        *
+ *   accepted by the membership of KDE e.V. (or its successor approved     *
+ *   by the membership of KDE e.V.), which shall act as a proxy            *
+ *   defined in Section 14 of version 3 of the license.                    *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "multifetchertest.h"
+
+#include "../fetch/multifetcher.h"
+#include "../fetch/execexternalfetcher.h"
+#include "../fetch/fetchmanager.h"
+#include "../fetch/messagelogger.h"
+#include "../collections/bookcollection.h"
+#include "../collectionfactory.h"
+#include "../entry.h"
+#include "../utils/datafileregistry.h"
+
+#include <KSharedConfig>
+#include <KConfigGroup>
+
+#include <QTest>
+
+QTEST_GUILESS_MAIN( MultiFetcherTest )
+
+MultiFetcherTest::MultiFetcherTest() : AbstractFetcherTest() {
+}
+
+void MultiFetcherTest::initTestCase() {
+  Tellico::RegisterCollection<Tellico::Data::BookCollection> registerBook(Tellico::Data::Collection::Book, "book");
+  Tellico::DataFileRegistry::self()->addDataLocation(QFINDTESTDATA("../../xslt/mods2tellico.xsl"));
+}
+
+void MultiFetcherTest::testEmpty() {
+  auto config = KSharedConfig::openConfig(QString(), KConfig::SimpleConfig)->group(QStringLiteral("multi"));
+  config.writeEntry("CollectionType", int(Tellico::Data::Collection::Book));
+
+  Tellico::Fetch::FetchRequest request(Tellico::Data::Collection::Book, Tellico::Fetch::Title, QString());
+  Tellico::Fetch::Fetcher::Ptr fetcher(new Tellico::Fetch::MultiFetcher(this));
+  fetcher->readConfig(config);
+
+  Tellico::Data::EntryList results = DO_FETCH(fetcher, request);
+  QVERIFY(results.isEmpty());
+}
+
+void MultiFetcherTest::testIsbn() {
+  // just going to chain two trivial data sources together to test multifetcher
+  Tellico::Fetch::Fetcher::Ptr modsFetcher1(new Tellico::Fetch::ExecExternalFetcher(this));
+  Tellico::Fetch::Fetcher::Ptr modsFetcher2(new Tellico::Fetch::ExecExternalFetcher(this));
+
+  KSharedConfig::Ptr catConfig = KSharedConfig::openConfig(QFINDTESTDATA("data/cat_mods.spec"), KConfig::SimpleConfig);
+  KConfigGroup catConfigGroup = catConfig->group(QStringLiteral("<default>"));
+  catConfigGroup.writeEntry("ExecPath", QFINDTESTDATA("data/cat_mods.sh")); // update command path to local script
+  catConfigGroup.markAsClean(); // don't edit the file on sync()
+  modsFetcher1->readConfig(catConfigGroup);
+  modsFetcher1->setMessageHandler(new Tellico::Fetch::MessageLogger);
+  modsFetcher2->readConfig(catConfigGroup);
+  modsFetcher2->setMessageHandler(new Tellico::Fetch::MessageLogger);
+
+  auto fetchManager = Tellico::Fetch::Manager::self();
+  fetchManager->addFetcher(modsFetcher1);
+  fetchManager->addFetcher(modsFetcher2);
+
+  QStringList uuids;
+  uuids << modsFetcher1->uuid() << modsFetcher2->uuid();
+
+  auto multiConfig = KSharedConfig::openConfig(QString(), KConfig::SimpleConfig)->group(QStringLiteral("multi"));
+  multiConfig.writeEntry("Sources", uuids);
+  multiConfig.writeEntry("CollectionType", int(Tellico::Data::Collection::Book));
+
+  Tellico::Fetch::FetchRequest isbnRequest(Tellico::Data::Collection::Book,
+                                           Tellico::Fetch::ISBN,
+                                           QStringLiteral("0801486394"));
+  Tellico::Fetch::Fetcher::Ptr multiFetcher(new Tellico::Fetch::MultiFetcher(this));
+  multiFetcher->readConfig(multiConfig);
+  multiFetcher->setMessageHandler(new Tellico::Fetch::MessageLogger);
+
+  Tellico::Data::EntryList results = DO_FETCH(multiFetcher, isbnRequest);
+
+  QCOMPARE(results.size(), 1);
+
+  Tellico::Data::EntryPtr entry = results.at(0);
+  QVERIFY(entry);
+
+  QCOMPARE(entry->field(QStringLiteral("title")), QStringLiteral("Sound and fury"));
+  QCOMPARE(entry->field(QStringLiteral("isbn")), QStringLiteral("0-8014-8639-4"));
+}
diff --git a/src/tests/multifetchertest.h b/src/tests/multifetchertest.h
new file mode 100644
index 00000000..273a42ed
--- /dev/null
+++ b/src/tests/multifetchertest.h
@@ -0,0 +1,41 @@
+/***************************************************************************
+    Copyright (C) 2021 Robby Stephenson <robby at periapsis.org>
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or         *
+ *   modify it under the terms of the GNU General Public License as        *
+ *   published by the Free Software Foundation; either version 2 of        *
+ *   the License or (at your option) version 3 or any later version        *
+ *   accepted by the membership of KDE e.V. (or its successor approved     *
+ *   by the membership of KDE e.V.), which shall act as a proxy            *
+ *   defined in Section 14 of version 3 of the license.                    *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef MULTIFETCHERTEST_H
+#define MULTIFETCHERTEST_H
+
+#include "abstractfetchertest.h"
+
+class MultiFetcherTest : public AbstractFetcherTest {
+Q_OBJECT
+public:
+  MultiFetcherTest();
+
+private Q_SLOTS:
+  void initTestCase();
+  void testEmpty();
+  void testIsbn();
+};
+
+#endif


More information about the kde-doc-english mailing list