summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch')
-rw-r--r-- kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch | 151
1 files changed, 151 insertions, 0 deletions
diff --git a/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch b/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch
new file mode 100644
index 000000000000..06d01e7702db
--- /dev/null
+++ b/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch
@@ -0,0 +1,151 @@
+commit 754275eda610dce1160286a76339353097d8764c
+Author: Sebastian Trueg <trueg@kde.org>
+Date: Fri Mar 9 17:17:48 2012 +0100
+
+ Backport from nepomuk-core: improved performance on res identification.
+
+ BUG: 289932
+ FIXED-IN: 4.8.2
+
+diff --git a/nepomuk/services/backupsync/lib/resourceidentifier.cpp b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
+index c1a9919..894372c 100644
+--- a/nepomuk/services/backupsync/lib/resourceidentifier.cpp
++++ b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
+@@ -31,6 +31,7 @@
+ #include <Soprano/Statement>
+ #include <Soprano/Graph>
+ #include <Soprano/Node>
++#include <Soprano/BindingSet>
+ #include <Soprano/StatementIterator>
+ #include <Soprano/QueryResultIterator>
+ #include <Soprano/Model>
+@@ -176,19 +177,18 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
+ return false;
+ }
+
+- QString query;
+-
+ QStringList identifyingProperties;
+ QHash<KUrl, Soprano::Node> identifyingPropertiesHash;
+
+ QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin();
+ QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd();
++ QList<Soprano::Node> requiredTypes;
+ for( ; it != constEnd; it++ ) {
+ const QUrl & prop = it.key();
+
+ // Special handling for rdf:type
+ if( prop == RDF::type() ) {
+- query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() );
++ requiredTypes << it.value().uri();
+ continue;
+ }
+
+@@ -219,6 +219,10 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
+ return false;
+ }
+
++
++ // construct the identification query
++ QString query = QLatin1String("select distinct ?r where { ");
++
+ //
+ // Optimization:
+ // If there is only one identifying property using all that optional and filter stuff
+@@ -235,7 +239,7 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
+ QString::number( numIdentifyingProperties++ ) );
+ }
+
+- // Make sure atleast one of the identification properties has been matched
++ // Make sure at least one of the identification properties has been matched
+ // by adding filter( bound(?o1) || bound(?o2) ... )
+ query += QString::fromLatin1("filter( ");
+ for( int i=0; i<numIdentifyingProperties-1; i++ ) {
+@@ -247,43 +251,68 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
+ query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()),
+ identifyingPropertiesHash.constBegin().value().toN3());
+ }
+- query += QLatin1String("}");
+
+- // Construct the entire query
+- QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as ?cnt "
+- "where { ?r ?p ?o. filter( ?p in (%1) ).")
+- .arg( identifyingProperties.join(",") );
+-
+- query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)");
++ //
++ // For performance reasons we add a limit even though this could mean that we
++ // miss a resource to identify since we check the types below.
++ //
++ query += QLatin1String("} LIMIT 100");
+
+- kDebug() << query;
+
+ //
+- // Only store the results which have the maximum score
++ // Fetch a score for each result.
++ // We do this in a separate query for performance reasons.
+ //
+- QSet<KUrl> results;
+- int score = -1;
++ QMultiHash<int, KUrl> resultsScoreHash;
++ int maxScore = -1;
+ Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
+ while( qit.next() ) {
+- //kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"];
++ const Soprano::Node r(qit["r"]);
++
++ //
++ // Check the type requirements. Experiments have shown this to mean a substantial
++ // performance boost as compared to doing it in the main query.
++ //
++ if(!requiredTypes.isEmpty() ) {
++ query = QLatin1String("ask where { ");
++ foreach(const Soprano::Node& type, requiredTypes) {
++ query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), type.toN3());
++ }
++ query += QLatin1String("}");
++ if(!d->m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) {
++ continue;
++ }
++ }
++
++
++ const int score = d->m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { "
++ "%1 ?p ?o. filter( ?p in (%2) ) . }")
++ .arg( r.toN3(),
++ identifyingProperties.join(",") ),
++ Soprano::Query::QueryLanguageSparql)
++ .allBindings().first()["cnt"].literal().toInt();
+
+- int count = qit["cnt"].literal().toInt();
+- if( score == -1 ) {
+- score = count;
++ if( maxScore < score ) {
++ maxScore = score;
+ }
+- else if( count < score )
+- break;
+
+- results << qit["r"].uri();
++ resultsScoreHash.insert(score, r.uri());
+ }
+
++ //
++ // Only get the results which have the maximum score
++ //
++ QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore));
++
++
+ //kDebug() << "Got " << results.size() << " results";
+ if( results.empty() )
+ return false;
+
+ KUrl newUri;
+- if( results.size() == 1 )
++ if( results.size() == 1 ) {
+ newUri = *results.begin();
++ }
+ else {
+ kDebug() << "DUPLICATE RESULTS!";
+ newUri = duplicateMatch( res.uri(), results );