1 files changed, 151 insertions, 0 deletions
diff --git a/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch b/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch
new file mode 100644
index 000000000000..06d01e7702db
--- /dev/null
+++ b/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch
@@ -0,0 +1,151 @@
+commit 754275eda610dce1160286a76339353097d8764c
+Author: Sebastian Trueg <trueg@kde.org>
+Date:   Fri Mar 9 17:17:48 2012 +0100
+
+    Backport from nepomuk-core: improved performance on res identification.
+    
+    BUG: 289932
+    FIXED-IN: 4.8.2
+
+diff --git a/nepomuk/services/backupsync/lib/resourceidentifier.cpp b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
+index c1a9919..894372c 100644
+--- a/nepomuk/services/backupsync/lib/resourceidentifier.cpp
++++ b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
+@@ -31,6 +31,7 @@
+ #include <Soprano/Statement>
+ #include <Soprano/Graph>
+ #include <Soprano/Node>
++#include <Soprano/BindingSet>
+ #include <Soprano/StatementIterator>
+ #include <Soprano/QueryResultIterator>
+ #include <Soprano/Model>
+@@ -176,19 +177,18 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
+         return false;
+     }
+ 
+-    QString query;
+-
+     QStringList identifyingProperties;
+     QHash<KUrl, Soprano::Node> identifyingPropertiesHash;
+ 
+     QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin();
+     QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd();
++    QList<Soprano::Node> requiredTypes;
+     for( ; it != constEnd; it++ ) {
+         const QUrl & prop = it.key();
+ 
+         // Special handling for rdf:type
+         if( prop == RDF::type() ) {
+-            query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() );
++            requiredTypes << it.value().uri();
+             continue;
+         }
+ 
+@@ -219,6 +219,10 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
+         return false;
+     }
+ 
++
++    // construct the identification query
++    QString query = QLatin1String("select distinct ?r where { ");
++
+     //
+     // Optimization:
+     // If there is only one identifying property using all that optional and filter stuff
+@@ -235,7 +239,7 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
+                            QString::number( numIdentifyingProperties++ ) );
+         }
+ 
+-        // Make sure atleast one of the identification properties has been matched
++        // Make sure at least one of the identification properties has been matched
+         // by adding filter( bound(?o1) || bound(?o2) ... )
+         query += QString::fromLatin1("filter( ");
+         for( int i=0; i<numIdentifyingProperties-1; i++ ) {
+@@ -247,43 +251,68 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
+         query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()),
+                                                          identifyingPropertiesHash.constBegin().value().toN3());
+     }
+-    query += QLatin1String("}");
+ 
+-    // Construct the entire query
+-    QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as ?cnt "
+-    "where { ?r ?p ?o. filter( ?p in (%1) ).")
+-    .arg( identifyingProperties.join(",") );
+-
+-    query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)");
++    //
++    // For performance reasons we add a limit even though this could mean that we
++    // miss a resource to identify since we check the types below.
++    //
++    query += QLatin1String("} LIMIT 100");
+ 
+-    kDebug() << query;
+ 
+     //
+-    // Only store the results which have the maximum score
++    // Fetch a score for each result.
++    // We do this in a separate query for performance reasons.
+     //
+-    QSet<KUrl> results;
+-    int score = -1;
++    QMultiHash<int, KUrl> resultsScoreHash;
++    int maxScore = -1;
+     Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
+     while( qit.next() ) {
+-        //kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"];
++        const Soprano::Node r(qit["r"]);
++
++        //
++        // Check the type requirements. Experiments have shown this to mean a substantial
++        // performance boost as compared to doing it in the main query.
++        //
++        if(!requiredTypes.isEmpty() ) {
++            query = QLatin1String("ask where { ");
++            foreach(const Soprano::Node& type, requiredTypes) {
++                query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), type.toN3());
++            }
++            query += QLatin1String("}");
++            if(!d->m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) {
++                continue;
++            }
++        }
++
++
++        const int score = d->m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { "
++                                                                       "%1 ?p ?o. filter( ?p in (%2) ) . }")
++                                                   .arg( r.toN3(),
++                                                         identifyingProperties.join(",") ),
++                                                   Soprano::Query::QueryLanguageSparql)
++                          .allBindings().first()["cnt"].literal().toInt();
+ 
+-        int count = qit["cnt"].literal().toInt();
+-        if( score == -1 ) {
+-            score = count;
++        if( maxScore < score ) {
++            maxScore = score;
+         }
+-        else if( count < score )
+-            break;
+ 
+-        results << qit["r"].uri();
++        resultsScoreHash.insert(score, r.uri());
+     }
+ 
++    //
++    // Only get the results which have the maximum score
++    //
++    QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore));
++
++
+     //kDebug() << "Got " << results.size() << " results";
+     if( results.empty() )
+         return false;
+ 
+     KUrl newUri;
+-    if( results.size() == 1 )
++    if( results.size() == 1 ) {
+         newUri = *results.begin();
++    }
+     else {
+         kDebug() << "DUPLICATE RESULTS!";
+         newUri = duplicateMatch( res.uri(), results );