diff options
Diffstat (limited to 'kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch')
-rw-r--r-- | kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch b/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch new file mode 100644 index 000000000000..06d01e7702db --- /dev/null +++ b/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch @@ -0,0 +1,151 @@ +commit 754275eda610dce1160286a76339353097d8764c +Author: Sebastian Trueg <trueg@kde.org> +Date: Fri Mar 9 17:17:48 2012 +0100 + + Backport from nepomuk-core: improved performance on res identification. + + BUG: 289932 + FIXED-IN: 4.8.2 + +diff --git a/nepomuk/services/backupsync/lib/resourceidentifier.cpp b/nepomuk/services/backupsync/lib/resourceidentifier.cpp +index c1a9919..894372c 100644 +--- a/nepomuk/services/backupsync/lib/resourceidentifier.cpp ++++ b/nepomuk/services/backupsync/lib/resourceidentifier.cpp +@@ -31,6 +31,7 @@ + #include <Soprano/Statement> + #include <Soprano/Graph> + #include <Soprano/Node> ++#include <Soprano/BindingSet> + #include <Soprano/StatementIterator> + #include <Soprano/QueryResultIterator> + #include <Soprano/Model> +@@ -176,19 +177,18 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri) + return false; + } + +- QString query; +- + QStringList identifyingProperties; + QHash<KUrl, Soprano::Node> identifyingPropertiesHash; + + QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin(); + QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd(); ++ QList<Soprano::Node> requiredTypes; + for( ; it != constEnd; it++ ) { + const QUrl & prop = it.key(); + + // Special handling for rdf:type + if( prop == RDF::type() ) { +- query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() ); ++ requiredTypes << it.value().uri(); + continue; + } + +@@ -219,6 +219,10 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri) + return false; + } + ++ ++ // construct the identification query ++ QString query = QLatin1String("select distinct ?r where { "); ++ + // + // Optimization: + // If there is only one identifying property using all that optional and filter stuff +@@ -235,7 +239,7 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri) + QString::number( numIdentifyingProperties++ ) ); + } + +- // Make sure atleast one of the identification properties has been matched ++ // Make sure at least one of the identification properties has been matched + // by adding filter( bound(?o1) || bound(?o2) ... ) + query += QString::fromLatin1("filter( "); + for( int i=0; i<numIdentifyingProperties-1; i++ ) { +@@ -247,43 +251,68 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri) + query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()), + identifyingPropertiesHash.constBegin().value().toN3()); + } +- query += QLatin1String("}"); + +- // Construct the entire query +- QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as ?cnt " +- "where { ?r ?p ?o. filter( ?p in (%1) ).") +- .arg( identifyingProperties.join(",") ); +- +- query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)"); ++ // ++ // For performance reasons we add a limit even though this could mean that we ++ // miss a resource to identify since we check the types below. ++ // ++ query += QLatin1String("} LIMIT 100"); + +- kDebug() << query; + + // +- // Only store the results which have the maximum score ++ // Fetch a score for each result. ++ // We do this in a separate query for performance reasons. + // +- QSet<KUrl> results; +- int score = -1; ++ QMultiHash<int, KUrl> resultsScoreHash; ++ int maxScore = -1; + Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, Soprano::Query::QueryLanguageSparql ); + while( qit.next() ) { +- //kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"]; ++ const Soprano::Node r(qit["r"]); ++ ++ // ++ // Check the type requirements. Experiments have shown this to mean a substantial ++ // performance boost as compared to doing it in the main query. ++ // ++ if(!requiredTypes.isEmpty() ) { ++ query = QLatin1String("ask where { "); ++ foreach(const Soprano::Node& type, requiredTypes) { ++ query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), type.toN3()); ++ } ++ query += QLatin1String("}"); ++ if(!d->m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) { ++ continue; ++ } ++ } ++ ++ ++ const int score = d->m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { " ++ "%1 ?p ?o. filter( ?p in (%2) ) . }") ++ .arg( r.toN3(), ++ identifyingProperties.join(",") ), ++ Soprano::Query::QueryLanguageSparql) ++ .allBindings().first()["cnt"].literal().toInt(); + +- int count = qit["cnt"].literal().toInt(); +- if( score == -1 ) { +- score = count; ++ if( maxScore < score ) { ++ maxScore = score; + } +- else if( count < score ) +- break; + +- results << qit["r"].uri(); ++ resultsScoreHash.insert(score, r.uri()); + } + ++ // ++ // Only get the results which have the maximum score ++ // ++ QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore)); ++ ++ + //kDebug() << "Got " << results.size() << " results"; + if( results.empty() ) + return false; + + KUrl newUri; +- if( results.size() == 1 ) ++ if( results.size() == 1 ) { + newUri = *results.begin(); ++ } + else { + kDebug() << "DUPLICATE RESULTS!"; + newUri = duplicateMatch( res.uri(), results ); |