Skip to content

Commit 3433a01

Browse files
committed
Add optimization for finding the best CURIE.
- Build a map for searching for a matching IRI when computing the inverse context. Each letter of an IRI can be used to key into the map to find the best set of partial matches (which can be used to create CURIEs).
- This approach is a faster alternative to trying each possible term in the active context as a possible CURIE, linearly, one at a time.
1 parent 956fb8b commit 3433a01

File tree

1 file changed

+92
-25
lines changed

1 file changed

+92
-25
lines changed

jsonld.php

Lines changed: 92 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4701,32 +4701,41 @@ protected function _compactIri(
47014701
}
47024702

47034703
// no term or @vocab match, check for possible CURIEs
4704-
$iri_len = strlen($iri);
47054704
$choice = null;
4706-
foreach($active_ctx->mappings as $term => $definition) {
4707-
// skip null definitions and terms with colons, they can't be prefixes
4708-
if($definition === null || $definition->_term_has_colon) {
4709-
continue;
4710-
}
4711-
// skip entries with @ids that are not partial matches
4712-
if(!($iri_len > $definition->_id_length &&
4713-
strpos($iri, $definition->{'@id'}) === 0)) {
4714-
continue;
4715-
}
4716-
4717-
// a CURIE is usable if:
4718-
// 1. it has no mapping, OR
4719-
// 2. value is null, which means we're not compacting an @value, AND
4720-
// the mapping matches the IRI)
4721-
$curie = $term . ':' . substr($iri, $definition->_id_length);
4722-
$is_usable_curie = (!property_exists($active_ctx->mappings, $curie) ||
4723-
($value === null && $active_ctx->mappings->{$curie}->{'@id'} === $iri));
4724-
4725-
// select curie if it is shorter or the same length but lexicographically
4726-
// less than the current choice
4727-
if($is_usable_curie && ($choice === null ||
4728-
self::_compareShortestLeast($curie, $choice) < 0)) {
4729-
$choice = $curie;
4705+
$idx = 0;
4706+
$partial_matches = array();
4707+
$iri_map = $active_ctx->fast_curie_map;
4708+
// check for partial matches against `iri`, which means look until
4709+
// iri.length - 1, not full length
4710+
$max_partial_length = strlen($iri) - 1;
4711+
for(; $idx < $max_partial_length && isset($iri_map[$iri[$idx]]); ++$idx) {
4712+
$iri_map = $iri_map[$iri[$idx]];
4713+
if(isset($iri_map[''])) {
4714+
$entry = $iri_map[''][0];
4715+
$entry->iri_length = $idx + 1;
4716+
$partial_matches[] = $entry;
4717+
}
4718+
}
4719+
// check partial matches in reverse order to prefer longest ones first
4720+
$partial_matches = array_reverse($partial_matches);
4721+
foreach($partial_matches as $entry) {
4722+
$terms = $entry->terms;
4723+
foreach($terms as $term) {
4724+
// a CURIE is usable if:
4725+
// 1. it has no mapping, OR
4726+
// 2. value is null, which means we're not compacting an @value, AND
4727+
// the mapping matches the IRI
4728+
$curie = $term . ':' . substr($iri, $entry->iri_length);
4729+
$is_usable_curie = (!property_exists($active_ctx->mappings, $curie) ||
4730+
($value === null &&
4731+
$active_ctx->mappings->{$curie}->{'@id'} === $iri));
4732+
4733+
// select curie if it is shorter or the same length but
4734+
// lexicographically less than the current choice
4735+
if($is_usable_curie && ($choice === null ||
4736+
self::_compareShortestLeast($curie, $choice) < 0)) {
4737+
$choice = $curie;
4738+
}
47304739
}
47314740
}
47324741

@@ -5363,6 +5372,10 @@ protected function _getInverseContext($active_ctx) {
53635372

53645373
$inverse = $active_ctx->inverse = new stdClass();
53655374

5375+
// variables for building fast CURIE map
5376+
$fast_curie_map = $active_ctx->fast_curie_map = new ArrayObject();
5377+
$iris_to_terms = array();
5378+
53665379
// handle default language
53675380
$default_language = '@none';
53685381
if(property_exists($active_ctx, '@language')) {
@@ -5391,9 +5404,25 @@ protected function _getInverseContext($active_ctx) {
53915404
$iris = $mapping->{'@id'};
53925405
$iris = self::arrayify($iris);
53935406
foreach($iris as $iri) {
5407+
$is_keyword = self::_isKeyword($iri);
5408+
53945409
// initialize container map
53955410
if(!property_exists($inverse, $iri)) {
53965411
$inverse->{$iri} = new stdClass();
5412+
if(!$is_keyword && !$mapping->_term_has_colon) {
5413+
// init IRI to term map and fast CURIE map
5414+
$iris_to_terms[$iri] = new ArrayObject();
5415+
$iris_to_terms[$iri][] = $term;
5416+
$fast_curie_entry = (object)array(
5417+
'iri' => $iri, 'terms' => $iris_to_terms[$iri]);
5418+
if(!array_key_exists($iri[0], (array)$fast_curie_map)) {
5419+
$fast_curie_map[$iri[0]] = new ArrayObject();
5420+
}
5421+
$fast_curie_map[$iri[0]][] = $fast_curie_entry;
5422+
}
5423+
} else if(!$is_keyword && !$mapping->_term_has_colon) {
5424+
// add IRI to term match
5425+
$iris_to_terms[$iri][] = $term;
53975426
}
53985427
$container_map = $inverse->{$iri};
53995428

@@ -5437,9 +5466,47 @@ protected function _getInverseContext($active_ctx) {
54375466
}
54385467
}
54395468

5469+
// build fast CURIE map
5470+
foreach($fast_curie_map as $key => $value) {
5471+
$this->_buildIriMap($fast_curie_map, $key, 1);
5472+
}
5473+
54405474
return $inverse;
54415475
}
54425476

5477+
/**
 * Recursively turns the flat list of entries stored at `$iri_map[$key]` into
 * a nested lookup map used for quickly finding potential CURIEs.
 *
 * The list at `$iri_map[$key]` is replaced by a new ArrayObject whose keys
 * are the character found at position `$idx` of each entry's IRI. Entries
 * whose IRI has no character at `$idx` are collected under the empty-string
 * key `''`. Every non-empty key is then processed again at `$idx + 1`.
 *
 * @param ArrayObject $iri_map the map to build (modified in place).
 * @param string $key the current key in the map to work on.
 * @param int $idx the index into the IRI to compare.
 */
function _buildIriMap($iri_map, $key, $idx) {
  // take the flat entry list out and install an empty bucket map in its place
  $pending = $iri_map[$key];
  $buckets = new ArrayObject();
  $iri_map[$key] = $buckets;

  // distribute each entry by the character of its IRI at position $idx;
  // an IRI that ends before $idx is filed under the empty-string key
  foreach($pending as $item) {
    $char = ($idx < strlen($item->iri)) ? $item->iri[$idx] : '';
    if(!isset($buckets[$char])) {
      $buckets[$char] = new ArrayObject();
    }
    $buckets[$char][] = $item;
  }

  // descend into every bucket except the terminal ('') one
  foreach($buckets as $char => $unused) {
    if($char !== '') {
      $this->_buildIriMap($buckets, $char, $idx + 1);
    }
  }
}
5509+
54435510
/**
54445511
* Adds the term for the given entry if not already added.
54455512
*

0 commit comments

Comments
 (0)