The set datatype implemented in XPath 3.0

In my previous two posts I introduced the binary search tree datatype, implemented in XPath 3.0. In most cases the binary search tree operations find, insert and delete have efficiency of O(log(d)) and the print/serialize operation is O(N*log(d)), where d is the maximum depth of the tree and N is the total number of tree nodes.

Today, I will show a set implemented using a binary search tree. This implementation choice means that the set datatype operations have the same efficiency as the corresponding binary search tree operations. This implementation choice also results in the constraint that the set of values for any kind of item, that we want to put in such set, must have total ordering. In other words, the standard (or user-defined) lt operation must be applicable on any pair of items belonging to the set.

Here is the code:

import module namespace set-impl =“http://fxsl.sf.net/data/set/implementation”
              at “implementations/set-implementation-as-binary-search-tree.xquery”;
             

declare namespace set =http://fxsl.sf.net/data/set

 

(:
  Create an empty set — (0×2205)
 :)
declare functionset:set()
         as
function() as item()*

{
  
set-impl:set()
};

declare functionset:empty($pSet as function() as item()*)
         as xs:boolean
{
  
set-impl:empty($
pSet)
};

declare functionset:size($pSet as function() as item()*)
         as xs:integer
{
  
set-impl:size($
pSet)
};

declare function set:equals
            (
$pSet1 as function() as item()*
,
            
$pSet2 as function() as item()*

            )
         as xs:boolean
{
   
set-impl:equals($pSet1, $pSet2)
};

declare function set:add
     (
$pSet as function() as item()*
,
     
$pItem as item
()
     )
         as
function() as item()*

{
  
set-impl:add($pSet, $pItem)
};

declare functionset:data($pSet as function() as item()*

)
         as
item()*

{
  
set-impl:data($pSet)
};

  The Set Theory belongs to (member of)   (0x22F2)
  operation
 :)
declare function set:member
     ($pItem as item()?,
     
$pSet as function() as item()*

      )
         as xs:boolean
{
  
set-impl:member($pItem, $pSet)
};

(:
 The Set Theory does not belong to (not member of)  (0×2209)
  operation
 :)

declare function set:not-member
     (
$pItem as item()?
,
     
$pSet as function() as item()*

      )
         as xs:boolean
{
  
not(set:member($pItem, $pSet))
};

declare function set:remove
     (
$pSet as function() as item()*
,
     
$pItem as item
()
     )
         as
function() as item()*

{
  
set-impl:remove($pSet[1], $pItem)
};

(:
  The classic set-theory U operation — union of two sets
:)
declare function set:U
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as
function() as item()*
{
  
set-impl:U($pSet1, $pSet2)
};

(:
  The classic set-theory set-difference operation \

:)
declare function set:diff
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as
function() as item()*
{
  
set-impl:diff($pSet1, $pSet2)
};

(: 
The classic set-theory ∩  (∩) /\
  set-intersection operation
:)
declare function set:intersect
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as
function() as item()*
{
 
set-impl:intersect($pSet1, $pSet2)
};
(:
The classic set-theory symmetric-difference operation
:)
declare function set:sym-diff
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as
function() as item()*
{
  
set-impl:sym-diff($pSet1, $pSet2)
};

(: 
The classic set-theory set (‘⊇’) operation
:)
declare function set:includes
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as xs:boolean
{
 
set-impl:includes($pSet1, $pSet2)
};

declare function set:print
     (
$pSet as function() as item()*
)
         as
element()?

{
 
set-impl:print($pSet)
};

declare function set:serialize
     (
$pSet as function() as item()*
)
         as
element()?

{
 
set-impl:serialize($pSet)
};

declare function set:deserialize
     (
$pSerialization as element()?
)
         as
function() as item()*

{
 
set-impl:deserialize($pSerialization)
};

Now, you would be right that this code shows very little, if anything, at all. However, it has at least two virtues:

  • Can serve as interface.
  • Is implementation independent – to use a different implementation just import another implementation module and bind the prefix set-impl to its namespace.

Having said that, here is the “true” implementation, which is contained in the file “implementations/set-implementation-as-binary-search-tree.xquery”.

This is a separate XQuery module with its own namespace. It imports and uses the module that implements the binary search tree:

module namespace set-impl =“http://fxsl.sf.net/data/set/implementation”
              at “../../BinaryTree/bintreeModule.xquery”;

import module namespace tree =http://fxsl.sf.net/data/bintree;

The set implementation module “contains” a binary search tree. The “empty set” is nothing else than a zero-argument function that returns an empty tree:

(:
  Create an empty set — (0×2205)
 :)
declare functionset-impl:set()
         as
function() as item()*

{
  
function() {tree:tree()}   (: underlying bintree :)
};

Many of the basic set operations have their binary search tree counterparts. Here is what it means in our implementation for a set to be “empty” and how we compute the size of a set:

declare functionset-impl:empty($pSet as function() as item()*)
         as xs:boolean
{
  
tree:empty($pSet[1
]())
};

 

declare functionset-impl:size($pSet as function() as item()*)
         as xs:integer
{
  
tree:size($pSet[1
]())
};

Two sets are equal exactly when they have the same size and the sequences of their atomized values are deep-equal:

declare function set-impl:equals
            (
$pSet1 as function() as item()*
,
            
$pSet2 as function() as item()*

            )
         as xs:boolean
{
   
set-impl:size($pSet1) eqset-impl:size($pSet1)
  
and

    deep-equal(set-impl:data($pSet1), set-impl:data($pSet2))
};

Here is how we add an item to a set:

declare function set-impl:add
     (
$pSet as function() as item()*
,
     
$pItem as item
()
     )
         as
function() as item()*

{
  
function() {tree:insert($pSet[1](), $pItem)}
};

and how we atomize a set:

declare functionset-impl:data($pSet as function() as item()*)
         as
item()*

{
  
tree:data($pSet[1]())
};

When is an item member of a set:

(:
  The Set Theory belongs to (member of)   (0x22F2)
  operation
 :)
declare function set-impl:member
     (
$pItem as item()?
,
     
$pSet as function() as item()*

      )
         as xs:boolean
{
  
tree:contains($pSet[1](), $pItem)
};

and when it isn’t a member of a set:

(:
  The Set Theory does not belong to (not member of)   (0×2209)
  operation
 :)

declare function set-impl:not-member
     (
$pItem as item()?
,
     
$pSet as function() as item()*

      )
         as xs:boolean
{
  
not(set-impl:member($pItem, $pSet))
};

This is how we remove an item from a set:

declare function set-impl:remove
     (
$pSet as function() as item()*
,
     
$pItem as item
()
     )
         as
function() as item()*

{
  
function() {tree:deleteNode($pSet[1](), $pItem)}
};

The classic set theory U (union) operation returns the union of two sets. Making use of the new standard XPath 3.0 higher order function fold-left(), we add all items from the smaller of the two sets to the bigger, thus performing only the minimal possible number of add operations:

(:
  The classic set-theory U operation — union of two sets
:)
declare function set-impl:U
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as
function() as item()*
{
  
let$vBiggerSet :=
          
if(set-impl:size($pSet1) geset-impl:size($
pSet2))
            
then$
pSet1
            
else$
pSet2,
      
$vSmallerSet :=

           if(not(set-impl:size($pSet1) geset-impl:size($pSet2)))
            
then$
pSet1
            
else$
pSet2
   
return

       fold-left(set-impl:add#2, $vBiggerSet, set-impl:data($vSmallerSet))
};

Similarly, the classic set theory \ (set difference) operation:

(:
  The classic set-theory set-difference operation \
:)
declare function set-impl:diff
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as
function() as item()*
{
 
fold-left(set-impl:remove#2, $pSet1, set-impl:data($pSet2))
};

And the classic set theory  intersect operation:

(:
  The classic set-theory ∩  (∩) /\
  set-intersection operation
:)
declare function set-impl:intersect
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as
function() as item()*
{
  
let$vBiggerSet :=
          
if(set-impl:size($pSet1) geset-impl:size($
pSet2))
            
then$
pSet1
            
else$
pSet2,
      
$vSmallerSet :=

           if(not(set-impl:size($pSet1) geset-impl:size($pSet2)))
            
then$
pSet1
            
else$
pSet2,
      
$to-be-removed :=

          
filter(set-impl:not-member(?, $
vBiggerSet),
                     
set-impl:data($
vSmallerSet)
                     )
   
return

       fold-left(set-impl:remove#2, $vSmallerSet, $to-be-removed)
};

Two more set theory operations – symmetric difference:

(:
  The classic set-theory symmetric-difference operation
:)
declare function set-impl:sym-diff
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as
function() as item()*
{
 
set-impl:U(set-impl:diff($pSet1, $pSet2), set-impl:diff($pSet2, $pSet1))
};

and (non-strict) set inclusion:

(:
  The classic set-theory set (‘⊇’) operation
:)
declare function set-impl:includes
     (
$pSet1 as function() as item()*
,
     
$pSet2 as function() as item()*

     )
         as xs:boolean
{
 
empty(set-impl:data($pSet2)[set-impl:not-member(.,$pSet1)])
};

And finally, three infrastructural operations: print(), serialize() and deserialize() :

declare function set-impl:print
     (
$pSet as function() as item()*
)
         as
element()?

{
 
tree:print($pSet[1]())
};
 

declare function set-impl:serialize
     (
$pSet as function() as item()*
)
         as
element()?

{
 
set-impl:print($pSet)
};

declare function set-impl:deserialize
     (
$pSerialization as element()?
)
         as
function() as item()*

{
 
function() {tree:deserialize($pSerialization)}
};

Here is a test of our set implementation:

let$set0 :=set:add(set:set(), 2),
    
$set1 :=set:add(set:add(set:add(set:set(), 10), 5), 17
),
    
$set2 :=set:remove($set1, 5
),
    
$set3 :=set:remove($set1, 12
),
    
$set4 :=set:add(set:set(), 2
),
    
$set4 :=set:add(set:add(set:add(set:set(), 8), 4), 9
),
    
$set5 :=set:add(set:add(set:add(set:set(), 17), 10), 19
)
 
return

    (
           
‘ set:empty(set:set()): ‘,
             
set:empty(set:set
()),
             
‘ set:size(set:set()): ‘
,
             
set:size(set:set
()),
             
‘ set:size($set0): ‘
,
             
set:size($
set0),
             
‘ set:size($set1): ‘
,
             
set:size($
set1),
             
‘ set:data($set1): ‘
,
             
set:data($
set1),
             
‘ set:member(10,$set1): ‘
,
             
set:member(10,$
set1),
             
‘ set:member(17,$set1): ‘
,
             
set:member(17,$
set1),
             
‘ set:member(5,$set1): ‘
,
             
set:member(5,$
set1),
             
‘ set:member(2,$set1): ‘
,
             
set:member(2,$
set1),
             
‘ set:member(15,$set1): ‘
,
             
set:member(15,$
set1),
             
‘ set:data($set2): ‘
,
             
set:data($
set2),
             
‘ set:data($set3): ‘
,
             
set:data($
set3),
             
‘ set:data($set4): ‘
,
             
set:data($
set4),
             
‘ set:data(set:U($set1,$set4)): ‘
,
             
set:data(set:U($set1,$
set4)),
             
‘ set:print(set:U($set1,$set4)): ‘
,
             
set:print(set:U($set1,$
set4)),
             
‘ set:print(set:U($set4,$set1)): ‘
,
             
set:print(set:U($set4,$
set1)),
             
‘ set:print(set:U($set1,$set2)): ‘
,
             
set:print(set:U($set1,$
set2)),
             
‘ set:equals($set1, set:U($set1,$set2)): ‘
,
             
set:equals($set1, set:U($set1,$
set2)),
             
‘ set:data($set1): ‘
,
             
set:data($
set1),
             
‘ set:data($set4): ‘
,
             
set:data($
set4),
             
‘ set:data(set:U($set1,$set4)): ‘
,
             
set:data(set:U($set1,$
set4)),
             
‘ set:print(set:U($set1,$set4)): ‘
,
             
set:print(set:U($set1,$
set4)),
             
‘ set:data($set1): ‘
,
             
set:data($
set1),
             
‘ set:data($set2): ‘
,
             
set:data($
set2),
             
‘ set:data(set:diff($set1,$set2)): ‘
,
             
set:data(set:diff($set1,$
set2)),
             
‘ set:print(tree:print(set:diff($set1,$set2)): ‘
,
             
set:print(set:diff($set1,$
set2)),
             
‘ set:data($set1): ‘
,
             
set:data($
set1),
             
‘ set:data($set2): ‘
,
             
set:data($
set2),
             
‘ set:data(set:intersect($set1, $set2)): ‘
,
  
set:data(set:intersect($set1, $
set2)),
             
‘ set:data($set1): ‘
,
             
set:data($
set1),
             
‘ set:data($set5): ‘
,
             
set:data($
set5),
             
‘ set:data(set:sym-diff($set1, $set5)): ‘
,
  
set:data(set:sym-diff($set1, $
set5)),
             
‘ set:includes($set1, $set2): ‘
,
  
set:includes($set1, $
set2),
             
‘ set:includes($set1, $set5): ‘
,
  
set:includes($set1, $
set5),
             
‘ set:includes($set1, set:set()): ‘
,
  
set:includes($set1, set:set
()),
             
‘ set:member((), $set1): ‘
,
  
set:member((), $
set1),
             
‘ set:member((), set:set()): ‘
,
  
set:member((), set:set
()),
             
‘ set:serialize($set5): ‘
,
  
set:serialize($
set5),
             
‘ set:data(set:deserialize(set:serialize($set5))): ‘
,
  
set:data(set:deserialize(set:serialize($
set5))),
             
‘ set:size(set:deserialize(set:serialize($set5))): ‘
,
  
set:size(set:deserialize(set:serialize($
set5)))
             )

When this test is executed, the expected, correct results are produced:

set:empty(set:set()):  true
set:size(set:set()):  0
set:size($set0):  1
set:size($set1):  3
set:data($set1):  5 10 17
set:member(10,$set1):  true
set:member(17,$set1):  true
set:member(5,$set1):  true
set:member(2,$set1):  false
set:member(15,$set1):  false
set:data($set2):  10 17
set:data($set3):  5 10 17
set:data($set4):  4 8 9
set:data(set:U($set1,$set4)):  4 5 8 9 10 17
set:print(set:U($set1,$set4)):

<treeNode>

   <value>10</value>
   <treeNode>
      <value>5</value>
      <treeNode>
         <value>4</value>
      </treeNode>
      <treeNode>
         <value>8</value>
         <treeNode>
            <value>9</value>
         </treeNode>
      </treeNode>
   </treeNode>
   <treeNode>
      <value>17</value>
   </treeNode>
</treeNode>
set:print(set:U($set4,$set1)):
<treeNode>
   <value>8</value>
   <treeNode>
      <value>4</value>
      <treeNode>
         <value>5</value>
      </treeNode>
   </treeNode>
   <treeNode>
      <value>9</value>
      <treeNode>
         <value>10</value>
         <treeNode>
            <value>17</value>
         </treeNode>
      </treeNode>
   </treeNode>
</treeNode>
set:print(set:U($set1,$set2)):
<treeNode>
   <value>10</value>
   <treeNode>
      <value>5</value>
   </treeNode>
   <treeNode>
      <value>17</value>
   </treeNode>
</treeNode>
set:equals($set1, set:U($set1,$set2)):  true
set:data($set1):  5 10 17
set:data($set4):  4 8 9
set:data(set:U($set1,$set4)):  4 5 8 9 10 17
set:print(set:U($set1,$set4)):

<treeNode>
   <value>10</value>
   <treeNode>
      <value>5</value>
      <treeNode>
         <value>4</value>
      </treeNode>
      <treeNode>
         <value>8</value>
         <treeNode>
            <value>9</value>
         </treeNode>
      </treeNode>
   </treeNode>
   <treeNode>
      <value>17</value>
   </treeNode>
</treeNode>
set:data($set1):  5 10 17
set:data($set2):  10 17
set:data(set:diff($set1,$set2)):  5
set:print(tree:print(set:diff($set1,$set2)):

<treeNode>
   <value>5</value>
</treeNode>
set:data($set1):  5 10 17
set:data($set2):  10 17
set:data(set:intersect($set1, $set2)):  10 17
set:data($set1):  5 10 17
set:data($set5):  10 17 19
set:data(set:sym-diff($set1, $set5)):  5 19
set:includes($set1, $set2):  true
set:includes($set1, $set5):  false
set:includes($set1, set:set()):  true
set:member((), $set1):  true
set:member((), set:set()):  true
set:serialize($set5):
<treeNode>
   <value>17</value>
   <treeNode>
      <value>10</value>
   </treeNode>
   <treeNode>
      <value>19</value>
   </treeNode>
</treeNode>
set:data(set:deserialize(set:serialize($set5))): 10 17 19
set:size(set:deserialize(set:serialize($set5))):  3

In a few of my next posts I will show other interesting applications of the two new datatypes: the set and the binary search tree.

Useful utility: Zip Codes – Free zip code lookup and zip code database download.

Posted in functional data structures, XPath, XPath 3.0 | 2 Comments

Part 2: The Binary Search Data Structure with XPath 3.0, Deleting a node

In the first part of this post I presented a Binary Search Tree implemented in pure XPath 3.0. Some new nice features of XPath 3.0 were shown in action and other, still missing but very important features were identified and discussed.

The only operation that was missing was the deletion of a node from the tree. I show how to do this now.

This is the function that consumers will use:

(:
Delete from $pTree the node containing $pItem.
:)

declare function tree:deleteNode
         (
$pTree as (function() as item()*)*
,
         
$pItem as item
()
         )
        
as (function() as item()*)*

{
 
tree:deleteNode($pTree, $pItem, 1)
};

It simply delegates to a similar internal function that also requires a third argument – the depth of the (sub)tree on which the delete is performed. Why is the depth necessary? The answer will come later.

(:
Delete from $pTree the node containing $pItem.
$pTree has a depth of $pDepth (1 if not a subtree)
:)

declare function tree:deleteNode
         (
$pTree as (function() as item()*)*
,
         
$pItem as item
(),
         
$
pDepth as xs:integer
         )
        
as (function() as item()*)*

{
if(tree:empty($pTree))
  
then $
pTree
  
else

     let $top   := tree:top($pTree),
        
$left  := tree:left($
pTree),
        
$right := tree:right($
pTree)
     
return

         if($pItem eq $top)
          
then tree:deleteTopNode($pTree, $
pDepth)
          
else

            if($pItem lt $top)
             
then

                (
                 
function() {tree:top($pTree)},
                 
function
()
                  {
tree:deleteNode(tree:left($
pTree),
                                  
$
pItem,
                                  
$pDepth+1
)
                  },
                 
function() {tree:right($
pTree)}
                 )
             
else

                (
                 
function() {tree:top($pTree)},
                 
function() {tree:left($
pTree)},
                 
function
()
                  {
tree:deleteNode(tree:right($
pTree),
                                             
$
pItem,
                                             
$pDepth+1
)
                  }
                 )
};

We see that the deleteNode() function simply dispatches the operation to the left or right subtree and calls another function – deleteTopNode() – whenever the item to be deleted happens to be the value of the top node.

declare function tree:deleteTopNode
         (
$pTree as (function() as item()*)*
,
         
$
pDepth as xs:integer
         )
        
as (function() as item()*)*

{
 
let $left := tree:left($pTree),
     
$right := tree:right($
pTree)
   
return

      if(tree:empty($left) and tree:empty($right))
       
then tree:tree
()
       
else

         if(tree:empty($left) and not(tree:empty($right)))
          
then

             (
              
function() {tree:top($right)},
              
function() {tree:left($
right)},
              
function() {tree:right($
right)},
              
function() {tree:size($right) -1
}
              )
          
else

            if(not(tree:empty($left)) and tree:empty($right))
            
then

             (
              
function() {tree:top($left)},
              
function() {tree:left($
left)},
              
function() {tree:right($
left)},
              
function() {tree:size($left) -1
}
              )
            
else  (: both subtrees are non-empty :)

               if($pDepth mod 2 eq 0)
               
then

                  let $subtree := tree:right($pTree),
                     
$vItem := tree:top(tree:leftmost($
subtree)),
                     
$newSubtree :=

                         tree:deleteNode($subtree, $vItem, $pDepth+1)
                   
return

                      (function() {$vItem},
                      
function() {$
left},
                      
function() {$
newSubtree},
                      
function() {tree:size($pTree) -1
}
                       )
               
else

                  let $subtree := tree:left($pTree),
                     
$vItem := tree:top(tree:rightmost($
subtree)),
                     
$newSubtree:=

                         tree:deleteNode($subtree, $vItem, $pDepth+1)
                   
return

                      (function() {$vItem},
                      
function() {$
newSubtree},
                      
function() {$
right},
                      
function() {tree:size($pTree) -1
}
                       )
};

deleteTopNode() does the real deletion work. The operation is straightforward in the case when the tree is empty or either the left subtree or the right subtree is empty.

It becomes interesting when both subtrees are non-empty. In order to understand what the last 20 lines of this code mean, it is best to read a good source on the binary search tree delete operation.

In a few words:

  • If the depth is even, then we move to the top the leftmost node (without its right subtree) of the right subtree.
  • If the depth is odd, then we move to the top the rightmost node (without its left subtree) of the left subtree.

Why is this different processing done, depending on the oddness of the depth of the subtree? If we always used the leftmost node of the right subtree, then a series of deletions would create a severely imbalanced (to the right) tree. Imbalanced trees lose the advantages of the binary search tree with search and insertions becoming of linear complexity (instead of O(log(N))  ) and sorting becoming O(N^2) instead of O(N*log(N)).

Thus, in order to avoid this deteriorating effect, we need to use with a pseudo-random frequency both the left and the right subtrees. In most cases this can be achieved simply by using the oddness of the depth of the tree, whose top node is to be replaced.

Below is the complete code of the Binary Search tree. Do note that I have added a new “property” to the tree datatype – “size”. Also, note the functions tree:leftmost() and tree:rightmost() – used by tree:deleteTopNode()

Another change is that now the second argument of tree:contains()   is of type  item()?   — that is, it can be empty. By definition, any tree contains the “empty item”.

Finally, there is a new pair of functions: tree:serialize() and   tree:deserialize() . The former is a synonym of  tree:print() and the latter loads the XML representation created by  tree:serialize() into a tree.

module namespace tree = “http://fxsl.sf.net/data/bintree”;

declare function tree:top
         (
$pTree as (function() as item()*)*
)
         as
item()?

{
if(empty($pTree))
  
then
()
  
else

     $pTree[1]()
};
 
 

 

declare function tree:left
         (
$pTree as (function() as item()*)*
)
        
as (function() as item()*)*

{
if(empty($pTree[2]))
then tree:tree
()
else

   $pTree[2]()
 
  
 

 

declare function tree:right
         (
$pTree as (function() as item()*)*
)
        
as (function() as item()*)*

{
if(empty($pTree[3]))
  
then tree:tree
()
  
else

     $pTree[3]()
};
  
declare function tree:size
         (
$pTree as (function() as item()*)*
)
         as xs:integer
{
if(empty($pTree[4
]))
  
then 0

   else
     $pTree[4]()
};
  
declare function tree:tree()
        
as (function() as item()*)+
{};
   (
    function() {()},   (: top() :)
    function() {()},   (: left() :)
    function() {()},   (: right() :)
    function() {0}     (: size() :)
   )
};
declare function tree:empty
         (
$pTree as (function() as item()*)*
)
         as xs:boolean
{
empty($pTree) or empty($pTree[1
]())
};

 declare function tree:contains
         (
$pTree as (function() as item()*)*,
         
$pItem as item()?
         )
         as xs:boolean
{
 
if(empty($pItem))
  
then true()
  
else
   
if(tree:empty($pTree))
     
then false()
     
else
       
let $top := tree:top($pTree)
        
return
            (
$pItem eq $top)
          
or
            (
$pItem lt $top
           
and
            
tree:contains(tree:left($pTree),$pItem)
             )
          
or
            (
$pItem gt $top
           
and
            
tree:contains(tree:right($pTree),$pItem)
             )
};

(:
Delete from $pTree the node containing $pItem.
:)
declare function tree:deleteNode
         (
$pTree as (function() as item()*)*
,
         
$pItem as item
()
         )
        
as (function() as item()*)*

{
 
tree:deleteNode($pTree, $pItem, 1)
};
declare function tree:deleteNode
         (
$pTree as (function() as item()*)*
,
         
$pItem as item
(),
         
$
pDepth as xs:integer
         )
        
as (function() as item()*)*

{
if(tree:empty($pTree))
  
then $
pTree
  
else

     let $top   := tree:top($pTree),
        
$left  := tree:left($
pTree),
        
$right := tree:right($
pTree)
     
return

         if($pItem eq $top)
          
then tree:deleteTopNode($pTree, $
pDepth)
          
else

            if($pItem lt $top)
             
then

                (
                 
function() {tree:top($pTree)},
                 
function
()
                  {
tree:deleteNode(tree:left($
pTree),
                                  
$
pItem,
                                  
$pDepth+1
)
                  },
                 
function() {tree:right($
pTree)}
                 )
             
else

                (
                 
function() {tree:top($pTree)},
                 
function() {tree:left($
pTree)},
                 
function
()
                  {
tree:deleteNode(tree:right($
pTree),
                                             
$
pItem,
                                            
$pDepth+1
)
                  }
                 )
};
 

declare function tree:deleteTopNode
         (
$pTree as (function() as item()*)*,
         
$pDepth as xs:integer
         )
        
as (function() as item()*)*
{
 
let $left := tree:left($pTree),
     
$right := tree:right($pTree)
   
return
     
if(tree:empty($left) and tree:empty($right))
       
then tree:tree()
       
else
        
if(tree:empty($left) and not(tree:empty($right)))
          
then
           
$right
          
else
           
if(not(tree:empty($left)) and tree:empty($right))
            
then
              
$left
            
else  (: both subtrees are non-empty :)
              
if($pDepth mod 2 eq 0)
               
then
                 
let $subtree := tree:right($pTree),
                     
$vItem := tree:top(tree:leftmost($subtree)),
                     
$newSubtree :=
                        
tree:deleteNode($subtree, $vItem, $pDepth+1)
                   
return
                      (
function() {$vItem},
                      
function() {$left},
                      
function() {$newSubtree},
                      
function() {tree:size($pTree) -1}
                       )
               
else
                 
let $subtree := tree:left($pTree),
                     
$vItem := tree:top(tree:rightmost($subtree)),
                     
$newSubtree:=
                        
tree:deleteNode($subtree, $vItem, $pDepth+1)
                   
return
                      (
function() {$vItem},
                      
function() {$newSubtree},
                      
function() {$right},
                      
function() {tree:size($pTree) -1}
                       )
};

 
(:
  Find/Return the leftmost node of the
  non-empty tree $pTree
:)
declare function tree:leftmost
         (
$pTree as (function() as item()*)+
)
        
as (function() as item()*)*

{
 
let $left := tree:left($pTree)
   
return

      if(tree:empty($left))
       
then $
pTree
       
else tree:leftmost($
left)
};
 

 

 
declare function tree:rightmost
         (
$pTree as (function() as item()*)+
)
        
as (function() as item()*)*

{
 
let $right := tree:right($pTree)
   
return

      if(tree:empty($right))
       
then $
pTree
       
else tree:rightmost($
right)
};
 declare function tree:insert
         (
$pTree as (function() as item()*)*
,
         
$pItem as item
()
         )
        
as (function() as item()*)+

{
  
if(tree:empty($pTree))
     
then

      (
      
function() {$pItem},        (: top()   :)
       function() {tree:tree()}, (: left()  :)
       function() {tree:tree()}, (: right() :)
       function() {1}                     (: size()  :)
       )
     
else
       if($pItem lt tree:top($pTree))
        
then

          (
          
function() {tree:top($pTree)},
          
function() {tree:insert(tree:left($pTree), $
pItem)},
          
function() {tree:right($
pTree)},
          
function() {tree:size($pTree)+1
}
           )
        
else

          if($pItem gt tree:top($pTree))
          
then

           (
           
function() {tree:top($pTree)},
           
function() {tree:left($
pTree)},
           
function() {tree:insert(tree:right($pTree), $
pItem)},
           
function() {tree:size($pTree)+1
}
           )
          
else

            $pTree
};

declare function tree:print
        (
$pTree as (function() as item()*)*
)
        as
element()?

{
  
if(not(tree:empty($pTree)))
   
then

     <treeNode>
      <value>{tree:top($pTree)}</value>
       {
       
tree:print(tree:left($pTree)),
       
tree:print(tree:right($
pTree))
       }
    
</treeNode>

    else ()
};
  

declare function tree:serialize
        (
$pTree as (function() as item()*)*)
        as
element()?
{
  
tree:print($pTree)
};

declare function tree:deserialize($pXml as element()?)
             
as (function() as item()*)*
{
 
if(empty($pXml))
 
then tree:tree()
 
else
   
let $left := tree:deserialize($pXml/treeNode[1]),
       
$right := tree:deserialize($pXml/treeNode[2])
    
return
      (
      
function() {$pXml/value/node()[1]},
      
function() {$left},
      
function() {$right},
      
function() {tree:size($left)+tree:size($right)+1}
      )
};

 

(: tree:data()
  Prints only the data — depth first.
   In effect this is sort() — quite good
   for random data.
:)
declare function tree:data
        (
$pTree as (function() as item()*)*
)
        as
item()*

{
if(not(tree:empty($pTree)))
  
then

    (
    
tree:data(tree:left($pTree)),
    
tree:top($
pTree),
    
tree:data(tree:right($
pTree))
     )
  
else
()
};

Now, let’s test just the deleteNode operation:

let $vBigTree :=
       tree:insert(tree:insert(tree:insert(tree:tree(),10),5),17),
   
$vBigTree2 :=

       tree:insert(tree:insert(tree:insert($vBigTree,3),8),14),
   
$vBigTree3 :=

       tree:insert(tree:insert(tree:insert($vBigTree2,20),6),9),
   
$vBigTree4 :=

       tree:insert(tree:insert(tree:insert($vBigTree3,12),15),7)
   
return

      (‘ tree:print($vBigTree4): ‘,
      
tree:print($
vBigTree4),
      
‘ tree:data($vBigTree4): ‘
,
      
tree:data($
vBigTree4),
      
‘ ====================================’
,
      
‘ tree:print(tree:deleteNode($vBigTree4, 7)): ‘
,
      
tree:print(tree:deleteNode($vBigTree4, 7
)),
      
‘ tree:data(tree:deleteNode($vBigTree4, 7)): ‘
,
      
tree:data(tree:deleteNode($vBigTree4, 7
)),
      
‘ ====================================’
,
      
‘ tree:print(tree:deleteNode($vBigTree4, 12)): ‘
,
      
tree:print(tree:deleteNode($vBigTree4, 12
)),
      
‘ tree:data(tree:deleteNode($vBigTree4, 12)): ‘
,
      
tree:data(tree:deleteNode($vBigTree4, 12
)),
      
‘ ====================================’
,
      
‘ tree:print(tree:deleteNode($vBigTree4, 6)): ‘
,
      
tree:print(tree:deleteNode($vBigTree4, 6
)),
      
‘ tree:data(tree:deleteNode($vBigTree4, 6)): ‘
,
      
tree:data(tree:deleteNode($vBigTree4, 6
)),
      
‘ ====================================’
,
      
‘ tree:print(tree:deleteNode($vBigTree4, 5)): ‘
,
      
tree:print(tree:deleteNode($vBigTree4, 5
)),
      
‘ tree:data(tree:deleteNode($vBigTree4, 5)): ‘
,
      
tree:data(tree:deleteNode($vBigTree4, 5
)),
      
‘ ====================================’
,
      
‘ tree:print(tree:deleteNode($vBigTree4, 10)): ‘
,
      
tree:print(tree:deleteNode($vBigTree4, 10
)),
      
‘ tree:data(tree:deleteNode($vBigTree4, 10)): ‘
,
      
tree:data(tree:deleteNode($vBigTree4, 10
))
      )

And here are the results:

tree:print($vBigTree4):

<treeNode>

      <value>10</value>

      <treeNode>

            <value>5</value>

            <treeNode>

                  <value>3</value>

            </treeNode>

            <treeNode>

                  <value>8</value>

                  <treeNode>

                        <value>6</value>

                        <treeNode>

                              <value>7</value>

                        </treeNode>

                  </treeNode>

                  <treeNode>

                        <value>9</value>

                  </treeNode>

            </treeNode>

      </treeNode>

      <treeNode>

            <value>17</value>

            <treeNode>

                  <value>14</value>

                  <treeNode>

                        <value>12</value>

                  </treeNode>

                  <treeNode>

                        <value>15</value>

                  </treeNode>

            </treeNode>

            <treeNode>

                  <value>20</value>

            </treeNode>

      </treeNode>

</treeNode>

tree:data($vBigTree4):

3 5 6 7 8 9 10 12 14 15 17 20

====================================

tree:print(tree:deleteNode($vBigTree4, 7)):

 

<treeNode>
  
<value>10</value>
  
<treeNode>
     
<value>5</value>
     
<treeNode>
        
<value>3</value>
     
</treeNode>
     
<treeNode>
        
<value>8</value>
        
<treeNode>
           
<value>6</value>
        
</treeNode>
        
<treeNode>
           
<value>9</value>
        
</treeNode>
     
</treeNode>
  
</treeNode>
  
<treeNode>
     
<value>17</value>
     
<treeNode>
        
<value>14</value>
        
<treeNode>
           
<value>12</value>
        
</treeNode>
        
<treeNode>
           
<value>15</value>
        
</treeNode>
     
</treeNode>
     
<treeNode>
        
<value>20</value>
     
</treeNode>
  
</treeNode>
</treeNode>

 

tree:data(tree:deleteNode($vBigTree4, 7)):

3 5 6 8 9 10 12 14 15 17 20

====================================

tree:print(tree:deleteNode($vBigTree4, 12)):

<treeNode>

      <value>10</value>

      <treeNode>

            <value>5</value>

            <treeNode>

                  <value>3</value>

            </treeNode>

            <treeNode>

                  <value>8</value>

                  <treeNode>

                        <value>6</value>

                        <treeNode>

                              <value>7</value>

                        </treeNode>

                  </treeNode>

                  <treeNode>

                        <value>9</value>

                  </treeNode>

            </treeNode>

      </treeNode>

      <treeNode>

            <value>17</value>

            <treeNode>

                  <value>14</value>

                  <treeNode>

                        <value>15</value>

                  </treeNode>

            </treeNode>

            <treeNode>

                  <value>20</value>

            </treeNode>

      </treeNode>

</treeNode>

tree:data(tree:deleteNode($vBigTree4, 12)):

3 5 6 7 8 9 10 14 15 17 20

====================================

tree:print(tree:deleteNode($vBigTree4, 6)):

<treeNode>

      <value>10</value>

      <treeNode>

            <value>5</value>

            <treeNode>

                  <value>3</value>

            </treeNode>

            <treeNode>

                  <value>8</value>

                  <treeNode>

                        <value>7</value>

                  </treeNode>

                  <treeNode>

                        <value>9</value>

                  </treeNode>

            </treeNode>

      </treeNode>

      <treeNode>

            <value>17</value>

            <treeNode>

                  <value>14</value>

                  <treeNode>

                        <value>12</value>

                  </treeNode>

                  <treeNode>

                        <value>15</value>

                  </treeNode>

            </treeNode>

            <treeNode>

                  <value>20</value>

            </treeNode>

      </treeNode>

</treeNode>

tree:data(tree:deleteNode($vBigTree4, 6)):

3 5 7 8 9 10 12 14 15 17 20

====================================

tree:print(tree:deleteNode($vBigTree4, 5)):

<treeNode>

      <value>10</value>

      <treeNode>

            <value>6</value>

            <treeNode>

                  <value>3</value>

            </treeNode>

            <treeNode>

                  <value>8</value>

                  <treeNode>

                        <value>7</value>

                  </treeNode>

                  <treeNode>

                        <value>9</value>

                  </treeNode>

            </treeNode>

      </treeNode>

      <treeNode>

            <value>17</value>

            <treeNode>

                  <value>14</value>

                  <treeNode>

                        <value>12</value>

                  </treeNode>

                  <treeNode>

                        <value>15</value>

                  </treeNode>

            </treeNode>

            <treeNode>

                  <value>20</value>

            </treeNode>

      </treeNode>

</treeNode>

tree:data(tree:deleteNode($vBigTree4, 5)):

3 6 7 8 9 10 12 14 15 17 20

====================================

tree:print(tree:deleteNode($vBigTree4, 10)):

<treeNode>

      <value>9</value>

      <treeNode>

            <value>5</value>

            <treeNode>

                  <value>3</value>

            </treeNode>

            <treeNode>

                  <value>8</value>

                  <treeNode>

                        <value>6</value>

                        <treeNode>

                              <value>7</value>

                        </treeNode>

                  </treeNode>

            </treeNode>

      </treeNode>

      <treeNode>

            <value>17</value>

            <treeNode>

                  <value>14</value>

                  <treeNode>

                        <value>12</value>

                  </treeNode>

                  <treeNode>

                        <value>15</value>

                  </treeNode>

            </treeNode>

            <treeNode>

                  <value>20</value>

            </treeNode>

      </treeNode>

</treeNode>

tree:data(tree:deleteNode($vBigTree4, 10)):

3 5 6 7 8 9 12 14 15 17 20

 

In my next posts I’ll show some of the most important applications of the Binary Search tree.


Posted in functional data structures, XPath, XPath 3.0 | Leave a comment

The Binary Search Tree Data Structure–having fun with XPath 3.0

For a long time I have wanted to play with XSLT 3.0 and XPath 3.0. Despite these being in their WD status, the new features are so powerful and overwhelming.

Take just these: Higher Order Functions and the ability to create new anonymous functions in XPath.

In my quest to accomplish what no one has ever done before with XPath I was helped by the existence of an early Saxon implementation – Saxon 9.3.04 offers these in its XQuery 3.0 implementation and the XSLT 3.0 implementation will hopefully be soon fully usable, after fixing a few bugs. The beautifully high-lighted XQuery code below was copied from oXygen 12.1 and pasted onto Word, then to Windows Live Writer.

I decided to start with something not too-complex (are you dreaming of a finger tree written entirely in XPath ?) and the Binary Search tree persistent functional data structure fits well this requirement.

So, let’s start:

As always:

declare namespace tree =http://fxsl.sf.net;

A tree is a sequence (triple) of functions. Here is the definition of an empty tree:

declare functiontree:tree()
        
as (function() as item()*)+

{ 
  (function() {()},   (: top() :)
    function() {()},   (: left() :)
    function() {()}    (: right() :)
  )
};
 

The first function produces the top of the tree, which is the value ( item() ) of its top node.

The second function produces the left sub-tree, which is another sequence (triple) of functions.

The third function produces the right sub-tree, which is another sequence (triple) of functions.

Do you see the problem with the type of the tree:tree()  function? What would happen if I accidentally returned only a pair of functions? Nothing! No static or runtime error would occur and I could spend a lot of time trying to find this simple error. Obviously, a tuple datatype would solve this problem completely.

Now, the three functions top(), left() and right() :

declare function

 tree:top
         (
$pTree as (function() as item()*)*
)
         as
item()?

{
 
if(empty($pTree))
  
then
()
  
else

     $pTree[1]()
};

 

declare function
tree:left
         (
$pTree as (function() as item()*)*
)
        
as (function() as item()*)*

{
 
if(empty($pTree[2]))
 
then
()
 
else

   $pTree[2]()
};
  
 
declare function
tree:right
         (
$pTree as (function() as item()*)*
)
        
as (function() as item()*)*

{
 
if(empty($pTree[3]))
  
then
()
  
else

     $pTree[3]()
};

A tree is empty when:

declare function tree:empty
         (
$pTree as (function() as item()*)*
)
         as xs:boolean
{
 
empty($pTree) orempty($pTree[1
]())
};
 

When does a tree contain an item?

declare function tree:contains
         ($pTree as (function() as item()*)*
,
          $pItem as item
()
         )
         as xs:boolean
{
 if(tree:empty($
pTree))
   then false
()
   else

     let $top := tree:top($pTree)
      return

         ($pItem eq $top)
        or

         ($pItem lt $top
         and

          tree:contains(tree:left($pTree),$pItem)
          )
        or

         ($pItem gt $top
         and

          tree:contains(tree:right($pTree),$pItem)
          )
};

How to add a new node to a tree?

declare function tree:insert
         ($pTree as (function() as item()*)*
,
          $pItem as item
()
         )
         as (function() as item()*)+

{
  
if(tree:empty($pTree))
      then

      (
      
function() {$pItem},   (: top()   :)
       function() {tree:tree()},         (: left()  :)
       function() {tree:tree()}          (: right() :)
       )
     
else
       if($pItem lt tree:top($pTree))
         then

          (
          
function() {tree:top($pTree)},
           function() {tree:insert(tree:left($pTree), $
pItem)},
           function() {tree:right($
pTree)}
           )
         else

          if($pItem gt tree:top($pTree))
           then

           (
           
function() {tree:top($pTree)},
            function() {tree:left($
pTree)},
            function() {tree:insert(tree:right($pTree), $
pItem)}
           )
           else

            $pTree
};

How to present a tree?

declare function tree:print
        ($pTree as (function() as item()*)*
)
        as element()?

{
  if(not(tree:empty($pTree)))
     then

      <treeNode>
           <value>{tree:top($pTree)}</value>
           {
           
tree:print(tree:left($pTree)),
            tree:print(tree:right($
pTree))
           }
     </treeNode>

     else ()
};

How to atomize a tree (and get a good sorting function as a side effect)?

(: tree:data()
  Prints only the data — depth first.
   In effect this is sort() — quite good
   for random data.
 :)
declare function tree:data
        ($pTree as (function() as item()*)*
)
        as item()*

{
  
if(not(tree:empty($pTree)))
     then

     (
     
tree:data(tree:left($pTree)),
      data(tree:top($
pTree)),
      tree:data(tree:right($
pTree))
      )
     else
()
};

How to return a subtree and its depth?

declare function tree:findSubtree
         ($pTree as (function() as item()*)*
,
          $pItem as item
()
         )
         as (function() as item()*)*

{
 
if(tree:empty($pTree))
   then (tree:tree(), function() as item()* {-1
})
   else

     let $depth := 1,
         $top := tree:top($
pTree)
       return

         if($pItem eq $top)
           then ($pTree,function() as item()* {$
depth})
           else

             if($pItem lt $top)
               then

                 let $lsubtree
                       := tree:findSubtree(tree:left($pTree),$
pItem),
                     $ldepth := $lsubtree[4
]()
                   return

                     if($ldepth eq -1)
                       then $
lsubtree
                       else

                         (subsequence($lsubtree,1,3),
                          function() as item()* {1+$
ldepth}
                          )
               else

                 let $rsubtree
                       := tree:findSubtree(tree:right($pTree),$
pItem),
                     $rdepth := $rsubtree[4
]()
                   return

                     if($rdepth eq -1)
                       then $
rsubtree
                       else

                         (subsequence($rsubtree,1,3),
                          function() as item()* {1+$
rdepth}
                          )

};

Finally, lets see how all this executes together:

let $vEmptyTree := tree:tree(),
    $vFilledTree := tree:insert($vEmptyTree,5
)  ,
    $vFilledTree2 := tree:insert($vFilledTree,3
),  
    $vFilledTree3 := tree:insert($vFilledTree2,7
),
    $vFilledTree4 := tree:insert($vFilledTree3,1
),
    $vFilledTree5 := tree:insert($vFilledTree4,9
)
    return

      (tree:print($vFilledTree5),
       ‘ tree:contains($vFilledTree5,1): ‘
,
       tree:contains($vFilledTree5,1
),
       ‘ tree:contains($vFilledTree5,9): ‘
,
       tree:contains($vFilledTree5,9
),
       ‘ tree:contains($vFilledTree5,2): ‘
,
       tree:contains($vFilledTree5,2
),
       ‘ tree:contains($vFilledTree5,11): ‘
,
       tree:contains($vFilledTree5,11
),
       ‘ tree:findSubtree($vFilledTree5,9)[4](): ‘
,
       tree:findSubtree($vFilledTree5,9)[4
](),
       ‘ tree:findSubtree($vFilledTree5,1)[4](): ‘
,
       tree:findSubtree($vFilledTree5,1)[4
](),
       ‘ tree:findSubtree($vFilledTree5,3)[4](): ‘
,
       tree:findSubtree($vFilledTree5,3)[4
](),
       ‘ tree:findSubtree($vFilledTree5,7)[4](): ‘
,
       tree:findSubtree($vFilledTree5,7)[4
](),
       ‘ tree:findSubtree($vFilledTree5,5)[4](): ‘
,
       tree:findSubtree($vFilledTree5,5)[4
](),
       ‘ tree:findSubtree($vFilledTree5,12)[4](): ‘
,
       tree:findSubtree($vFilledTree5,12)[4
](),
       ‘ tree:data($vFilledTree5): ‘
,
       tree:data($
vFilledTree5)
      )

And the result:

<treeNode>
   <value>5</value>
   <treeNode>
      <value>3</value>
      <treeNode>
         <value>1</value>
      </treeNode>
   </treeNode>
   <treeNode>
      <value>7</value>
      <treeNode>
         <value>9</value>
      </treeNode>
   </treeNode>
</treeNode>
tree:contains($vFilledTree5,1):  true
tree:contains($vFilledTree5,9):  true
tree:contains($vFilledTree5,2):  false
tree:contains($vFilledTree5,11):  false
tree:findSubtree($vFilledTree5,9)[4]():  3
tree:findSubtree($vFilledTree5,1)[4]():  3
tree:findSubtree($vFilledTree5,3)[4]():  2
tree:findSubtree($vFilledTree5,7)[4]():  2
tree:findSubtree($vFilledTree5,5)[4]():  1
tree:findSubtree($vFilledTree5,12)[4]():  -1
tree:data($vFilledTree5):  1 3 5 7 9

What next?

As you, my observant reader, probably have noticed, there is no tree:deleteItem() yet. This is a little bit more tricky and will be the topic for my next post.

Summary: A fully functional, persistent functional data structure – the Binary Search Tree has been implemented in pure XPath 3.0. We have observed how the new features of HOF and dynamically created anonymous function items make XPath 3.0 shine and how severely it lacks a simple yet most needed datatype – the tuple – to make our code more elegant and reliable.

Posted in functional data structures, XPath, XPath 3.0, XSLT 3.0 | 15 Comments

What words cannot describe

Lillies in my garden:

Is this the best?

Or maybe this?

Posted in Flowers, Photos | Leave a comment

Migrating to WordPress: http://dnovatchev.wordpress.com

As part of a global migration of spaces.live com to WordPress, I am migrating my blog to: http://dnovatchev.wordpress.com
 
Please, update your links, and sorry for the inconvenience.
 
From now on all my current and future posts will only be available at: http://dnovatchev.wordpress.com
Posted in Uncategorized | Leave a comment

The first Fibonacci Number that is zeroless-pandigital at both ends

Here is another Project Euler problem – this time a little-bit more challenging:

The Fibonacci sequence is defined by the recurrence relation:

Fn = Fn1 + Fn2 , where F1 = 1 and F2 = 1.

It turns out that F541  , which contains 113 digits, is the first Fibonacci number for which the last nine digits are 1-9 pandigital (contain all the digits 1 to 9, but not necessarily in order). And F2749  , which contains 575 digits, is the first Fibonacci number for which the first nine digits are 1-9 pandigital.

Given that Fk is the first Fibonacci number for which the first nine digits AND the last nine digits are 1-9 pandigital, find k.”

Does this sound XSLT-ish? No, not at all.

This solution:

<xsl:stylesheet version="2.0"

 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"

 xmlns:xs="http://www.w3.org/2001/XMLSchema"

 xmlns:f="http://fxsl.sf.net/">

 <xsl:import href="../../../f/func-Fibonacci.xsl"/>

 <xsl:output omit-xml-declaration="yes" indent="yes"/>

 

 <xsl:template match="/">

  <xsl:sequence select=

  "f:getFiboBothEndingsPan9(3,2,1)

  "/>

 </xsl:template>

 

 <xsl:function name="f:getFiboBothEndingsPan9" as="xs:integer">

  <xsl:param name="pcurrentInd" as="xs:integer"/>

  <xsl:param name="pPrev1" as="xs:integer"/>

  <xsl:param name="pPrev2" as="xs:integer"/>

 

  <xsl:sequence select=

   "for $current in ($pPrev1 + $pPrev2) mod 1000000000

        return

           if(f:isPandigital19($current)

             and

              f:isPandigital19Head(f:fibo($pcurrentInd))

              )

             then $pcurrentInd

             else

               f:getFiboBothEndingsPan9($pcurrentInd+1, $current, $pPrev1)

   "/>

 </xsl:function>

 

 <xsl:function name="f:isPandigital19" as="xs:boolean">

  <xsl:param name="pN" as="xs:integer"/>

 

  <xsl:sequence select=

  "for $s in xs:string($pN)

    return

          not(translate(’123456789′, $s, ”))

  "/>

 </xsl:function>

 

 <xsl:function name="f:isPandigital19Head" as="xs:boolean">

   <xsl:param name="pN" as="xs:integer"/>

 

   <xsl:sequence select="f:isPandigital19(f:head9($pN))"/>

 </xsl:function>

 

  <xsl:function name="f:head9" as="xs:integer">

   <xsl:param name="arg1" as="xs:integer"/>

 

   <xsl:sequence select=

    "xs:integer(substring(string($arg1), 1, 9))"/>

  </xsl:function>

</xsl:stylesheet>

has the following properties:

  • Execution time:                 34.6 sec.
  • LOC:                                       54 (~ 40 express the solution)
  • Time to write the code:  ~15 min.
  • Uses library:                       FXSL (the f:fibo() function for generating Fibonacci numbers)

The index of the first Fibonacci number with the required properties is more than 300 thousand and the number itself has more than 68 thousand digits.

Not satisfied with the speed of this solution? Want even faster? Then here’s something you may consider:

<xsl:stylesheet version="2.0"

 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"

 xmlns:xs="http://www.w3.org/2001/XMLSchema"

 xmlns:f="http://fxsl.sf.net/">

 <xsl:output omit-xml-declaration="yes" indent="yes"/>

 

 <xsl:template match="/">

  <xsl:sequence select=

  "f:getFiboBothEndingsPan9(3,2,1,2,1)

  "/>

 </xsl:template>

 

 <xsl:function name="f:getFiboBothEndingsPan9" as="xs:integer">

  <xsl:param name="pcurrentInd" as="xs:integer"/>

  <xsl:param name="pPrevTail1" as="xs:integer"/>

  <xsl:param name="pPrevTail2" as="xs:integer"/>

  <xsl:param name="pPrevHead1" as="xs:integer"/>

  <xsl:param name="pPrevHead2" as="xs:integer"/>

 

  <xsl:sequence select=

   "for $currentTail in ($pPrevTail1 + $pPrevTail2) mod 1000000000,

        $newHead1 in $pPrevHead1 + $pPrevHead2,

        $newHead2 in $pPrevHead1,

        $currentHead1 in if($newHead1 ge 10000000000000000)

                           then $newHead1 idiv 10

                           else $newHead1,

        $currentHead2 in if($newHead1 ge 10000000000000000)

                            then $newHead2 idiv 10

                            else $newHead2

        return

           if($currentHead1 ge 100000000

             and

              f:isPandigital19($currentTail)

             and

              f:isPandigital19

                    (xs:integer(substring(string($currentHead1),1,9))))

             then $pcurrentInd

             else

               f:getFiboBothEndingsPan9($pcurrentInd+1,

                                        $currentTail, $pPrevTail1,

                                        $currentHead1, $currentHead2

                                        )

   "/>

 </xsl:function>

 

 <xsl:function name="f:isPandigital19" as="xs:boolean">

  <xsl:param name="pN" as="xs:integer"/>

 

  <xsl:sequence select=

  "for $s in xs:string($pN)

    return

          not(translate(’123456789′, $s, ”))

  "/>

 </xsl:function>

</xsl:stylesheet>

We have added a a couple of lines of code and look what we have now:

  • Execution time:                     0.555 sec.
  • LOC:                                        55 (~ 43 express the solution)
  • Time to write the code:  ~10 min.

As witnessed in the Project Euler thread for this problem (one must have provided the correct answer in order to access this thread), many solutions took longer: from 3.3 seconds, to 30-40 minutes, to couple of hours.

One Python programmer even waited for the end of his brute-force solution … 12 days

Posted in Project Euler | 8 Comments

Not for XSLT? More Fun with Project Euler

 

Here is another Project Euler problem that seems exactly what XSLT was not intended for:

 

In the 5 by 5 matrix below, the minimal path sum from the top left to the bottom right, by only moving to the right and down, is indicated in bold red and is equal to 2427.

  

131

673

234

103

18

201

96

342

965

150

630

803

746

422

111

537

699

497

121

956

805

732

524

37

331

  

 

Find the minimal path sum, in matrix.txt (right click and ‘Save Link/Target As…’), a 31K text file containing a 80 by 80 matrix, from the top left to the bottom right by only moving right and down.

My solution:

 

<xsl:stylesheet version="2.0"

 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"

 xmlns:xs="http://www.w3.org/2001/XMLSchema"

 xmlns:saxon="http://saxon.sf.net/"

 xmlns:mx="my:my" exclude-result-prefixes="xs saxon mx"

 >

 <xsl:output omit-xml-declaration="yes" indent="yes"/>

 

 <xsl:variable name="vMatrix" as="element()*">

  <xsl:variable name="vLines" as="xs:string*"

   select="tokenize(unparsed-text(‘matrix.txt’),’\s+’)[.]"/>

   <xsl:for-each select="$vLines">

     <line>

      <xsl:for-each select="tokenize(.,’,')">

        <v><xsl:value-of select="."/></v>

      </xsl:for-each>

     </line>

   </xsl:for-each>

 </xsl:variable>

 

 <xsl:variable name="vDimension" as="xs:integer"

  select="count($vMatrix)"/>

 

 <xsl:template match="/">

  <xsl:sequence select="mx:path-minSum(1,1,0)"/>

 </xsl:template>

 

 <xsl:function name="mx:path-minSum" as="xs:integer"

  saxon:memo-function="yes">

  <xsl:param name="pX" as="xs:integer"/>

  <xsl:param name="pY" as="xs:integer"/>

  <xsl:param name="pcurSum" as="xs:integer"/>

 

  <xsl:variable name="curVal" as="xs:integer"

   select="mx:mtx($pX, $pY)"/>

 

  <xsl:sequence select=

   "if($pX eq $vDimension and $pY eq $vDimension)

      then $curVal

      else

        for $nextX in min(($vDimension, $pX+1)),

            $nextY in min(($vDimension, $pY+1)),

            $s1 in if($nextY gt $pY)

                    then

                     $pcurSum + $curVal

                     + mx:path-minSum($pX, $nextY, $pcurSum)

                    else 999999999999,

            $s2 in if($nextX gt $pX)

                    then

                     $pcurSum + $curVal

                     + mx:path-minSum($nextX, $pY, $pcurSum)

                    else 999999999999

          return

            min(($s1,$s2))

   "/>

 </xsl:function>

 

 <xsl:function name="mx:mtx" as="xs:integer"

  saxon:memo-function="yes">

  <xsl:param name="pX" as="xs:integer"/>

  <xsl:param name="pY" as="xs:integer"/>

 

  <xsl:sequence select="$vMatrix[$pY]/*[$pX]"/>

 </xsl:function>

</xsl:stylesheet>

 

 

Properties:

  • LOC:                                         65 – well-formatted code for readability.
  • Execution time:                  078 ms.
  • Time to write the code:  ~ 10 minutes.
Posted in Project Euler | Leave a comment