aboutsummaryrefslogtreecommitdiff
path: root/src/share/classes/com/sun/org/apache/xml/internal/dtm/ref/dom2dtm/DOM2DTM.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/share/classes/com/sun/org/apache/xml/internal/dtm/ref/dom2dtm/DOM2DTM.java')
-rw-r--r--src/share/classes/com/sun/org/apache/xml/internal/dtm/ref/dom2dtm/DOM2DTM.java1765
1 files changed, 1765 insertions, 0 deletions
diff --git a/src/share/classes/com/sun/org/apache/xml/internal/dtm/ref/dom2dtm/DOM2DTM.java b/src/share/classes/com/sun/org/apache/xml/internal/dtm/ref/dom2dtm/DOM2DTM.java
new file mode 100644
index 0000000..cee8474
--- /dev/null
+++ b/src/share/classes/com/sun/org/apache/xml/internal/dtm/ref/dom2dtm/DOM2DTM.java
@@ -0,0 +1,1765 @@
+/*
+ * reserved comment block
+ * DO NOT REMOVE OR ALTER!
+ */
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * $Id: DOM2DTM.java,v 1.2.4.1 2005/09/15 08:15:10 suresh_emailid Exp $
+ */
+package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm;
+
+import java.util.Vector;
+
+import javax.xml.transform.SourceLocator;
+import javax.xml.transform.dom.DOMSource;
+
+import com.sun.org.apache.xml.internal.dtm.DTM;
+import com.sun.org.apache.xml.internal.dtm.DTMManager;
+import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
+import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators;
+import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;
+import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable;
+import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource;
+import com.sun.org.apache.xml.internal.res.XMLErrorResources;
+import com.sun.org.apache.xml.internal.res.XMLMessages;
+import com.sun.org.apache.xml.internal.utils.FastStringBuffer;
+import com.sun.org.apache.xml.internal.utils.QName;
+import com.sun.org.apache.xml.internal.utils.StringBufferPool;
+import com.sun.org.apache.xml.internal.utils.TreeWalker;
+import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer;
+import com.sun.org.apache.xml.internal.utils.XMLString;
+import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
+import org.w3c.dom.Attr;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentType;
+import org.w3c.dom.Element;
+import org.w3c.dom.Entity;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.xml.sax.ContentHandler;
+
+/** The <code>DOM2DTM</code> class serves up a DOM's contents via the
+ * DTM API.
+ *
+ * Note that it doesn't necessarily represent a full Document
+ * tree. You can wrap a DOM2DTM around a specific node and its subtree
+ * and the right things should happen. (I don't _think_ we currently
+ * support DocumentFragment nodes as roots, though that might be worth
+ * considering.)
+ *
+ * Note too that we do not currently attempt to track document
+ * mutation. If you alter the DOM after wrapping DOM2DTM around it,
+ * all bets are off.
+ * */
+public class DOM2DTM extends DTMDefaultBaseIterators
+{
+ static final boolean JJK_DEBUG=false; // when true, nextNode() prints traversal diagnostics to System.out
+ static final boolean JJK_NEWCODE=true; // selects the newer of the two getLocalName() implementations
+
+ /** Manifest constant: the namespace URI that is passed, together with
+ * the prefix "xml", when nextNode() synthesizes the implied xml:
+ * namespace declaration node.
+ */
+ static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";
+
+ /** The current position in the DOM tree. Last node examined for
+ * possible copying to DTM. Set to null once the whole subtree has
+ * been scanned. */
+ transient private Node m_pos;
+ /** The current position in the DTM tree. The node that children get
+ * appended to. */
+ private int m_last_parent=0;
+ /** The current position in the DTM tree. The node that new children
+ * reference as their previous sibling. */
+ private int m_last_kid=NULL;
+
+ /** The top of the subtree.
+ * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
+ * */
+ transient private Node m_root;
+
+ /** True iff the first element has been processed. This is used to control
+ synthesis of the implied xml: namespace declaration node. */
+ boolean m_processedFirstElement=false;
+
+ /** true if ALL the nodes in the m_root subtree have been processed;
+ * false if our incremental build has not yet finished scanning the
+ * DOM tree. */
+ transient private boolean m_nodesAreProcessed;
+
+ /** The node objects. The instance part of the handle indexes
+ * directly into this vector. Each DTM node may actually be
+ * composed of several DOM nodes (for example, if logically-adjacent
+ * Text/CDATASection nodes in the DOM have been coalesced into a
+ * single DTM Text node); this table points only to the first in
+ * that sequence. */
+ protected Vector m_nodes = new Vector();
+
+ /**
+ * Construct a DOM2DTM object from a DOM node.
+ *
+ * <p>The node returned by <code>domSource.getNode()</code> becomes the
+ * root of this DTM and is registered immediately through addNode().
+ * If that root is an Element, its attributes are registered right
+ * away as well. Everything below the root is added later, one node
+ * per nextNode() call.</p>
+ *
+ * <p>A null root node is not checked for here.</p>
+ *
+ * @param mgr The DTMManager who owns this DTM.
+ * @param domSource the DOM source that this DTM will wrap.
+ * @param dtmIdentity The DTM identity ID for this DTM.
+ * @param whiteSpaceFilter The white space filter for this DTM, which may
+ * be null.
+ * @param xstringfactory XMLString factory for creating character content.
+ * @param doIndexing true if the caller considers it worth it to use
+ * indexing schemes.
+ */
+ public DOM2DTM(DTMManager mgr, DOMSource domSource,
+ int dtmIdentity, DTMWSFilter whiteSpaceFilter,
+ XMLStringFactory xstringfactory,
+ boolean doIndexing)
+ {
+ super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
+ xstringfactory, doIndexing);
+
+ // Initialize DOM navigation: the cursor starts on the root itself.
+ m_pos=m_root = domSource.getNode();
+ // Initialize DTM navigation: the root has no parent and no previous sibling.
+ m_last_parent=m_last_kid=NULL;
+ m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
+
+ // Apparently the domSource root may not actually be the
+ // Document node. If it's an Element node, we need to immediately
+ // add its attributes. Adapted from nextNode().
+ // %REVIEW% Move this logic into addNode and recurse? Cleaner!
+ //
+ // (If it's an EntityReference node, we're probably scrod. For now
+ // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
+ //
+ // %ISSUE% What about inherited namespaces in this case?
+ // Do we need to special-case initialize them into the DTM model?
+ if(ELEMENT_NODE == m_root.getNodeType())
+ {
+ NamedNodeMap attrs=m_root.getAttributes();
+ int attrsize=(attrs==null) ? 0 : attrs.getLength();
+ if(attrsize>0)
+ {
+ int attrIndex=NULL; // start with no previous sib
+ for(int i=0;i<attrsize;++i)
+ {
+ // No need to force nodetype in this case;
+ // addNode() will take care of switching it from
+ // Attr to Namespace if necessary.
+ // The parent index is the literal 0, i.e. the root added above.
+ attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
+ m_firstch.setElementAt(DTM.NULL,attrIndex);
+ }
+ // Terminate list of attrs, and make sure they aren't
+ // considered children of the element
+ m_nextsib.setElementAt(DTM.NULL,attrIndex);
+
+ // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
+ } // if attrs exist
+ } //if(ELEMENT_NODE)
+
+ // Initialize DTM-completed status: descendants are still to be scanned.
+ m_nodesAreProcessed = false;
+ }
+
+ /**
+ * Append one DOM node to the DTM tables and return its new index.
+ *
+ * <p>The node is stored in m_nodes, and the parallel first-child,
+ * next-sibling, previous-sibling, parent and expanded-type tables are
+ * filled in for the new index. First-child and next-sibling start out
+ * as NOTPROCESSED. Attr nodes named <code>xmlns</code> or
+ * <code>xmlns:*</code> are recorded as DTM Namespace nodes.</p>
+ *
+ * @param node The node that is to be added to the DTM.
+ * @param parentIndex The current parent index.
+ * @param previousSibling The previous sibling index.
+ * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
+ * Used to force nodes to Text rather than CDATASection when their
+ * coalesced value includes ordinary Text nodes (current DTM behavior).
+ *
+ * @return The index identity of the node that was added.
+ */
+ protected int addNode(Node node, int parentIndex,
+ int previousSibling, int forceNodeType)
+ {
+ int nodeIndex = m_nodes.size();
+
+ // Have we overflowed a DTM Identity's addressing range?
+ if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
+ {
+ try
+ {
+ if(m_mgr==null)
+ throw new ClassCastException(); // no manager: share the error path of a non-default manager
+
+ // Handle as Extended Addressing
+ DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
+ int id=mgrD.getFirstFreeDTMID();
+ mgrD.addDTM(this,id,nodeIndex);
+ m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
+ }
+ catch(ClassCastException e)
+ {
+ // %REVIEW% Wrong error message, but I've been told we're trying
+ // not to add messages right now for I18N reasons.
+ // %REVIEW% Should this be a Fatal Error?
+ error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
+ }
+ }
+
+ m_size++;
+ // ensureSize(nodeIndex);
+
+ int type;
+ if(NULL==forceNodeType)
+ type = node.getNodeType();
+ else
+ type=forceNodeType;
+
+ // %REVIEW% The Namespace Spec currently says that Namespaces are
+ // processed in a non-namespace-aware manner, by matching the
+ // QName, even though there is in fact a namespace assigned to
+ // these nodes in the DOM. If and when that changes, we will have
+ // to consider whether we check the namespace-for-namespaces
+ // rather than the node name.
+ //
+ // %TBD% Note that the DOM does not necessarily explicitly declare
+ // all the namespaces it uses. DOM Level 3 will introduce a
+ // namespace-normalization operation which reconciles that, and we
+ // can request that users invoke it or otherwise ensure that the
+ // tree is namespace-well-formed before passing the DOM to Xalan.
+ // But if they don't, what should we do about it? We probably
+ // don't want to alter the source DOM (and may not be able to do
+ // so if it's read-only). The best available answer might be to
+ // synthesize additional DTM Namespace Nodes that don't correspond
+ // to DOM Attr Nodes.
+ if (Node.ATTRIBUTE_NODE == type)
+ {
+ String name = node.getNodeName();
+
+ if (name.startsWith("xmlns:") || name.equals("xmlns"))
+ {
+ type = DTM.NAMESPACE_NODE;
+ }
+ }
+
+ m_nodes.addElement(node);
+
+ m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
+ m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
+ m_prevsib.setElementAt(previousSibling,nodeIndex);
+ m_parent.setElementAt(parentIndex,nodeIndex);
+
+ if(DTM.NULL != parentIndex &&
+ type != DTM.ATTRIBUTE_NODE &&
+ type != DTM.NAMESPACE_NODE)
+ {
+ // If the DTM parent had no children, this becomes its first child.
+ if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
+ m_firstch.setElementAt(nodeIndex,parentIndex);
+ }
+
+ String nsURI = node.getNamespaceURI();
+
+ // Deal with the difference between Namespace spec and XSLT
+ // definitions of local name. (The former says PIs don't have
+ // localnames; the latter says they do.)
+ String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ?
+ node.getNodeName() :
+ node.getLocalName();
+
+ // Hack to make DOM1 sort of work: fall back to the full node name
+ // when an element or attribute has no local name.
+ if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
+ && null == localName)
+ localName = node.getNodeName(); // -sb
+
+ ExpandedNameTable exnt = m_expandedNameTable;
+
+ // %TBD% Nodes created with the old non-namespace-aware DOM
+ // calls createElement() and createAttribute() will never have a
+ // localname. That will cause their expandedNameID to be just the
+ // nodeType... which will keep them from being matched
+ // successfully by name. Since the DOM makes no promise that
+ // those will participate in namespace processing, this is
+ // officially accepted as Not Our Fault. But it might be nice to
+ // issue a diagnostic message!
+ // (The branch below is intentionally empty; only the disabled
+ // warning remains.)
+ if(node.getLocalName()==null &&
+ (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
+ {
+ // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
+ }
+
+ int expandedNameID = (null != localName)
+ ? exnt.getExpandedTypeID(nsURI, localName, type) :
+ exnt.getExpandedTypeID(type);
+
+ m_exptype.setElementAt(expandedNameID,nodeIndex);
+
+ indexNode(expandedNameID, nodeIndex);
+
+ // Link the previous sibling forward to the node just added.
+ if (DTM.NULL != previousSibling)
+ m_nextsib.setElementAt(nodeIndex,previousSibling);
+
+ // This should be done after m_exptype has been set, and probably should
+ // always be the last thing we do
+ if (type == DTM.NAMESPACE_NODE)
+ declareNamespaceInContext(parentIndex,nodeIndex);
+
+ return nodeIndex;
+ }
+
+ /**
+  * Report how many DTM nodes have been registered so far.
+  *
+  * @return the current size of the node table.
+  */
+ public int getNumberOfNodes()
+ {
+   final int registered = m_nodes.size();
+   return registered;
+ }
+
+ /**
+  * Advance the incremental build by one step of a non-recursive,
+  * depth-first walk over the DOM subtree rooted at m_root.
+  *
+  * <p>Each successful call moves the DOM cursor (m_pos) to the next
+  * candidate node and, unless that node is suppressed, adds it to the
+  * DTM tables. Suppressed nodes are whitespace-only text when
+  * stripping is in force, and a processing instruction named "xml".
+  * For an Element, all of its attributes are added in the same call,
+  * along with a synthesized xml: namespace declaration on the first
+  * element if none was declared explicitly. DocumentType nodes are
+  * skipped. EntityReference nodes are traversed but never added.</p>
+  *
+  * @return true if the walk advanced to another DOM node (which may
+  * have been suppressed rather than added); false if the whole subtree
+  * has already been processed or was exhausted by this call.
+  */
+ protected boolean nextNode()
+ {
+   // Non-recursive one-fetch-at-a-time depth-first traversal with
+   // attribute/namespace nodes and white-space stripping.
+   // Navigating the DOM is simple, navigating the DTM is simple;
+   // keeping track of both at once is a trifle baroque but at least
+   // we've avoided most of the special cases.
+   if (m_nodesAreProcessed)
+     return false;
+
+   // %REVIEW% Is this local copy Really Useful from a performance
+   // point of view? Or is this a false microoptimization?
+   Node pos=m_pos;
+   Node next=null;
+   int nexttype=NULL;
+
+   // Navigate DOM tree
+   do
+   {
+     // Look down to first child.
+     if (pos.hasChildNodes())
+     {
+       next = pos.getFirstChild();
+
+       // %REVIEW% There's probably a more elegant way to skip
+       // the doctype. (Just let it go and Suppress it?
+       if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
+         next=next.getNextSibling();
+
+       // Push DTM context -- except for children of Entity References,
+       // which have no DTM equivalent and cause no DTM navigation.
+       if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
+       {
+         m_last_parent=m_last_kid;
+         m_last_kid=NULL;
+         // Whitespace-handler context stacking
+         if(null != m_wsfilter)
+         {
+           short wsv =
+             m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
+           boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
+             ? getShouldStripWhitespace()
+             : (DTMWSFilter.STRIP == wsv);
+           pushShouldStripWhitespace(shouldStrip);
+         } // if(m_wsfilter)
+       }
+     }
+
+     // If that fails, look up and right (but not past root!)
+     else
+     {
+       if(m_last_kid!=NULL)
+       {
+         // Last node posted at this level had no more children
+         // If it has _no_ children, we need to record that.
+         if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
+           m_firstch.setElementAt(NULL,m_last_kid);
+       }
+
+       while(m_last_parent != NULL)
+       {
+         // %REVIEW% There's probably a more elegant way to
+         // skip the doctype. (Just let it go and Suppress it?
+         next = pos.getNextSibling();
+         if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
+           next=next.getNextSibling();
+
+         if(next!=null)
+           break; // Found it!
+
+         // No next-sibling found. Pop the DOM.
+         pos=pos.getParentNode();
+         if(pos==null)
+         {
+           // %TBD% Should never arise, but I want to be sure of that...
+           if(JJK_DEBUG)
+           {
+             System.out.println("***** DOM2DTM Pop Control Flow problem");
+             for(;;); // Freeze right here!
+           }
+         }
+
+         // The only parents in the DTM are Elements. However,
+         // the DOM could contain EntityReferences. If we
+         // encounter one, pop it _without_ popping DTM.
+         if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
+         {
+           // Nothing needs doing
+           if(JJK_DEBUG)
+             System.out.println("***** DOM2DTM popping EntRef");
+         }
+         else
+         {
+           popShouldStripWhitespace();
+           // Fix and pop DTM
+           if(m_last_kid==NULL)
+             m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
+           else
+             m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
+           m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
+         }
+       }
+       if(m_last_parent==NULL)
+         next=null;
+     }
+
+     // Always recompute the type. If the walk ran off the end of the
+     // tree right after an entity reference, a stale
+     // ENTITY_REFERENCE_NODE value here would set pos to null and send
+     // the loop around again, dereferencing that null.
+     nexttype = (next!=null) ? next.getNodeType() : NULL;
+
+     // If it's an entity ref, advance past it.
+     //
+     // %REVIEW% Should we let this out the door and just suppress it?
+     // More work, but simpler code, more likely to be correct, and
+     // it doesn't happen very often. We'd get rid of the loop too.
+     if (ENTITY_REFERENCE_NODE == nexttype)
+       pos=next;
+   }
+   while (ENTITY_REFERENCE_NODE == nexttype);
+
+   // Did we run out of the tree?
+   if(next==null)
+   {
+     m_nextsib.setElementAt(NULL,0);
+     m_nodesAreProcessed = true;
+     m_pos=null;
+
+     if(JJK_DEBUG)
+     {
+       System.out.println("***** DOM2DTM Crosscheck:");
+       for(int i=0;i<m_nodes.size();++i)
+         System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
+     }
+
+     return false;
+   }
+
+   // Text needs some special handling:
+   //
+   // DTM may skip whitespace. This is handled by the suppressNode flag, which
+   // when true will keep the DTM node from being created.
+   //
+   // DTM only directly records the first DOM node of any logically-contiguous
+   // sequence. The lastTextNode value will be set to the last node in the
+   // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
+   // advance next over this whole block. Should be simpler than special-casing
+   // the above loop for "Was the logically-preceding sibling a text node".
+   //
+   // Finally, a DTM node should be considered a CDATASection only if all the
+   // contiguous text it covers is CDATASections. The first Text should
+   // force DTM to Text.
+
+   boolean suppressNode=false;
+   Node lastTextNode=null;
+
+   nexttype=next.getNodeType();
+
+   if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
+   {
+     // If filtering, initially assume we're going to suppress the node
+     suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
+
+     // Scan logically contiguous text (siblings, plus "flattening"
+     // of entity reference boundaries).
+     Node n=next;
+     while(n!=null)
+     {
+       lastTextNode=n;
+       // Any Text node means DTM considers it all Text
+       if(TEXT_NODE == n.getNodeType())
+         nexttype=TEXT_NODE;
+       // Any non-whitespace in this sequence blocks whitespace
+       // suppression
+       suppressNode &=
+         XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
+
+       n=logicalNextDOMTextNode(n);
+     }
+   }
+
+   // Special handling for PIs: Some DOMs represent the XML
+   // Declaration as a PI. This is officially incorrect, per the DOM
+   // spec, but is considered a "wrong but tolerable" temporary
+   // workaround pending proper handling of these fields in DOM Level
+   // 3. We want to recognize and reject that case.
+   else if(PROCESSING_INSTRUCTION_NODE==nexttype)
+   {
+     // Test the PI itself (next), not the node we came from (pos).
+     suppressNode = "xml".equalsIgnoreCase(next.getNodeName());
+   }
+
+
+   if(!suppressNode)
+   {
+     // Inserting next. NOTE that we force the node type; for
+     // coalesced Text, this records CDATASections adjacent to
+     // ordinary Text as Text.
+     int nextindex=addNode(next,m_last_parent,m_last_kid,
+                           nexttype);
+
+     m_last_kid=nextindex;
+
+     if(ELEMENT_NODE == nexttype)
+     {
+       int attrIndex=NULL; // start with no previous sib
+       // Process attributes _now_, rather than waiting.
+       // Simpler control flow, makes NS cache available immediately.
+       NamedNodeMap attrs=next.getAttributes();
+       int attrsize=(attrs==null) ? 0 : attrs.getLength();
+       if(attrsize>0)
+       {
+         for(int i=0;i<attrsize;++i)
+         {
+           // No need to force nodetype in this case;
+           // addNode() will take care of switching it from
+           // Attr to Namespace if necessary.
+           attrIndex=addNode(attrs.item(i),
+                             nextindex,attrIndex,NULL);
+           m_firstch.setElementAt(DTM.NULL,attrIndex);
+
+           // If the xml: prefix is explicitly declared
+           // we don't need to synthesize one.
+           //
+           // NOTE that XML Namespaces were not originally
+           // defined as being namespace-aware (grrr), and
+           // while the W3C is planning to fix this it's
+           // safer for now to test the QName and trust the
+           // parsers to prevent anyone from redefining the
+           // reserved xmlns: prefix
+           if(!m_processedFirstElement
+              && "xmlns:xml".equals(attrs.item(i).getNodeName()))
+             m_processedFirstElement=true;
+         }
+         // Terminate list of attrs, and make sure they aren't
+         // considered children of the element
+       } // if attrs exist
+       if(!m_processedFirstElement)
+       {
+         // The DOM might not have an explicit declaration for the
+         // implicit "xml:" prefix, but the XPath data model
+         // requires that this appear as a Namespace Node so we
+         // have to synthesize one. You can think of this as
+         // being a default attribute defined by the XML
+         // Namespaces spec rather than by the DTD.
+         attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
+                             (Element)next,"xml",NAMESPACE_DECL_NS,
+                             makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
+                           ),
+                           nextindex,attrIndex,NULL);
+         m_firstch.setElementAt(DTM.NULL,attrIndex);
+         m_processedFirstElement=true;
+       }
+       if(attrIndex!=NULL)
+         m_nextsib.setElementAt(DTM.NULL,attrIndex);
+     } //if(ELEMENT_NODE)
+   } // (if !suppressNode)
+
+   // Text postprocessing: Act on values stored above
+   if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
+   {
+     // %TBD% If nexttype was forced to TEXT, patch the DTM node
+
+     next=lastTextNode; // Advance the DOM cursor over contiguous text
+   }
+
+   // Remember where we left off.
+   m_pos=next;
+   return true;
+ }
+
+
+ /**
+  * Return the DOM node that backs the given DTM node.
+  *
+  * @param nodeHandle The node ID.
+  *
+  * @return The DOM node stored for that handle's identity. For a
+  * coalesced text node this is the first DOM node of the run.
+  */
+ public Node getNode(int nodeHandle)
+ {
+   return (Node) m_nodes.elementAt(makeNodeIdentity(nodeHandle));
+ }
+
+ /**
+ * Get a Node from an identity index.
+ *
+ * NEEDSDOC @param nodeIdentity
+ *
+ * NEEDSDOC ($objectName$) @return
+ */
+ protected Node lookupNode(int nodeIdentity)
+ {
+ return (Node) m_nodes.elementAt(nodeIdentity);
+ }
+
+ /**
+ * Get the next node identity value in the list, and call the iterator
+ * if it hasn't been added yet.
+ *
+ * @param identity The node identity (index).
+ * @return identity+1, or DTM.NULL.
+ */
+ protected int getNextNodeIdentity(int identity)
+ {
+
+ identity += 1;
+
+ if (identity >= m_nodes.size())
+ {
+ if (!nextNode())
+ identity = DTM.NULL;
+ }
+
+ return identity;
+ }
+
+ /**
+  * Get the handle from a Node by scanning the node table linearly,
+  * extending the table with nextNode() whenever the scan catches up
+  * with it.
+  * <p>%OPT% This will be pretty slow.</p>
+  *
+  * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
+  * walk down DTM reconstructing path) might be considerably faster
+  * on later nodes in large documents. That might also imply improving
+  * this call to handle nodes which would be in this DTM but
+  * have not yet been built, which might or might not be a Good Thing.</p>
+  *
+  * %REVIEW% This relies on being able to test node-identity via
+  * object-identity. DTM2DOM proxying is a great example of a case where
+  * that doesn't work. DOM Level 3 will provide the isSameNode() method
+  * to fix that, but until then this is going to be flaky.
+  *
+  * @param node A node, which may be null.
+  *
+  * @return The node handle or <code>DTM.NULL</code>.
+  */
+ private int getHandleFromNode(Node node)
+ {
+   if (null == node)
+     return DTM.NULL;
+
+   int scanned = 0;               // next table slot to compare
+   int known = m_nodes.size();    // slots available at the moment
+   boolean advanced;
+
+   do
+   {
+     // Compare every slot that has not been looked at yet.
+     while (scanned < known)
+     {
+       if (m_nodes.elementAt(scanned) == node)
+         return makeNodeHandle(scanned);
+       scanned++;
+     }
+
+     // Caught up with the table: try to build more of it.
+     advanced = nextNode();
+     known = m_nodes.size();
+   }
+   while (advanced || scanned < known);
+
+   return DTM.NULL;
+ }
+
+ /** Get the handle from a Node. This is a more robust version of
+  * getHandleFromNode, intended to be usable by the public: it first
+  * checks that the node belongs to the same document as m_root and
+  * lies in m_root's subtree, and only then searches for the handle.
+  *
+  * <p>%OPT% This will be pretty slow.</p>
+  *
+  * %REVIEW% This relies on being able to test node-identity via
+  * object-identity. DTM2DOM proxying is a great example of a case where
+  * that doesn't work. DOM Level 3 will provide the isSameNode() method
+  * to fix that, but until then this is going to be flaky.
+  *
+  * @param node A node, which may be null.
+  *
+  * @return The node handle or <code>DTM.NULL</code>. */
+ public int getHandleOfNode(Node node)
+ {
+   if (null == node)
+     return DTM.NULL;
+
+   // Is Node actually within the same document? If not, don't search!
+   // This would be easier if m_root was always the Document node, but
+   // we decided to allow wrapping a DTM around a subtree.
+   boolean sameDocument;
+   if (m_root == node)
+     sameDocument = true;
+   else if (m_root.getNodeType() == DOCUMENT_NODE)
+     sameDocument = (m_root == node.getOwnerDocument());
+   else
+     sameDocument = (m_root.getOwnerDocument() == node.getOwnerDocument());
+
+   if (!sameDocument)
+     return DTM.NULL;
+
+   // Walk from the node up through its ancestors (an attribute's
+   // "parent" being its owner element) looking for m_root.
+   //
+   // %OPT% This check may be improved significantly when DOM
+   // Level 3 nodeKey and relative-order tests become
+   // available!
+   Node ancestor = node;
+   while (ancestor != null)
+   {
+     if (ancestor == m_root)
+       return getHandleFromNode(node); // We know this node; find its handle.
+
+     if (ancestor.getNodeType() != ATTRIBUTE_NODE)
+       ancestor = ancestor.getParentNode();
+     else
+       ancestor = ((org.w3c.dom.Attr) ancestor).getOwnerElement();
+   }
+
+   return DTM.NULL;
+ }
+
+ /**
+ * Retrieves an attribute node by by qualified name and namespace URI.
+ *
+ * @param nodeHandle int Handle of the node upon which to look up this attribute..
+ * @param namespaceURI The namespace URI of the attribute to
+ * retrieve, or null.
+ * @param name The local name of the attribute to
+ * retrieve.
+ * @return The attribute node handle with the specified name (
+ * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
+ * attribute.
+ */
+ public int getAttributeNode(int nodeHandle, String namespaceURI,
+ String name)
+ {
+
+ // %OPT% This is probably slower than it needs to be.
+ if (null == namespaceURI)
+ namespaceURI = "";
+
+ int type = getNodeType(nodeHandle);
+
+ if (DTM.ELEMENT_NODE == type)
+ {
+
+ // Assume that attributes immediately follow the element.
+ int identity = makeNodeIdentity(nodeHandle);
+
+ while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
+ {
+ // Assume this can not be null.
+ type = _type(identity);
+
+ // %REVIEW%
+ // Should namespace nodes be retrievable DOM-style as attrs?
+ // If not we need a separate function... which may be desirable
+ // architecturally, but which is ugly from a code point of view.
+ // (If we REALLY insist on it, this code should become a subroutine
+ // of both -- retrieve the node, then test if the type matches
+ // what you're looking for.)
+ if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
+ {
+ Node node = lookupNode(identity);
+ String nodeuri = node.getNamespaceURI();
+
+ if (null == nodeuri)
+ nodeuri = "";
+
+ String nodelocalname = node.getLocalName();
+
+ if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
+ return makeNodeHandle(identity);
+ }
+
+ else // if (DTM.NAMESPACE_NODE != type)
+ {
+ break;
+ }
+ }
+ }
+
+ return DTM.NULL;
+ }
+
+ /**
+  * Get the string-value of a node as a String object
+  * (see http://www.w3.org/TR/xpath#data-model
+  * for the definition of a node's string-value).
+  *
+  * <p>For element, document and document-fragment nodes the value is
+  * gathered by getNodeData(). For text and CDATA nodes it is the
+  * concatenation of the whole logically-contiguous run of DOM text
+  * nodes. For every other type it is the DOM node value.</p>
+  *
+  * @param nodeHandle The node ID.
+  *
+  * @return A string object that represents the string-value of the given node.
+  */
+ public XMLString getStringValue(int nodeHandle)
+ {
+
+   int type = getNodeType(nodeHandle);
+   Node node = getNode(nodeHandle);
+   // %TBD% If an element only has one text node, we should just use it
+   // directly.
+   if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
+      || DTM.DOCUMENT_FRAGMENT_NODE == type)
+   {
+     FastStringBuffer buf = StringBufferPool.get();
+     String s;
+
+     try
+     {
+       getNodeData(node, buf);
+
+       s = (buf.length() > 0) ? buf.toString() : "";
+     }
+     finally
+     {
+       StringBufferPool.free(buf);
+     }
+
+     return m_xstrf.newstr( s );
+   }
+   else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
+   {
+     // If this is a DTM text node, it may be made of multiple DOM text
+     // nodes -- including navigating into Entity References. DOM2DTM
+     // records the first node in the sequence and requires that we
+     // pick up the others when we retrieve the DTM node's value.
+     //
+     // %REVIEW% DOM Level 3 is expected to add a "whole text"
+     // retrieval method which performs this function for us.
+     FastStringBuffer buf = StringBufferPool.get();
+     String s;
+
+     // Release the pooled buffer even if a DOM call throws, exactly
+     // as the element branch above does.
+     try
+     {
+       while(node!=null)
+       {
+         buf.append(node.getNodeValue());
+         node=logicalNextDOMTextNode(node);
+       }
+       s=(buf.length() > 0) ? buf.toString() : "";
+     }
+     finally
+     {
+       StringBufferPool.free(buf);
+     }
+
+     return m_xstrf.newstr( s );
+   }
+   else
+     return m_xstrf.newstr( node.getNodeValue() );
+ }
+
+ /**
+  * Determine if the string-value of a node is whitespace.
+  *
+  * <p>Only text and CDATA nodes can qualify. Their value is taken over
+  * the whole logically-contiguous run of DOM text nodes.</p>
+  *
+  * @param nodeHandle The node Handle.
+  *
+  * @return true if the given node is a text/CDATA node whose coalesced
+  * content is whitespace; false for every other node type.
+  */
+ public boolean isWhitespace(int nodeHandle)
+ {
+   int type = getNodeType(nodeHandle);
+   Node node = getNode(nodeHandle);
+   if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
+   {
+     // If this is a DTM text node, it may be made of multiple DOM text
+     // nodes -- including navigating into Entity References. DOM2DTM
+     // records the first node in the sequence and requires that we
+     // pick up the others when we retrieve the DTM node's value.
+     //
+     // %REVIEW% DOM Level 3 is expected to add a "whole text"
+     // retrieval method which performs this function for us.
+     FastStringBuffer buf = StringBufferPool.get();
+
+     // Hand the buffer back to the pool even if a DOM call throws.
+     try
+     {
+       while(node!=null)
+       {
+         buf.append(node.getNodeValue());
+         node=logicalNextDOMTextNode(node);
+       }
+       return buf.isWhitespace(0, buf.length());
+     }
+     finally
+     {
+       StringBufferPool.free(buf);
+     }
+   }
+   return false;
+ }
+
+ /**
+ * Retrieve the text content of a DOM subtree, appending it into a
+ * user-supplied FastStringBuffer object. Note that attributes are
+ * not considered part of the content of an element.
+ * <p>
+ * There are open questions regarding whitespace stripping.
+ * Currently we make no special effort in that regard, since the standard
+ * DOM doesn't yet provide DTD-based information to distinguish
+ * whitespace-in-element-context from genuine #PCDATA. Note that we
+ * should probably also consider xml:space if/when we address this.
+ * DOM Level 3 may solve the problem for us.
+ * <p>
+ * %REVIEW% Actually, since this method operates on the DOM side of the
+ * fence rather than the DTM side, it SHOULDN'T do
+ * any special handling. The DOM does what the DOM does; if you want
+ * DTM-level abstractions, use DTM-level methods.
+ *
+ * @param node Node whose subtree is to be walked, gathering the
+ * contents of all Text or CDATASection nodes.
+ * @param buf FastStringBuffer into which the contents of the text
+ * nodes are to be concatenated.
+ */
+ protected static void getNodeData(Node node, FastStringBuffer buf)
+ {
+
+ switch (node.getNodeType())
+ {
+ case Node.DOCUMENT_FRAGMENT_NODE :
+ case Node.DOCUMENT_NODE :
+ case Node.ELEMENT_NODE :
+ {
+ for (Node child = node.getFirstChild(); null != child;
+ child = child.getNextSibling())
+ {
+ getNodeData(child, buf);
+ }
+ }
+ break;
+ case Node.TEXT_NODE :
+ case Node.CDATA_SECTION_NODE :
+ case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node
+ buf.append(node.getNodeValue());
+ break;
+ case Node.PROCESSING_INSTRUCTION_NODE :
+ // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
+ break;
+ default :
+ // ignore
+ break;
+ }
+ }
+
+ /**
+  * Given a node handle, return its DOM-style node name. This will
+  * include names such as #text or #document.
+  *
+  * @param nodeHandle the id of the node.
+  * @return String Name of this node, as reported by the backing DOM
+  * node (which is assumed to be non-null).
+  * %REVIEW% Document when empty string is possible...
+  * %REVIEW-COMMENT% It should never be empty, should it?
+  */
+ public String getNodeName(int nodeHandle)
+ {
+   // Assume non-null.
+   return getNode(nodeHandle).getNodeName();
+ }
+
+ /**
+ * Given a node handle, return the XPath node name. This should be
+ * the name as described by the XPath data model, NOT the DOM-style
+ * name.
+ *
+ * @param nodeHandle the id of the node.
+ * @return String Name of this node, which may be an empty string.
+ */
+ public String getNodeNameX(int nodeHandle)
+ {
+
+ String name;
+ short type = getNodeType(nodeHandle);
+
+ switch (type)
+ {
+ case DTM.NAMESPACE_NODE :
+ {
+ Node node = getNode(nodeHandle);
+
+ // assume not null.
+ name = node.getNodeName();
+ if(name.startsWith("xmlns:"))
+ {
+ name = QName.getLocalPart(name);
+ }
+ else if(name.equals("xmlns"))
+ {
+ name = "";
+ }
+ }
+ break;
+ case DTM.ATTRIBUTE_NODE :
+ case DTM.ELEMENT_NODE :
+ case DTM.ENTITY_REFERENCE_NODE :
+ case DTM.PROCESSING_INSTRUCTION_NODE :
+ {
+ Node node = getNode(nodeHandle);
+
+ // assume not null.
+ name = node.getNodeName();
+ }
+ break;
+ default :
+ name = "";
+ }
+
+ return name;
+ }
+
+ /**
+ * Given a node handle, return its XPath-style localname.
+ * (As defined in Namespaces, this is the portion of the name after any
+ * colon character).
+ *
+ * @param nodeHandle the id of the node.
+ * @return String Local name of this node.
+ */
+ public String getLocalName(int nodeHandle)
+ {
+ if(JJK_NEWCODE)
+ {
+ int id=makeNodeIdentity(nodeHandle);
+ if(NULL==id) return null;
+ Node newnode=(Node)m_nodes.elementAt(id);
+ String newname=newnode.getLocalName();
+ if (null == newname)
+ {
+ // XSLT treats PIs, and possibly other things, as having QNames.
+ String qname = newnode.getNodeName();
+ if('#'==qname.charAt(0))
+ {
+ // Match old default for this function
+ // This conversion may or may not be necessary
+ newname="";
+ }
+ else
+ {
+ int index = qname.indexOf(':');
+ newname = (index < 0) ? qname : qname.substring(index + 1);
+ }
+ }
+ return newname;
+ }
+ else
+ {
+ String name;
+ short type = getNodeType(nodeHandle);
+ switch (type)
+ {
+ case DTM.ATTRIBUTE_NODE :
+ case DTM.ELEMENT_NODE :
+ case DTM.ENTITY_REFERENCE_NODE :
+ case DTM.NAMESPACE_NODE :
+ case DTM.PROCESSING_INSTRUCTION_NODE :
+ {
+ Node node = getNode(nodeHandle);
+
+ // assume not null.
+ name = node.getLocalName();
+
+ if (null == name)
+ {
+ String qname = node.getNodeName();
+ int index = qname.indexOf(':');
+
+ name = (index < 0) ? qname : qname.substring(index + 1);
+ }
+ }
+ break;
+ default :
+ name = "";
+ }
+ return name;
+ }
+ }
+
+ /**
+  * Given a namespace handle, return the prefix that the namespace decl
+  * is mapping (the text after the colon of the declaration's name).
+  * Given an attribute or element handle, return the prefix used to map
+  * to the namespace (the text before the colon of the node's name).
+  *
+  * <p> %REVIEW% Are you sure you want "" for no prefix? </p>
+  * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p>
+  *
+  * @param nodeHandle the id of the node.
+  * @return String prefix of this node's name, or "" if the name has no
+  * colon or the node is of any other type.
+  */
+ public String getPrefix(int nodeHandle)
+ {
+   final short type = getNodeType(nodeHandle);
+   final boolean isNamespaceDecl = (DTM.NAMESPACE_NODE == type);
+
+   if (!isNamespaceDecl
+       && DTM.ATTRIBUTE_NODE != type
+       && DTM.ELEMENT_NODE != type)
+   {
+     return "";
+   }
+
+   // The DOM node is assumed to be non-null.
+   final String qname = getNode(nodeHandle).getNodeName();
+   final int colonAt = qname.indexOf(':');
+   if (colonAt < 0)
+     return "";
+
+   // A declaration's name carries the mapped prefix after the colon;
+   // an element or attribute name carries its prefix before it.
+   return isNamespaceDecl
+       ? qname.substring(colonAt + 1)
+       : qname.substring(0, colonAt);
+ }
+
+ /**
+ * Given a node handle, return its DOM-style namespace URI
+ * (As defined in Namespaces, this is the declared URI which this node's
+ * prefix -- or default in lieu thereof -- was mapped to.)
+ *
+ * <p>%REVIEW% Null or ""? -sb</p>
+ *
+ * @param nodeHandle the id of the node.
+ * @return String URI value of this node's namespace, or null if no
+ * namespace was resolved.
+ */
+ public String getNamespaceURI(int nodeHandle)
+ {
+ if(JJK_NEWCODE)
+ {
+ int id=makeNodeIdentity(nodeHandle);
+ if(id==NULL) return null;
+ Node node=(Node)m_nodes.elementAt(id);
+ return node.getNamespaceURI();
+ }
+ else
+ {
+ String nsuri;
+ short type = getNodeType(nodeHandle);
+
+ switch (type)
+ {
+ case DTM.ATTRIBUTE_NODE :
+ case DTM.ELEMENT_NODE :
+ case DTM.ENTITY_REFERENCE_NODE :
+ case DTM.NAMESPACE_NODE :
+ case DTM.PROCESSING_INSTRUCTION_NODE :
+ {
+ Node node = getNode(nodeHandle);
+
+ // assume not null.
+ nsuri = node.getNamespaceURI();
+
+ // %TBD% Handle DOM1?
+ }
+ break;
+ default :
+ nsuri = null;
+ }
+
+ return nsuri;
+ }
+
+ }
+
+ /** Utility function: Given a DOM Text node, determine whether it is
+ * logically followed by another Text or CDATASection node. This may
+ * involve traversing into Entity References.
+ *
+ * %REVIEW% DOM Level 3 is expected to add functionality which may
+ * allow us to retire this.
+ */
+ private Node logicalNextDOMTextNode(Node n)
+ {
+ Node p=n.getNextSibling();
+ if(p==null)
+ {
+ // Walk out of any EntityReferenceNodes that ended with text
+ for(n=n.getParentNode();
+ n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
+ n=n.getParentNode())
+ {
+ p=n.getNextSibling();
+ if(p!=null)
+ break;
+ }
+ }
+ n=p;
+ while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
+ {
+ // Walk into any EntityReferenceNodes that start with text
+ if(n.hasChildNodes())
+ n=n.getFirstChild();
+ else
+ n=n.getNextSibling();
+ }
+ if(n!=null)
+ {
+ // Found a logical next sibling. Is it text?
+ int ntype=n.getNodeType();
+ if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
+ n=null;
+ }
+ return n;
+ }
+
+ /**
+  * Given a node handle, return its node value. This is mostly
+  * as defined by the DOM, but may ignore some conveniences.
+  * <p>
+  * For a DTM text node the value is the concatenation of the first DOM
+  * Text/CDATASection node and every node that logically follows it
+  * (see logicalNextDOMTextNode).  For every other node type the DOM
+  * node's own value is returned.
+  *
+  * @param nodeHandle The node id.
+  * @return String Value of this node, or null if not
+  * meaningful for this node type.
+  */
+ public String getNodeValue(int nodeHandle)
+ {
+   // The _type(nodeHandle) call was taking the lion's share of our
+   // time, and was wrong anyway since it wasn't converting handle to
+   // identity. Inlined it.
+   int type = _exptype(makeNodeIdentity(nodeHandle));
+   type = (NULL != type) ? getNodeType(nodeHandle) : NULL;
+
+   Node node = getNode(nodeHandle);
+   if (TEXT_NODE != type && CDATA_SECTION_NODE != type)
+     return node.getNodeValue();
+
+   // If this is a DTM text node, it may be made of multiple DOM text
+   // nodes -- including navigating into Entity References. DOM2DTM
+   // records the first node in the sequence and requires that we
+   // pick up the others when we retrieve the DTM node's value.
+   //
+   // %REVIEW% DOM Level 3 is expected to add a "whole text"
+   // retrieval method which performs this function for us.
+   Node next = logicalNextDOMTextNode(node);
+   if (null == next)
+     return node.getNodeValue();
+
+   FastStringBuffer buf = StringBufferPool.get();
+   try
+   {
+     buf.append(node.getNodeValue());
+     for (; null != next; next = logicalNextDOMTextNode(next))
+     {
+       buf.append(next.getNodeValue());
+     }
+     return (buf.length() > 0) ? buf.toString() : "";
+   }
+   finally
+   {
+     // Hand the buffer back to the pool even if a DOM call above throws.
+     StringBufferPool.free(buf);
+   }
+ }
+
+ /**
+ * A document type declaration information item has the following properties:
+ *
+ * 1. [system identifier] The system identifier of the external subset, if
+ * it exists. Otherwise this property has no value.
+ *
+ * @return the system identifier String object, or null if there is none.
+ */
+ public String getDocumentTypeDeclarationSystemIdentifier()
+ {
+
+ Document doc;
+
+ if (m_root.getNodeType() == Node.DOCUMENT_NODE)
+ doc = (Document) m_root;
+ else
+ doc = m_root.getOwnerDocument();
+
+ if (null != doc)
+ {
+ DocumentType dtd = doc.getDoctype();
+
+ if (null != dtd)
+ {
+ return dtd.getSystemId();
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Return the public identifier of the external subset,
+ * normalized as described in 4.2.2 External Entities [XML]. If there is
+ * no external subset or if it has no public identifier, this property
+ * has no value.
+ *
+ * @return the public identifier String object, or null if there is none.
+ */
+ public String getDocumentTypeDeclarationPublicIdentifier()
+ {
+
+ Document doc;
+
+ if (m_root.getNodeType() == Node.DOCUMENT_NODE)
+ doc = (Document) m_root;
+ else
+ doc = m_root.getOwnerDocument();
+
+ if (null != doc)
+ {
+ DocumentType dtd = doc.getDoctype();
+
+ if (null != dtd)
+ {
+ return dtd.getPublicId();
+ }
+ }
+
+ return null;
+ }
+
+ /**
+  * Returns the <code>Element</code> whose <code>ID</code> is given by
+  * <code>elementId</code>. If no such element exists, returns
+  * <code>DTM.NULL</code>. Behavior is not defined if more than one element
+  * has this <code>ID</code>. Attributes (including those
+  * with the name "ID") are not of type ID unless so defined by DTD/Schema
+  * information available to the DTM implementation.
+  * Implementations that do not know whether attributes are of type ID or
+  * not are expected to return <code>DTM.NULL</code>.
+  * <p>
+  * The lookup is delegated to the DOM Document's getElementById.  If the
+  * DOM element found has no DTM handle yet, further nodes are pulled in
+  * through getNextNodeIdentity until the element turns up or no more
+  * nodes are available.
+  *
+  * <p>%REVIEW% Presumably IDs are still scoped to a single document,
+  * and this operation searches only within a single document, right?
+  * Wouldn't want collisions between DTMs in the same process.</p>
+  *
+  * @param elementId The unique <code>id</code> value for an element.
+  * @return The handle of the matching element, or <code>DTM.NULL</code>.
+  */
+ public int getElementById(String elementId)
+ {
+   Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
+       ? (Document) m_root : m_root.getOwnerDocument();
+   if (null == doc)
+     return DTM.NULL;
+
+   Node elem = doc.getElementById(elementId);
+   if (null == elem)
+     return DTM.NULL;
+
+   int elemHandle = getHandleFromNode(elem);
+   if (DTM.NULL != elemHandle)
+     return elemHandle;
+
+   // Not mapped yet: keep advancing past the last known node.
+   int identity = m_nodes.size() - 1;
+   while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
+   {
+     // getNextNodeIdentity yields a node identity, not a node handle,
+     // so index m_nodes directly instead of going through getNode(),
+     // which expects a handle.
+     Node node = (Node) m_nodes.elementAt(identity);
+     if (node == elem)
+     {
+       elemHandle = getHandleFromNode(elem);
+       break;
+     }
+   }
+
+   return elemHandle;
+ }
+
+ /**
+ * The getUnparsedEntityURI function returns the URI of the unparsed
+ * entity with the specified name in the same document as the context
+ * node (see [3.3 Unparsed Entities]). It returns the empty string if
+ * there is no such entity.
+ * <p>
+ * XML processors may choose to use the System Identifier (if one
+ * is provided) to resolve the entity, rather than the URI in the
+ * Public Identifier. The details are dependent on the processor, and
+ * we would have to support some form of plug-in resolver to handle
+ * this properly. Currently, we simply return the System Identifier if
+ * present, and hope that it a usable URI or that our caller can
+ * map it to one.
+ * TODO: Resolve Public Identifiers... or consider changing function name.
+ * <p>
+ * If we find a relative URI
+ * reference, XML expects it to be resolved in terms of the base URI
+ * of the document. The DOM doesn't do that for us, and it isn't
+ * entirely clear whether that should be done here; currently that's
+ * pushed up to a higher level of our application. (Note that DOM Level
+ * 1 didn't store the document's base URI.)
+ * TODO: Consider resolving Relative URIs.
+ * <p>
+ * (The DOM's statement that "An XML processor may choose to
+ * completely expand entities before the structure model is passed
+ * to the DOM" refers only to parsed entities, not unparsed, and hence
+ * doesn't affect this function.)
+ *
+ * @param name A string containing the Entity Name of the unparsed
+ * entity.
+ *
+ * @return String containing the URI of the Unparsed Entity, or an
+ * empty string if no such entity exists.
+ */
+ public String getUnparsedEntityURI(String name)
+ {
+
+ String url = "";
+ Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
+ ? (Document) m_root : m_root.getOwnerDocument();
+
+ if (null != doc)
+ {
+ DocumentType doctype = doc.getDoctype();
+
+ if (null != doctype)
+ {
+ NamedNodeMap entities = doctype.getEntities();
+ if(null == entities)
+ return url;
+ Entity entity = (Entity) entities.getNamedItem(name);
+ if(null == entity)
+ return url;
+
+ String notationName = entity.getNotationName();
+
+ if (null != notationName) // then it's unparsed
+ {
+ // The draft says: "The XSLT processor may use the public
+ // identifier to generate a URI for the entity instead of the URI
+ // specified in the system identifier. If the XSLT processor does
+ // not use the public identifier to generate the URI, it must use
+ // the system identifier; if the system identifier is a relative
+ // URI, it must be resolved into an absolute URI using the URI of
+ // the resource containing the entity declaration as the base
+ // URI [RFC2396]."
+ // So I'm falling a bit short here.
+ url = entity.getSystemId();
+
+ if (null == url)
+ {
+ url = entity.getPublicId();
+ }
+ else
+ {
+ // This should be resolved to an absolute URL, but that's hard
+ // to do from here.
+ }
+ }
+ }
+ }
+
+ return url;
+ }
+
+ /**
+  * 5. [specified] A flag indicating whether this attribute was actually
+  * specified in the start-tag of its element, or was defaulted from the
+  * DTD.  The answer comes from the DOM Attr node's getSpecified().
+  *
+  * @param attributeHandle the attribute handle
+  * @return <code>true</code> if the attribute was specified;
+  * <code>false</code> if it was defaulted or if the handle does not
+  * refer to an attribute node.
+  */
+ public boolean isAttributeSpecified(int attributeHandle)
+ {
+   if (DTM.ATTRIBUTE_NODE != getNodeType(attributeHandle))
+     return false;
+
+   final Attr domAttr = (Attr) getNode(attributeHandle);
+   return domAttr.getSpecified();
+ }
+
+ /**
+  * Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM,
+  * since we're wrapped around an existing DOM; the call is accepted and
+  * ignored.
+  *
+  * @param source The IncrementalSAXSource that we want to receive events
+  * from on demand.
+  */
+ public void setIncrementalSAXSource(IncrementalSAXSource source)
+ {
+   // Intentionally empty.
+ }
+
+ /**
+  * getContentHandler returns "our SAX builder" -- the thing that
+  * someone else should send SAX events to in order to extend this
+  * DTM model.  This implementation always returns null.
+  *
+  * @return null if this model doesn't respond to SAX events,
+  * "this" if the DTM object has a built-in SAX ContentHandler,
+  * the IncrementalSAXSource if we're bound to one and should receive
+  * the SAX stream via it for incremental build purposes...
+  */
+ public org.xml.sax.ContentHandler getContentHandler()
+ {
+   return null;
+ }
+
+ /**
+  * Return this DTM's lexical handler.  This implementation always
+  * returns null.
+  *
+  * %REVIEW% Should this return null if construction already done/begun?
+  *
+  * @return null if this model doesn't respond to lexical SAX events,
+  * "this" if the DTM object has a built-in SAX ContentHandler,
+  * the IncrementalSAXSource if we're bound to one and should receive
+  * the SAX stream via it for incremental build purposes...
+  */
+ public org.xml.sax.ext.LexicalHandler getLexicalHandler()
+ {
+   return null;
+ }
+
+
+ /**
+  * Return this DTM's EntityResolver.  This implementation always
+  * returns null.
+  *
+  * @return null if this model doesn't respond to SAX entity ref events.
+  */
+ public org.xml.sax.EntityResolver getEntityResolver()
+ {
+   return null;
+ }
+
+ /**
+  * Return this DTM's DTDHandler.  This implementation always returns
+  * null.
+  *
+  * @return null if this model doesn't respond to SAX dtd events.
+  */
+ public org.xml.sax.DTDHandler getDTDHandler()
+ {
+   return null;
+ }
+
+ /**
+  * Return this DTM's ErrorHandler.  This implementation always returns
+  * null.
+  *
+  * @return null if this model doesn't respond to SAX error events.
+  */
+ public org.xml.sax.ErrorHandler getErrorHandler()
+ {
+   return null;
+ }
+
+ /**
+  * Return this DTM's DeclHandler.  This implementation always returns
+  * null.
+  *
+  * @return null if this model doesn't respond to SAX Decl events.
+  */
+ public org.xml.sax.ext.DeclHandler getDeclHandler()
+ {
+   return null;
+ }
+
+ /**
+  * Guidance to the DTMManager on whether the transformation and the
+  * parse must run simultaneously.  This implementation always answers
+  * false.
+  *
+  * @return true iff we're building this model incrementally (eg
+  * we're partnered with a IncrementalSAXSource) and thus require that
+  * the transformation and the parse run simultaneously.
+  */
+ public boolean needsTwoThreads()
+ {
+   return false;
+ }
+
+ // ========== Direct SAX Dispatch, for optimization purposes ========
+
+ /**
+ * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
+ * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
+ * the definition of <CODE>S</CODE></A> for details.
+ * @param ch Character to check as XML whitespace.
+ * @return =true if <var>ch</var> is XML whitespace; otherwise =false.
+ */
+ private static boolean isSpace(char ch)
+ {
+ return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now.
+ }
+
+ /**
+  * Directly call the
+  * characters method on the passed ContentHandler for the
+  * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
+  * for the definition of a node's string-value). Multiple calls to the
+  * ContentHandler's characters methods may well occur for a single call to
+  * this method.
+  *
+  * @param nodeHandle The node ID.
+  * @param ch A non-null reference to a ContentHandler.
+  * @param normalize when true, the node's string value is obtained via
+  * getStringValue, passed through fixWhiteSpace(true, true, false) and
+  * dispatched as a whole; when false, the DOM node data is dispatched
+  * as it stands.
+  *
+  * @throws org.xml.sax.SAXException
+  */
+ public void dispatchCharactersEvents(
+     int nodeHandle, org.xml.sax.ContentHandler ch,
+     boolean normalize)
+     throws org.xml.sax.SAXException
+ {
+   if (normalize)
+   {
+     XMLString fixed = getStringValue(nodeHandle).fixWhiteSpace(true, true, false);
+     fixed.dispatchCharactersEvents(ch);
+     return;
+   }
+
+   final int type = getNodeType(nodeHandle);
+   Node current = getNode(nodeHandle);
+   dispatchNodeData(current, ch, 0);
+
+   if (TEXT_NODE != type && CDATA_SECTION_NODE != type)
+     return;
+
+   // Text coalition -- a DTM text node may represent multiple
+   // DOM nodes.
+   for (current = logicalNextDOMTextNode(current);
+        null != current;
+        current = logicalNextDOMTextNode(current))
+   {
+     dispatchNodeData(current, ch, 0);
+   }
+ }
+
+ /**
+ * Retrieve the text content of a DOM subtree, appending it into a
+ * user-supplied FastStringBuffer object. Note that attributes are
+ * not considered part of the content of an element.
+ * <p>
+ * There are open questions regarding whitespace stripping.
+ * Currently we make no special effort in that regard, since the standard
+ * DOM doesn't yet provide DTD-based information to distinguish
+ * whitespace-in-element-context from genuine #PCDATA. Note that we
+ * should probably also consider xml:space if/when we address this.
+ * DOM Level 3 may solve the problem for us.
+ * <p>
+ * %REVIEW% Note that as a DOM-level operation, it can be argued that this
+ * routine _shouldn't_ perform any processing beyond what the DOM already
+ * does, and that whitespace stripping and so on belong at the DTM level.
+ * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
+ *
+ * @param node Node whose subtree is to be walked, gathering the
+ * contents of all Text or CDATASection nodes.
+ */
+ protected static void dispatchNodeData(Node node,
+ org.xml.sax.ContentHandler ch,
+ int depth)
+ throws org.xml.sax.SAXException
+ {
+
+ switch (node.getNodeType())
+ {
+ case Node.DOCUMENT_FRAGMENT_NODE :
+ case Node.DOCUMENT_NODE :
+ case Node.ELEMENT_NODE :
+ {
+ for (Node child = node.getFirstChild(); null != child;
+ child = child.getNextSibling())
+ {
+ dispatchNodeData(child, ch, depth+1);
+ }
+ }
+ break;
+ case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
+ case Node.COMMENT_NODE :
+ if(0 != depth)
+ break;
+ // NOTE: Because this operation works in the DOM space, it does _not_ attempt
+ // to perform Text Coalition. That should only be done in DTM space.
+ case Node.TEXT_NODE :
+ case Node.CDATA_SECTION_NODE :
+ case Node.ATTRIBUTE_NODE :
+ String str = node.getNodeValue();
+ if(ch instanceof CharacterNodeHandler)
+ {
+ ((CharacterNodeHandler)ch).characters(node);
+ }
+ else
+ {
+ ch.characters(str.toCharArray(), 0, str.length());
+ }
+ break;
+// /* case Node.PROCESSING_INSTRUCTION_NODE :
+// // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
+// break; */
+ default :
+ // ignore
+ break;
+ }
+ }
+
+ /**
+  * TreeWalker kept for dispatchToEvents, which uses it whenever it has
+  * no ContentHandler set at the time of the call.
+  */
+ TreeWalker m_walker = new TreeWalker(null);
+
+ /**
+  * Directly create SAX parser events from a subtree.
+  * <p>
+  * The shared m_walker is used unless it already has a ContentHandler
+  * set, in which case a fresh TreeWalker is created for this call.
+  * Whichever walker is used has its ContentHandler reset to null
+  * afterwards, even if the traversal throws.
+  *
+  * @param nodeHandle The node ID.
+  * @param ch A non-null reference to a ContentHandler.
+  *
+  * @throws org.xml.sax.SAXException
+  */
+ public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
+     throws org.xml.sax.SAXException
+ {
+   // NOTE(review): a handler already being set presumably means the
+   // shared walker is in the middle of another traversal -- confirm.
+   final TreeWalker shared = m_walker;
+   final TreeWalker walker = (null == shared.getContentHandler())
+       ? shared
+       : new TreeWalker(null);
+
+   walker.setContentHandler(ch);
+   try
+   {
+     walker.traverse(getNode(nodeHandle));
+   }
+   finally
+   {
+     walker.setContentHandler(null);
+   }
+ }
+
+ /**
+  * Optional interface for a ContentHandler that wants to be given the
+  * DOM node holding character data rather than a character array.
+  * dispatchNodeData checks for it with instanceof.
+  */
+ public interface CharacterNodeHandler
+ {
+   /**
+    * Receive a DOM node whose value is character data.
+    *
+    * @param node the DOM node being reported.
+    *
+    * @throws org.xml.sax.SAXException
+    */
+   void characters(Node node) throws org.xml.sax.SAXException;
+ }
+
+ /**
+  * For the moment all the run time properties are ignored by this
+  * class; the call is accepted and does nothing.
+  *
+  * @param property a <code>String</code> value
+  * @param value an <code>Object</code> value
+  */
+ public void setProperty(String property, Object value)
+ {
+   // Intentionally empty.
+ }
+
+ /**
+  * No source information is available for DOM2DTM, so return
+  * <code>null</code> here, whatever node is asked about.
+  *
+  * @param node an <code>int</code> value
+  * @return null
+  */
+ public SourceLocator getSourceLocatorFor(int node)
+ {
+   final SourceLocator unavailable = null;
+   return unavailable;
+ }
+
+}