I’m working on a program to validate XML files. An intermediate step is determining all of the potential XPaths that the system could run into, so I co-opted the org.exolab.castor.xml.schema.reader package to read the XSD file into Java objects. I was happy to see that Castor’s schema reader worked flawlessly, although the documentation could be improved. The documentation issue is one reason that I’m posting this code.

package com.affy;

import java.util.*;
import org.exolab.castor.xml.schema.*;
import org.exolab.castor.xml.schema.reader.*;
import org.exolab.castor.xml.schema.simpletypes.*;
import org.xml.sax.*;

/**
 * Reads an XSD file through Castor's schema object model and prints every
 * XPath (elements and attributes) reachable from a chosen starting element.
 *
 * <p>Most of the code is a plain recursive walk. The one subtlety is that a
 * type may (directly or indirectly) contain itself, for example
 * {@code <foo><bar><foo/></bar></foo>}. To avoid recursing forever, the names
 * of the types currently being expanded are kept on a stack, and an element
 * whose type is already on that stack is not expanded again.
 *
 * <p>All state is static and unsynchronized; the class is intended to be run
 * as a single-threaded command-line demonstration.
 */
public class XpathFromSchema {

	/** Number of XPaths printed so far; also used as the prefix of each output line. */
	private static int numXpaths = 0;

	/**
	 * Names of the named types on the current recursion path, innermost on top.
	 * Every push is paired with exactly one pop by the same invocation of
	 * {@link #dump(String, ElementDecl)}.
	 */
	private static final Stack visitedTypes = new Stack();

	/**
	 * Convenience entry point: dumps the XPaths below {@code elementDecl},
	 * treating it as the document root (empty XPath prefix).
	 *
	 * @param elementDecl the element to start from; {@code null} is ignored
	 */
	private static void dump(final ElementDecl elementDecl) {
		dump("", elementDecl);
	}

	/**
	 * Prints the XPath of {@code elementDecl}, the XPaths of its attributes, and
	 * then recurses into its child elements.
	 *
	 * <p>The starting node is not necessarily the document root, so the caller
	 * may supply the XPath of the parent as a prefix.
	 *
	 * @param xpath       XPath of the parent ("" for the root)
	 * @param elementDecl the element to dump; {@code null} is ignored
	 */
	private static void dump(final String xpath, final ElementDecl elementDecl) {
		if (elementDecl == null) {
			return;
		}

		// getType() is not guaranteed to return a type here; treat a missing
		// type as "nothing to expand" instead of failing with a NullPointerException.
		final XMLType typeReference = elementDecl.getType();
		final String typeName = (typeReference == null) ? null : typeReference.getName();

		if (typeName != null && visitedTypes.contains(typeName)) {
			// The type is already being expanded further up the call chain;
			// continuing would recurse forever. Nothing was pushed by this
			// invocation, so nothing must be popped either.
			return;
		}

		// Only named types are tracked.
		// NOTE(review): anonymous (unnamed) types are therefore not protected
		// against self-reference - confirm whether that can occur in your schemas.
		final boolean pushed = (typeName != null);
		if (pushed) {
			visitedTypes.push(typeName);
		}

		try {
			final String newXpath = xpath + "/" + elementDecl.getName();
			printXpath(newXpath);

			if (typeReference != null && typeReference.isComplexType()) {
				dumpComplexType(newXpath, (ComplexType) typeReference);
			}
		} finally {
			// Pop exactly what this invocation pushed, even if the walk threw,
			// so that entries belonging to ancestors are never removed.
			if (pushed) {
				visitedTypes.pop();
			}
		}
	}

	/** Prints one numbered XPath line and advances the counter. */
	private static void printXpath(final String path) {
		System.out.println(numXpaths + ": " + path);
		numXpaths++;
	}

	/** Prints the attribute XPaths of a complex type and walks its content particles. */
	private static void dumpComplexType(final String elementXpath, final ComplexType complexType) {
		Enumeration attributes = complexType.getAttributeDecls();
		while (attributes.hasMoreElements()) {
			AttributeDecl attributeDecl = (AttributeDecl) attributes.nextElement();
			printXpath(elementXpath + "/@" + attributeDecl.getName());
		}

		Enumeration particles = complexType.enumerate();
		while (particles.hasMoreElements()) {
			Object particle = particles.nextElement();
			if (particle instanceof Group) {
				dumpGroup(elementXpath, (Group) particle);
			} else {
				System.out.println(" [dump] ***** Unknown particle type: " + particle.getClass().getName());
			}
		}
	}

	/**
	 * Walks the particles of a group, dumping every element found.
	 *
	 * <p>In Castor's schema model the content of a complex type is exposed as
	 * {@code Group} particles, which can nest. A group does not add a step to
	 * the XPath, so nested groups and the elements inside them are dumped
	 * under the same {@code xpath} prefix.
	 *
	 * @param xpath XPath of the element that owns the group
	 * @param group the group to walk; {@code null} is ignored
	 */
	public static void dumpGroup(String xpath, final Group group) {
		if (group == null) {
			return;
		}
		Enumeration particles = group.enumerate();
		while (particles.hasMoreElements()) {
			Object o = particles.nextElement();
			if (o instanceof Group) {
				dumpGroup(xpath, (Group) o);
			} else if (o instanceof ElementDecl) {
				dump(xpath, (ElementDecl) o);
			} else {
				System.out.println("[dumpGroup] ***** Unknown particle type: " + o.getClass().getName());
			}
		}
	}

	/**
	 * Demonstration entry point.
	 *
	 * @param args optional: {@code args[0]} is the system id (URL) of the XSD
	 *             file, {@code args[1]} the name of the top-level element to
	 *             start from. When omitted, the placeholder values below are
	 *             used and must be edited by hand.
	 */
	public static void main(String[] args) {
		final boolean hasFile = args != null && args.length > 0;
		final boolean hasElement = args != null && args.length > 1;
		String xsdFile = hasFile ? args[0] : "file:///[PUT_YOUR_XSD_FILESPEC_HERE.";
		String elementName = hasElement ? args[1] : "[NAME_OF_ELEMENT]";

		try {
			SchemaReader a = new SchemaReader(new InputSource(xsdFile));
			Schema s = a.read();

			// since this is a demonstration program, a single top-level element
			// is selected as the starting point.
			ElementDecl elementDecl = s.getElementDecl(elementName);
			if (elementDecl == null) {
				// Without this message a misspelled element name would just
				// produce no output at all.
				System.err.println("No top-level element named '" + elementName + "' found in " + xsdFile);
			}
			dump(elementDecl);

		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			System.out.println("Done.");
		}
	}
}