2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2020

03/31/2013: Example using Accumulo's RegExFilter class

Example using Accumulo's RegExFilter class
package com.affy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.IteratorUtil;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.SortedMapIterator;
import org.apache.accumulo.core.iterators.system.MapFileIterator;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;

/**
 * Stand-alone playground that runs Accumulo's {@link RegExFilter} over an
 * in-memory {@link SortedMap} instead of a live Accumulo table.
 *
 * <p>Two entries are loaded, one whose row is {@code "1111"} and one whose row
 * is {@code "/1111"}; the filter is configured with the regular expression
 * {@code "/.*"} and every entry it lets through is written to the log.
 */
public class AccumuloRegExIteratorPlayground {

    // Static like the logger in the sibling GrepIterator example: one logger per class, not per instance.
    private static final Logger log = Logger.getLogger(AccumuloRegExIteratorPlayground.class);

    /** Empty column-family list handed to {@code seek}; combined with {@code inclusive=false} below. */
    private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();

    /**
     * Builds the sample data, configures and initialises the filter, then logs
     * every key/value pair the filter exposes.
     *
     * @throws IOException if initialising, seeking or advancing the iterator fails
     * @throws RuntimeException if the filter rejects the generated options
     */
    public void process() throws IOException {
        final String regularExpression = "/.*";

        final SortedMap<Key, Value> input = new TreeMap<Key, Value>();
        input.put(new Key("1111", "2222", "3333", 0), new Value("4444".getBytes()));
        input.put(new Key("/1111", "2222", "3333", 0), new Value("4444".getBytes()));

        final RegExFilter rei = new RegExFilter();
        final IteratorSetting is = new IteratorSetting(1, RegExFilter.class);
        // Only the first regex slot is populated; the other three are left null.
        // NOTE(review): per the RegExFilter API the slots are row, column family,
        // column qualifier and value, followed by the or-fields flag - confirm
        // against the Accumulo version in use.
        RegExFilter.setRegexs(is, regularExpression, null, null, null, false);

        // Fail fast on a bad configuration before wiring the iterator up.
        if (!rei.validateOptions(is.getOptions())) {
            throw new RuntimeException("invalid options.");
        }

        rei.init(new SortedMapIterator(input), is.getOptions(), newStandaloneEnvironment());
        rei.seek(new Range(), EMPTY_COL_FAMS, false);

        while (rei.hasTop()) {
            final Key key = rei.getTopKey();
            final Value value = rei.getTopValue();
            log.info(key + " --> " + value);
            rei.next();
        }
    }

    /**
     * Creates a minimal {@link IteratorEnvironment} for use outside a tablet server.
     *
     * <p>Only the map-file reader and configuration accessors are implemented
     * (both backed by the default Accumulo configuration); the remaining
     * methods throw {@link UnsupportedOperationException}.
     *
     * @return a new environment instance
     */
    private static IteratorEnvironment newStandaloneEnvironment() {
        return new IteratorEnvironment() {
            @Override
            public SortedKeyValueIterator<Key, Value> reserveMapFileReader(String mapFileName) throws IOException {
                Configuration conf = CachedConfiguration.getInstance();
                FileSystem fs = FileSystem.get(conf);
                return new MapFileIterator(AccumuloConfiguration.getDefaultConfiguration(), fs, mapFileName, conf);
            }

            @Override
            public AccumuloConfiguration getConfig() {
                return AccumuloConfiguration.getDefaultConfiguration();
            }

            @Override
            public IteratorUtil.IteratorScope getIteratorScope() {
                throw new UnsupportedOperationException("Not supported yet.");
            }

            @Override
            public boolean isFullMajorCompaction() {
                throw new UnsupportedOperationException("Not supported yet.");
            }

            @Override
            public void registerSideChannel(SortedKeyValueIterator<Key, Value> iter) {
                throw new UnsupportedOperationException("Not supported yet.");
            }
        };
    }

    /**
     * Command-line entry point; runs the demonstration once.
     *
     * @param args ignored
     * @throws IOException propagated from {@link #process()}
     */
    public static void main(final String[] args) throws IOException {
        AccumuloRegExIteratorPlayground driver = new AccumuloRegExIteratorPlayground();
        driver.process();
    }
}

03/30/2013: Testing Your GrepIterator Without Running Accumulo

Testing Your GrepIterator Without Running Accumulo
package com.affy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.impl.MasterClient;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.SortedMapIterator;
import org.apache.accumulo.core.iterators.user.GrepIterator;
import org.apache.log4j.Logger;

/**
 * Stand-alone playground that runs Accumulo's {@link GrepIterator} over an
 * in-memory {@link SortedMap}, so the iterator can be tried without a running
 * Accumulo instance.
 *
 * <p>Two entries are loaded, one whose row is {@code "1111"} and one whose row
 * is {@code "/1111"}; the iterator is configured with the term {@code "/1"}
 * and every entry it exposes is written to the log.
 */
public class AccumuloGrepIteratorPlayground {

    private static final Logger log = Logger.getLogger(AccumuloGrepIteratorPlayground.class);

    /**
     * Empty column-family list handed to {@code seek}. Parameterized (rather
     * than a raw {@code Collection}) so the call is type-checked.
     */
    private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();

    /**
     * Builds the sample data, configures the grep iterator and logs every
     * key/value pair it exposes.
     *
     * @param args ignored
     * @throws IOException if initialising, seeking or advancing the iterator fails
     */
    public static void main(final String[] args) throws IOException {
        final String term = "/1";

        final SortedMap<Key, Value> input = new TreeMap<Key, Value>();
        input.put(new Key("1111", "2222", "3333", 0), new Value("4444".getBytes()));
        input.put(new Key("/1111", "2222", "3333", 0), new Value("4444".getBytes()));

        final GrepIterator grepIterator = new GrepIterator();
        final IteratorSetting iteratorSetting = new IteratorSetting(1, GrepIterator.class);
        GrepIterator.setTerm(iteratorSetting, term);
        // No IteratorEnvironment is supplied here.
        // NOTE(review): relies on GrepIterator.init not dereferencing the
        // environment - confirm if the Accumulo version changes.
        grepIterator.init(new SortedMapIterator(input), iteratorSetting.getOptions(), null);
        grepIterator.seek(new Range(), EMPTY_COL_FAMS, false);

        while (grepIterator.hasTop()) {
            final Key key = grepIterator.getTopKey();
            final Value value = grepIterator.getTopValue();
            log.info(key + " --> " + value);
            grepIterator.next();
        }

    }
}

The pom.xml for the GrepIterator example project looks like this:

<!-- Maven build descriptor for the AccumuloGrepIterator example project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates; the build produces a plain jar. -->
    <groupId>com.mycompany</groupId>
    <artifactId>AccumuloGrepIterator</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>AccumuloGrepIterator</name>
    <url>http://maven.apache.org</url>

    <properties>
        <!-- Source files are read as UTF-8. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Presumably the source of the org.apache.accumulo.core classes the example imports; verify. -->
        <dependency>
            <groupId>org.apache.accumulo</groupId>
            <artifactId>accumulo-core</artifactId>
            <version>1.4.2</version>
        </dependency>
        <!-- NOTE(review): presumably needed for Hadoop classes that accumulo-core references; confirm this version is compatible with Accumulo 1.4.2. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>0.23.6</version>
        </dependency>
        <!-- Test-scope only: available to test code, not part of the compile or runtime classpath of the jar. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>