
11/23/2013: Reading the Accumulo Metadata Table to Learn How Many Entries Are In Each Tablet.

After compacting the table, you can run the following program to learn how many entries are in each tablet. Accumulo does a nice job of splitting tables by byte size, but if you have small records then it's fairly easy to run into the "Curse of the Last Reducer!" I've run into situations where some tablets have 50K entries and others have 50M. (Two short sketches follow the program: the compaction call itself and a per-tablet variant of the counting loop.)

package com.affy;

import java.io.IOException;
import java.io.InputStream;
import java.util.Map.Entry;
import java.util.Properties;
import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.IsolatedScanner;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.ZooKeeperInstance;
import org.apache.accumulo.core.client.impl.Tables;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.KeyExtent;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.io.Text;

public class GetEntryCountForTable {

    public static void main(String[] args) throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {

        String accumuloTable = "tableA";

        // accumulo.properties must be on the classpath; it supplies the
        // connection settings read below.
        Properties prop = new Properties();
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        InputStream in = loader.getResourceAsStream("accumulo.properties");
        prop.load(in);

        String user = prop.getProperty("accumulo.user");
        String password = prop.getProperty("accumulo.password");
        String instanceInfo = prop.getProperty("accumulo.instance");
        String zookeepers = prop.getProperty("accumulo.zookeepers");

        Instance instance = new ZooKeeperInstance(instanceInfo, zookeepers);

        Connector connector = instance.getConnector(user, new PasswordToken(password));

        // The metadata table is keyed by the internal table id, not the table name.
        String tableId = Tables.getNameToIdMap(instance).get(accumuloTable);

        // Scan the !METADATA table for this table's datafile entries. The
        // IsolatedScanner guarantees we never see a tablet's row mid-update.
        Scanner scanner = new IsolatedScanner(connector.createScanner(Constants.METADATA_TABLE_NAME, Constants.NO_AUTHS));
        scanner.fetchColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY);
        scanner.setRange(new KeyExtent(new Text(tableId), null, null).toMetadataRange());

        // Use longs: file sizes (in bytes) and entry counts can overflow an int.
        long fileSize = 0;
        long numEntries = 0;
        int numSplits = 0;
        for (Entry<Key, Value> entry : scanner) {
            // Each datafile value has the form "<size-in-bytes>,<number-of-entries>".
            String value = entry.getValue().toString();
            String[] components = value.split(",");
            fileSize += Long.parseLong(components[0]);
            numEntries += Long.parseLong(components[1]);
            // After a full compaction each tablet has a single file, so this
            // effectively counts tablets.
            numSplits++;
        }

        long average = numSplits == 0 ? 0 : numEntries / numSplits;

        System.out.println(String.format("fileSize: %,d", fileSize));
        System.out.println(String.format("numEntries: %,d", numEntries));
        System.out.println(String.format("average: %,d", average));

    }
}
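
By the way, the compaction mentioned at the top can be run from the shell (compact -t tableA -w) or triggered from Java. Here is a minimal sketch that reuses the connector from the program; the (null, null) arguments cover the whole table, and the two booleans ask Accumulo to flush in-memory data first and to wait until the compaction finishes:

// Force a full major compaction so each tablet ends up with a single datafile.
connector.tableOperations().compact(accumuloTable, null, null, true, true);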
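
The program prints totals and an average; if you want the per-tablet breakdown promised by the title, a grouping loop like this can replace the counting loop. It is a sketch that reuses the scanner from the program and additionally needs java.util.Map and java.util.TreeMap imports; each metadata row names one tablet, so grouping by row yields one line per tablet:

Map<String, Long> entriesPerTablet = new TreeMap<String, Long>();
for (Entry<Key, Value> entry : scanner) {
    // The metadata row ("<tableId>;<endRow>", or "<tableId><" for the last
    // tablet) identifies the tablet that owns this datafile.
    String tablet = entry.getKey().getRow().toString();
    String[] components = entry.getValue().toString().split(",");
    long count = Long.parseLong(components[1]);
    Long soFar = entriesPerTablet.get(tablet);
    entriesPerTablet.put(tablet, soFar == null ? count : soFar + count);
}
for (Entry<String, Long> e : entriesPerTablet.entrySet()) {
    System.out.println(String.format("%s: %,d entries", e.getKey(), e.getValue()));
}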



