# Source code for galaxy_dive.data_management.trove_management

#!/usr/bin/env python
'''Code for managing data troves.

@author: Zach Hafen
@contact: zachary.h.hafen@gmail.com
@status: Development
'''

import itertools
import os

import galaxy_dive.utils.utilities as utilities

########################################################################
########################################################################


class TroveManager( object ):
    '''Class for managing troves of data.

    A trove is a set of data files, one per combination of arguments. The
    combinations are the Cartesian product of the iterables passed to the
    constructor, each combination is turned into a filename by
    self.get_file(), and a combination is treated as done once that file
    exists on disk.
    '''

    @utilities.store_parameters
    def __init__( self, file_format, *args ):
        '''Constructor.

        The body is intentionally empty. The methods of this class read
        self.file_format and self.args and nothing in the class assigns them,
        so the utilities.store_parameters decorator is relied on to store the
        constructor arguments as attributes of the same name.

        Args:
            file_format (str) :
                Format for data files. Filled in with str.format() by
                self.get_file().

            *args :
                Arguments to pass to self.get_file() to get different data
                files. Each one is an iterable of values; the Cartesian
                product of all of them gives the argument combinations.
                With the default self.get_file() the first iterable holds
                the data directories.

        Returns:
            TroveManager object.
        '''

        pass

    ########################################################################

    def get_file( self, *args ):
        '''Default method for getting the data filename.

        Args:
            *args :
                Arguments provided. Assumes args[0] is the data dir. The
                remaining arguments are passed positionally to
                self.file_format.format().

        Returns:
            Filename for a given combination of args, i.e. the formatted
            file name joined onto the data dir.
        '''

        data_dir = args[0]
        filename = self.file_format.format( *args[1:] )

        return os.path.join( data_dir, filename )

    ########################################################################

    @property
    def combinations( self ):
        '''Returns:
            All combinations of arguments, as a list of tuples. Computed on
            first access and cached in self._combinations afterwards.
        '''

        if not hasattr( self, '_combinations' ):
            self._combinations = list( itertools.product( *self.args ) )

        return self._combinations

    ########################################################################

    @property
    def data_files( self ):
        '''Returns:
            All data files that should be part of the trove, in the same
            order as self.combinations. Computed on first access and cached
            in self._data_files afterwards.
        '''

        if not hasattr( self, '_data_files' ):
            self._data_files = [
                self.get_file( *args ) for args in self.combinations
            ]

        return self._data_files

    ########################################################################

    def get_incomplete_combinations( self ):
        '''Returns:
            Combinations in the trove that have not yet been done, i.e. those
            whose data file does not currently exist. The file system is
            checked again on every call.
        '''

        # self.combinations and self.data_files are index-aligned, so they
        # can be walked together.
        return [
            combination
            for combination, data_file
            in zip( self.combinations, self.data_files )
            if not os.path.isfile( data_file )
        ]

    ########################################################################

    def get_incomplete_data_files( self ):
        '''Returns:
            Data files in the trove that have not yet been done.
        '''

        return [
            self.get_file( *args )
            for args in self.get_incomplete_combinations()
        ]

    ########################################################################

    def get_next_args_to_use( self, when_done='return_last' ):
        '''Get the first combination of arguments that is not yet done.

        This is a thin convenience wrapper around
        self.get_incomplete_combinations().

        Args:
            when_done (str) :
                What to do when there are no incomplete combinations? Defaults
                to returning the last of self.combinations. Options are
                'return_last' (return self.combinations[-1]) and
                'return_0' (return the integer 0). Only looked at when
                there is nothing left to do.

        Returns:
            Next set of arguments to use.

        Raises:
            ValueError :
                If there are no incomplete combinations and when_done is not
                one of the options above.
        '''

        # Scan the file system once and reuse the result. Scanning a second
        # time doubles the work and can give a different (possibly empty)
        # answer if files are created in between.
        incomplete_combinations = self.get_incomplete_combinations()

        if len( incomplete_combinations ) > 0:
            return incomplete_combinations[0]

        if when_done == 'return_last':
            return self.combinations[-1]
        elif when_done == 'return_0':
            return 0

        raise ValueError(
            "Unrecognized when_done, {!r}. "
            "Options are 'return_last' and 'return_0'.".format( when_done )
        )