import copy

from biothings.utils.dataload import alwayslist
class BaseMapper(object):
    """
    Basic mapper used to convert documents.

    If the mapper's name matches the "mapper" declared in a source's
    metadata, mapper.process(docs) is used to process/convert the
    passed documents.

    This class only defines the interface: load() and process() both
    raise NotImplementedError and must be provided by sub-classes.
    """

    def __init__(self, name=None, *args, **kwargs):
        """
        'name' identifies this mapper. Extra positional and keyword
        arguments are accepted and ignored here.
        """
        self.name = name

    def load(self):
        """
        Do whatever is required to fill mapper with mapping data.
        Can be called multiple times, only the first call will load data.
        """
        raise NotImplementedError("sub-class and implement me")

    def process(self, docs):
        """
        Convert given docs into other docs.
        """
        raise NotImplementedError("sub-class and implement me")
 
class IDBaseMapper(BaseMapper):
    """
    Provide mapping between different sources.

    The mapping is held in ``self.map`` and filled lazily: translate(),
    ``in`` and len() all call load() first for as long as need_load()
    reports that ``self.map`` is still None. This class doesn't define
    load() itself; a sub-class has to provide one that sets ``self.map``.
    """

    def __init__(self, name=None, convert_func=None, *args, **kwargs):
        """
        'name' may match a "mapper" metadata field (see uploaders). If None, mapper
        will be applied to any document from a resource without "mapper" argument.

        'convert_func', if given, is applied to an _id before it's looked
        up in the mapping (see translate()).
        """
        super(IDBaseMapper, self).__init__(name=name)
        self.map = None
        self.convert_func = convert_func

    def translate(self, _id, transparent=False):
        """
        Return _id translated through mapper.

        If there's no match, the original _id is returned when
        'transparent' is true, None otherwise. The lookup key is
        convert_func(_id) when a convert_func was given, else _id itself.
        """
        if self.need_load():
            self.load()
        # Explicit conditional instead of "transparent and _id or None":
        # the and/or form turned falsy ids (0, "") into None even when
        # transparent was requested.
        default = _id if transparent else None
        key = self.convert_func(_id) if self.convert_func else _id
        return self.map.get(key, default)

    def __contains__(self, _id):
        # NOTE: membership is tested on _id as passed; unlike translate(),
        # convert_func is not applied here.
        if self.need_load():
            self.load()
        return _id in self.map

    def __len__(self):
        if self.need_load():
            self.load()
        return len(self.map)

    def process(self, docs, key_to_convert="_id", transparent=True):
        """
        Process 'key_to_convert' document key using mapping.

        If transparent (default) and no match, original key will be used
        (so there's no change). If not transparent and no match, document
        will be discarded.

        When the mapping gives several ids for one document, one document
        per id is yielded. Each of them is a distinct object: shallow
        copies are yielded for all ids but the last, which reuses (and
        updates in place) the incoming document.

        Warning: key to be translated must not be None (it's considered
        a non-match)
        """
        for doc in docs:
            new_id = self.translate(doc.get(key_to_convert), transparent)
            if new_id is None and not transparent:
                continue
            new_ids = list(alwayslist(new_id))
            last = len(new_ids) - 1
            for pos, one_id in enumerate(new_ids):
                # Yielding the very same dict for every id would leave a
                # consumer that collects the results with N references to
                # one document, all carrying the last id.
                out = doc if pos == last else copy.copy(doc)
                out[key_to_convert] = str(one_id)
                yield out

    def need_load(self):
        """
        Return True while the mapping hasn't been loaded (self.map is None).
        """
        return self.map is None
 
class TransparentMapper(BaseMapper):
    """
    Mapper which doesn't convert anything: there's nothing to load and
    process() hands back the documents it was given.
    """

    def load(self, *args, **kwargs):
        """
        Nothing to load for this mapper; arguments are ignored.
        """
        pass

    def process(self, docs, *args, **kwargs):
        """
        Return 'docs' unchanged; extra arguments are accepted and ignored.
        """
        return docs