// Source: organism.js

/* @module Organism */

var _ = require('lodash');
var highland = require('highland');
var httpErrors = require('./http-errors.js');
var hyperquest = require('hyperquest');
var csv = require('csv-streamify');
var csvOptions = {objectMode: true, delimiter: '\t'};
var Utils = require('./utils.js');

/**
 * Used internally to create a new Organism instance
 * @class
 * @protected
 * @memberof BridgeDb
 * @param {Object} instance
 */
var Organism = function(instance) {
  'use strict';

  var jsonldRx = instance.jsonldRx;
  var normalizeText = jsonldRx.normalizeText;

  /**
   * See {@link http://identifiers.org/snomedct/410607006|snomedct:Organism}
   * @typedef {Object} Organism Organism with as many as possible of the properties listed below.
   * @property {JsonldContext} @context JSON-LD context.
   * @property {Iri} id Preferred IRI for identifying an organism,
   *    using {@link http://identifiers.org/taxonomy/|Taxonomy ontology}
   * @property {Object} nameLanguageMap
   * @property {String} nameLanguageMap.en English name, when available.
   * @property {String} nameLanguageMap.la Full Latin name.
   */

  // Lookup table from an organism's full Latin name to its identifiers.org
  // Taxonomy IRI. _getAll() reads it to fill in the `id` of every organism it
  // emits; a Latin name that is not listed here produces an undefined `id`.
  // Note: 'Oryza indica' and 'Oryza sativa Indica Group' both map to
  // http://identifiers.org/taxonomy/39946.
  var latinNameToIriMappings = {
    'Anopheles gambiae': 'http://identifiers.org/taxonomy/7165',
    'Arabidopsis thaliana': 'http://identifiers.org/taxonomy/3702',
    'Aspergillus niger': 'http://identifiers.org/taxonomy/5061',
    'Bacillus subtilis': 'http://identifiers.org/taxonomy/1423',
    'Bos taurus': 'http://identifiers.org/taxonomy/9913',
    'Caenorhabditis elegans': 'http://identifiers.org/taxonomy/6239',
    'Canis familiaris': 'http://identifiers.org/taxonomy/9615',
    'Ciona intestinalis': 'http://identifiers.org/taxonomy/7719',
    'Danio rerio': 'http://identifiers.org/taxonomy/7955',
    'Drosophila melanogaster': 'http://identifiers.org/taxonomy/7227',
    'Escherichia coli': 'http://identifiers.org/taxonomy/562',
    'Equus caballus': 'http://identifiers.org/taxonomy/9796',
    'Gallus gallus': 'http://identifiers.org/taxonomy/9031',
    'Gibberella zeae': 'http://identifiers.org/taxonomy/5518',
    'Glycine max': 'http://identifiers.org/taxonomy/3847',
    'Homo sapiens': 'http://identifiers.org/taxonomy/9606',
    'Hordeum vulgare': 'http://identifiers.org/taxonomy/4513',
    'Macaca mulatta': 'http://identifiers.org/taxonomy/9544',
    'Mus musculus': 'http://identifiers.org/taxonomy/10090',
    'Mycobacterium tuberculosis': 'http://identifiers.org/taxonomy/1773',
    'Ornithorhynchus anatinus': 'http://identifiers.org/taxonomy/9258',
    'Oryza indica': 'http://identifiers.org/taxonomy/39946',
    'Oryza sativa': 'http://identifiers.org/taxonomy/4530',
    'Oryza sativa Indica Group': 'http://identifiers.org/taxonomy/39946',
    'Populus trichocarpa': 'http://identifiers.org/taxonomy/3694',
    'Pan troglodytes': 'http://identifiers.org/taxonomy/9598',
    'Rattus norvegicus': 'http://identifiers.org/taxonomy/10116',
    'Saccharomyces cerevisiae': 'http://identifiers.org/taxonomy/4932',
    'Solanum lycopersicum': 'http://identifiers.org/taxonomy/4081',
    'Sus scrofa': 'http://identifiers.org/taxonomy/9823',
    'Vitis vinifera': 'http://identifiers.org/taxonomy/29760',
    'Xenopus tropicalis': 'http://identifiers.org/taxonomy/8364',
    'Zea mays': 'http://identifiers.org/taxonomy/4577'
  };

  /**
   * @private
   *
   * Resolve an organism identifier to the organism's full Latin name.
   *
   * @param {String} organismIdentifier - Anything _normalize accepts, e.g. a name in
   *      Latin (full like "Escherichia coli" or abbreviated like "E. coli") or in
   *      English.
   * @return {Stream<String|Boolean>} Stream emitting the full Latin name of the
   *      matched organism, or false when that organism carries no Latin name.
   */
  function _convertToLatinName(organismIdentifier) {
    var matchedOrganismStream = _normalize(organismIdentifier);

    return matchedOrganismStream.map(function(matchedOrganism) {
      var names = matchedOrganism.nameLanguageMap;
      if (!names || !names.la) {
        // No language map, or no (non-empty) Latin entry in it.
        return false;
      }
      return names.la;
    });
  }

  /**
   * Build a Node.js/Highland pipeline into which entity references can be
   * piped; each one is handed to _getByEntityReference and the resulting
   * organism streams are flattened into the output.
   *
   * @return {Stream} entityReferenceToOrganismTransformationStream
   */
  var createEntityReferenceToOrganismTransformationStream = function() {
    function toOrganisms(sourceStream) {
      var entityReferences = highland(sourceStream);
      return entityReferences.flatMap(_getByEntityReference);
    }

    return highland.pipeline(toOrganisms);
  };

  /**
   * Get a single organism: runs query() with the given criteria and keeps
   * only the first result.
   *
   * @param {Object|String} searchCriteria Must not be empty.
   * @param {String|String[]} [searchCriteria.type='Organism']
   * @return {Stream<Organism>} organismStream
   * @throws {Error} When searchCriteria is empty.
   */
  function get(searchCriteria) {
    var hasCriteria = !_.isEmpty(searchCriteria);
    if (hasCriteria) {
      var matches = query(searchCriteria);
      return matches.head();
    }

    throw new Error('No searchCriteria specified for organism.get');
  }

  /**
   * @private
   *
   * Get all organisms currently supported by BridgeDb by requesting the
   * 'contents' resource under the configured base IRI and parsing the
   * response with the module-level csvOptions (tab-delimited).
   *
   * @return {Stream<Organism>} organism
   */
  var _getAll = function() {
    var contentsIri = instance.config.baseIri + 'contents';
    var request = hyperquest(contentsIri, {
      withCredentials: false
    });
    var rows = highland(request.pipe(csv(csvOptions)));

    return rows.map(function(row) {
      // Column 0 holds the English name, column 1 the Latin name.
      var englishName = row[0];
      var latinName = row[1];

      // A missing name is compared against the *string* 'null' on purpose:
      // that is the value BridgeDb sends, not a native null.
      var nameLanguageMap = {};
      if (englishName !== 'null') {
        nameLanguageMap.en = englishName;
      }
      if (latinName !== 'null') {
        nameLanguageMap.la = latinName;
      }

      return {
        'id': latinNameToIriMappings[latinName],
        'type': 'Organism',
        nameLanguageMap: nameLanguageMap
      };
    });
  };

  /**
   * @private
   *
   * When the organism is unknown but the BridgeDb system code and the entity
   * reference identifier are known, find the organism by testing the
   * supported organisms until one is found for which
   * instance.entityReference.exists reports the system code / identifier pair.
   *
   * @param systemCode
   * @param identifier
   * @return {Stream<Organism>} organismStream
   */
  var _getBySystemCodeAndIdentifier =
    function(systemCode, identifier) {
    // Pre-fill the first two arguments; the Latin name is supplied per organism.
    var existsForOrganism = highland.curry(instance.entityReference.exists,
        systemCode, identifier);

    function hasEntityReference(candidate) {
      return existsForOrganism(candidate.nameLanguageMap.la);
    }

    // TODO sort organisms by number of pathways at WikiPathways.
    // Get that data as part of build step for this library.
    var allOrganisms = query();
    return allOrganisms.flatFilter(hasEntityReference).head();
  };

  /**
   * @private
   *
   * Identifies the organism for the provided entity reference and returns all
   * the data BridgeDb has about that organism, which currently is the Latin name
   * and, when available, the English name.
   *
   * Resolution order:
   *   1. if the (possibly enriched) entity reference has an `organism`, that
   *      value is normalized and returned;
   *   2. otherwise, if it has both a BridgeDb system code and an identifier,
   *      the organism is looked up from those;
   *   3. otherwise a warning is logged and the returned stream is empty.
   *
   * @param {Object} entityReference See bridgeDb.entityReference.enrich for information
   *                                  on acceptable entity reference inputs.
   * @return {Stream<Organism>} organismStream Empty when the organism cannot
   *                                  be determined.
   */
  function _getByEntityReference(entityReference) {
    // TODO as part of the build process, query all species like this:
    // http://webservice.bridgedb.org/Human/sourceDataSources
    // http://webservice.bridgedb.org/Human/targetDataSources
    // to get a listing of which datasets go with which species.
    // Save that data as a JSON file.
    // Then use those limitations in this query.

    // Read the BridgeDb system code off an entity reference: the dataset's
    // bridgeDbSystemCode or, failing that, its first alternatePrefix entry.
    // Returns a falsy value when there is no dataset or no system code.
    function getSystemCode(candidate) {
      var dataset = candidate.isDataItemIn;
      if (!dataset) {
        return undefined;
      }
      return dataset.bridgeDbSystemCode ||
        (_.isArray(dataset.alternatePrefix) && dataset.alternatePrefix[0]);
    }

    var entityReferenceStream;
    if (!getSystemCode(entityReference)) {
      // No system code yet: ask for the dataset enrichment only.
      entityReferenceStream = instance.entityReference.enrich(entityReference, {
        bridgeDbXrefsUrl: false,
        dataset: true,
        organism: false,
        xref: false,
      });
    } else {
      entityReferenceStream = highland([entityReference]);
    }

    return entityReferenceStream.flatMap(function(enrichedEntityReference) {
      var organism = enrichedEntityReference.organism;
      if (!!organism) {
        return _normalize(organism);
      }

      // Guarded lookup: the enriched reference may still lack isDataItemIn.
      var systemCode = getSystemCode(enrichedEntityReference);
      var identifier = enrichedEntityReference.identifier;

      if (!!systemCode && !!identifier) {
        return _getBySystemCodeAndIdentifier(
          systemCode, identifier);
      }

      console.warn('Cannot get organism by entityReference.');
      // flatMap requires its iterator to return a stream; returning the plain
      // entity reference object would surface as an "Expected Stream" error.
      return highland([]);
    });
  }

  /**
   * @private
   *
   * Each BridgeDb instance has one organism associated with it. The lookup is
   * routed through Utils._runOncePerInstance under the key 'instanceOrganism'
   * (presumably so it only runs once per instance -- see utils.js).
   *
   * A value previously stored in instance.instanceOrganismNonNormalized takes
   * precedence over the searchCriteria argument.
   *
   * @param {Object|String} searchCriteria
   * @return {Stream<Organism>} Organism
   */
  function _getInstanceOrganism(searchCriteria) {
    var storedCriteria = instance.instanceOrganismNonNormalized;
    var effectiveCriteria = storedCriteria ? storedCriteria : searchCriteria;

    var instanceOrganismStream = Utils._runOncePerInstance(
        instance,
        'instanceOrganism',
        function() {
          return query(effectiveCriteria);
        }
    );

    return instanceOrganismStream.head();
  }

  /**
   * @private
   *
   * Normalize organism: find the BridgeDb-supported organism that matches the
   * provided IRI and/or name. Names are compared after being passed through
   * normalizeText, against each supported organism's full Latin name,
   * abbreviated Latin name (first letter of the first word + second word)
   * and English name. Organisms that lack a Latin or an English name are
   * matched on whichever names they do have.
   *
   * @param {String|Object|Organism} organism - Can be any one of the following:
   *    * IRI from the {@link http://identifiers.org/taxonomy/|Taxonomy ontology}
   *    * name in Latin
   *      - full like "Escherichia coli" or
   *      - abbreviated like "E. coli" or
   *    * name in English
   *    * an object with the key being the language and the value being the name
   *    * a full or partial Organism object
   * @param {Iri} [organism['id']] Taxonomy ontology IRI
   * @param {String} [organism.name] name in Latin (preferred) or English
   * @param {String} [organism.en] name in English - deprecated
   * @param {String} [organism.english] name in English - deprecated
   * @param {String} [organism.la] name in Latin - deprecated
   * @param {String} [organism.latin] name in Latin - deprecated
   * @param {Object} [organism.nameLanguageMap] {@link
   *    http://www.w3.org/TR/json-ld/#language-maps|language map}
   * @param {String} [organism.nameLanguageMap.en] name in English
   * @param {String} [organism.nameLanguageMap.la] name in Latin
   * @return {Stream<Organism>} organismStream Emits the first match, if any.
   * @throws {Error} When neither an IRI nor a name can be extracted from the input.
   */
  function _normalize(organism) {
    // TODO has the input been transformed to use the internalContext yet?
    var organismName;
    var organismIri;
    if (_.isString(organism)) {
      if (organism.indexOf('http://identifiers.org/taxonomy/') === 0) {
        organismIri = organism;
      } else {
        organismName = organism;
      }
    } else if (_.isPlainObject(organism)) {
      if (organism.id) {
        organismIri = organism.id;
      }
      var providedNames = organism.nameLanguageMap;
      if (providedNames) {
        organismName = providedNames.la || providedNames.en;
      } else {
        organismName = organism.name || organism.la || organism.latin ||
          organism.en || organism.english;
      }
    }

    if (!organismIri && !organismName) {
      console.error(organism);
      throw new Error('Cannot normalize provided organism (above).');
    }

    var normalizedOrganismName;
    if (organismName) {
      normalizedOrganismName = normalizeText(organismName);
    }

    // List the normalized names a supported organism can be matched by,
    // skipping any name the organism does not have.
    function getNormalizedNameCandidates(candidate) {
      var names = candidate.nameLanguageMap || {};
      var rawNames = [];

      var latinName = names.la;
      if (latinName) {
        rawNames.push(latinName);
        var latinNameComponents = latinName.split(' ');
        // The abbreviation (e.g. "E" + "coli") needs at least two words.
        if (latinNameComponents[0] && latinNameComponents[1]) {
          rawNames.push(latinNameComponents[0][0] + latinNameComponents[1]);
        }
      }

      if (names.en) {
        rawNames.push(names.en);
      }

      return rawNames.map(function(value) {
        return normalizeText(value);
      });
    }

    return _getAll()
    .filter(function(candidate) {
      if (organismIri && organismIri === candidate.id) {
        return true;
      }
      if (!normalizedOrganismName) {
        return false;
      }
      return getNormalizedNameCandidates(candidate)
        .indexOf(normalizedOrganismName) > -1;
    })
    .head();
  }

  /**
   * Find organisms, either all or a subset by search criteria.
   *
   * With empty criteria, the full organism listing is returned via
   * Utils._runOnceGlobal. Otherwise the criteria's type decides which lookup
   * handles it: 'Organism' (the default, and always used for string criteria)
   * or 'EntityReference'. When several types are given, 'Organism' is
   * preferred over 'EntityReference'.
   *
   * @param {Object|String} searchCriteria
   * @param {String|String[]} [searchCriteria.type='Organism']
   * @return {Stream<Organism>} organismStream
   * @throws {Error} When none of the provided types is supported.
   */
  function query(searchCriteria) {
    if (_.isEmpty(searchCriteria)) {
      return Utils._runOnceGlobal('organisms', _getAll);
    }

    var handlersByType = {
      Organism: _normalize,
      EntityReference: _getByEntityReference,
    };

    var requestedTypes = jsonldRx.arrayify(
        _.isString(searchCriteria) ?
          'Organism' :
          (searchCriteria.type || 'Organism'));

    // First supported type, in the order the handlers are declared above.
    var handledType = _(handlersByType).keys()
    .intersection(requestedTypes)
    .first();

    if (!handledType) {
      throw new Error('Cannot get organism by specified type(s): "' +
          requestedTypes + '"');
    }

    return handlersByType[handledType](searchCriteria);
  }

  /**
   * @private
   *
   * Set the current organism for this instance so we don't have to look it up every time.
   * The provided value is always stored, as given, in
   * instance.instanceOrganismNonNormalized.
   *
   * @param {String|Object} organism The single organism for this bridgedbjs instance. It is
   *                                 preferably the full Latin name. If you need to work
   *                                 with another organism, create another bridgedbjs instance.
   * @param {Boolean} [normalize=true] Whether to look the organism up via
   *                                 _getInstanceOrganism. Both null and undefined
   *                                 count as "not specified" and default to true.
   * @return {Stream<Organism>|undefined} The stream from _getInstanceOrganism when
   *    normalize is truthy; otherwise nothing is returned.
   */
  function _setInstanceOrganism(organism, normalize) {
    var normalizeUnspecified = normalize === null || normalize === undefined;
    var shouldNormalize = normalizeUnspecified ? true : normalize;

    instance.instanceOrganismNonNormalized = organism;

    if (!shouldNormalize) {
      return undefined;
    }
    return _getInstanceOrganism(organism);
  }

  return {
    createEntityReferenceToOrganismTransformationStream:
      createEntityReferenceToOrganismTransformationStream,
    get:get,
    _getInstanceOrganism:_getInstanceOrganism,
    query:query,
    _setInstanceOrganism:_setInstanceOrganism
  };
};

exports = module.exports = Organism;
// comments powered by Disqus