Skip to content

omics

ReactionGeneMap(database_connection, gene_ids, expression_levels, gene_groups=None, gene_id_map=None, gene_set_reduce_func=max, gene_complex_reduce_func=min)

rxn_exp: expression levels of each reaction.
gene_ids: A list-like of strings containing gene identifiers.
expression_levels: gene expression data.
gene_groups: parent-child mapping of gene product sets and complexes.
gene_id_map: mapping between gene metaId and gene name.
Source code in algo/omics.py
def __init__(
    self,
    database_connection: Neo4jClient,
    gene_ids: Iterable[str],
    expression_levels: Iterable[float],
    gene_groups: pd.DataFrame | None = None,
    gene_id_map: pd.DataFrame | None = None,
    gene_set_reduce_func: callable = max,
    gene_complex_reduce_func: callable = min,
):
    """Build the reaction-to-gene mapping and the per-reaction expression levels.

    Args:
        database_connection: Connection to the Neo4j database; stored as
            ``self.db``.
        gene_ids: Gene identifiers, paired positionally with
            ``expression_levels``.
        expression_levels: Gene expression data, one value per entry of
            ``gene_ids``.
        gene_groups: Parent-child mapping of gene product sets and complexes.
            Must provide a ``group_id`` column. When ``None`` it is obtained
            from ``self._get_gene_groups()``.
        gene_id_map: Mapping between gene metaId and gene name. When ``None``
            it is obtained from ``self._get_gene_id_map()``.
            NOTE(review): annotated as a DataFrame, but it is queried below
            with ``.get(metaId)`` and the result is used as a single gene
            name - confirm the intended type (dict / Series?).
        gene_set_reduce_func: Reducer stored as ``self._set_func`` (default
            ``max``).
        gene_complex_reduce_func: Reducer stored as ``self._complex_func``
            (default ``min``).

    Attributes set here:
        gene_exp: gene identifier -> expression level.
        rxn_exp: reaction ID -> expression level of the reaction.
        reaction_gp_mapping: reaction ID -> genes associated with the reaction.
    """
    self.db = database_connection
    # zip() pairs the two iterables positionally and stops at the shorter one.
    self.gene_exp = dict(zip(gene_ids, expression_levels))
    self.rxn_exp: dict[str, float] = {}

    # Use the caller-provided tables when given, otherwise ask the helpers.
    self.gene_groups = self._get_gene_groups() if gene_groups is None else gene_groups
    self.gene_id_map = self._get_gene_id_map() if gene_id_map is None else gene_id_map

    self._valid_gene_groups = set(self.gene_groups["group_id"].unique())
    self._set_func = gene_set_reduce_func
    self._complex_func = gene_complex_reduce_func

    # Setup reaction expression levels
    top_level_nodes = self._get_top_level_rxn_gene_mapping()
    self.reaction_gp_mapping = {}

    for rxn_id, node_id in top_level_nodes.items():
        if node_id in self._valid_gene_groups:
            # GeneProductSet or GeneProductComplex
            self.reaction_gp_mapping[rxn_id] = self.get_all_genes_in_group(node_id)
            self.rxn_exp[rxn_id] = self.get_gene_expression(node_id)
            continue

        # Single gene: only recorded when the map yields a non-empty name
        # (not None or ""); otherwise the reaction is left out.
        gene_name = self.gene_id_map.get(node_id)
        if gene_name:
            self.reaction_gp_mapping[rxn_id] = {gene_name}
            self.rxn_exp[rxn_id] = self.get_gene_expression(gene_name)

calc_reaction_gene_group_expression(gene_group_id)

Calculate the expression level of a given gene product set or complex node.

Source code in algo/omics.py
def calc_reaction_gene_group_expression(self, gene_group_id: str):
    """Calculate the expression level of a given gene product set or complex node.

    The rows of ``self.gene_groups`` whose ``group_id`` equals
    ``gene_group_id`` are selected; each entry of their ``members`` column is
    translated to a gene name through ``self.gene_id_map`` and looked up with
    ``self.get_gene_expression``. Members without a name or without an
    expression value are ignored.

    Args:
        gene_group_id: Identifier matched against the ``group_id`` column.

    Returns:
        ``0.0`` when no member yields an expression value; otherwise the
        collected values reduced with ``self._set_func`` for a ``"Set"`` group
        or ``self._complex_func`` for a ``"Complex"`` group.

    Raises:
        ValueError: If values were collected but the group type is neither
            ``"Set"`` nor ``"Complex"``.
    """
    # TODO: make use of self.get_all_genes_in_group()
    in_group = self.gene_groups["group_id"] == gene_group_id
    group_rows = self.gene_groups.loc[in_group, :]
    member_ids = group_rows["members"].values
    # The type is read from the first matching row only.
    group_type = group_rows["group_type"].values[0]

    levels = []
    for member_id in member_ids:
        gene_name = self.gene_id_map.get(member_id)
        if not gene_name:
            continue
        level = self.get_gene_expression(gene_name)
        if level is not None:
            levels.append(level)

    if not levels:
        return 0.0
    if group_type == "Set":
        return self._set_func(levels)
    if group_type == "Complex":
        return self._complex_func(levels)
    raise ValueError(f"Unknown gene group type: {group_type}")

get_route_expression(rxn_ids)

Sums the expression levels of the reactions in the route and divides the total by the number of reactions.

Parameters:

Name Type Description Default
rxn_ids Iterable[str]

The metaId fields of the reactions.

required

Returns:

Type Description
float

The averaged expression level.

Source code in algo/omics.py
def get_route_expression(self, rxn_ids: Iterable[str]) -> float:
    """Average the expression levels of the reactions along a route.

    The expression levels found in ``self.rxn_exp`` are summed and divided by
    the total number of reaction IDs given. Reactions without a recorded
    expression level add nothing to the sum but still count in the divisor.

    Args:
        rxn_ids: The metaId fields of the reactions. Any iterable is accepted,
            including generators. A leading ``"rev-"`` is stripped before the
            lookup.

    Returns:
        The averaged expression level, or ``0.0`` for an empty route.
    """
    total = 0.0
    n_reactions = 0
    # Count while iterating instead of calling len(), so that one-shot
    # iterables without __len__ work as the signature promises.
    for rxn_id in rxn_ids:
        n_reactions += 1
        if rxn_id.startswith("rev-"):
            rxn_id = rxn_id[4:]
        if (gene_exp := self.rxn_exp.get(rxn_id)) is not None:
            total += gene_exp

    if n_reactions == 0:
        # Nothing to average; avoid ZeroDivisionError.
        return 0.0
    return total / n_reactions

get_table_of_gene_products(db, rdf_fields=None)

Retrieves all reaction-associated gene products.

Parameters:

Name Type Description Default
db Neo4jClient

A Neo4j client connected to the graph database.

required
rdf_fields dict[str, str]

Properties of the RDF nodes, and the desired output name. For example, {"ncbigene": "entrez"} would extract the ncbigene property from the RDF nodes and output it in the entrez column.

None

Returns:

Type Description
list[dict[str, Any]]

A list of entries with the reaction ID, the metaId of the gene, the gene symbol, and other fields from RDF nodes whenever available.

Source code in algo/omics.py
def get_table_of_gene_products(
    db: Neo4jClient, rdf_fields: dict[str, str] | None = None
) -> list[dict[str, Any]]:
    """
    Retrieves all reaction-associated gene products.

    Args:
        db: A Neo4j client connected to the graph database.
        rdf_fields: Properties of the RDF nodes, and the desired output name.
            For example, {"ncbigene": "entrez"} would extract the ``ncbigene``
            property from the RDF nodes and output it in the ``entrez`` column.
            Keys and values are inserted verbatim into the Cypher text, so
            only pass trusted identifiers.

    Returns:
        The result of ``db.read`` for the assembled query: entries with the
        reaction ID, the metaId of the gene, the gene symbol, and other
        fields from RDF nodes whenever available.
    """
    # The parentheses matter: Cypher's NOT binds tighter than OR, so
    # `NOT a OR b` parses as `(NOT a) OR b`, which would let
    # GeneProductComplex nodes through instead of excluding both group kinds.
    query = """
        MATCH (r:Reaction)-[:hasGeneProduct|hasMember|hasComponent*]->(gp:GeneProduct)
        WHERE NOT (gp:GeneProductSet OR gp:GeneProductComplex)
        WITH r.metaId AS rxn_id, gp
        OPTIONAL MATCH (gp)-[:hasRDF {bioQualifier: 'isEncodedBy'}]->(rdf:RDF)
        RETURN rxn_id, gp.metaId AS gene_id, gp.name AS gene
    """
    if rdf_fields:
        # Append one extra RETURN column per requested RDF property.
        query += "".join(
            f", rdf.{field} AS {col}" for field, col in rdf_fields.items()
        )
    return db.read(query)