# Source code for pydnameth.scripts.develop.betas.clock

import copy
import os
from shutil import copyfile
from anytree import Node
from pydnameth.config.config import Config
from pydnameth.config.experiment.types import Task, Method, DataType
from pydnameth.config.experiment.experiment import Experiment
from pydnameth.infrastucture.file_name import get_file_name
from pydnameth.infrastucture.path import get_save_path
from pydnameth.model.tree import build_tree, calc_tree


def betas_clock_linreg(
    data,
    annotations,
    attributes,
    method_params=None
):
    """
    Assemble and evaluate the config tree for a linear-regression clock on betas.

    A root config (betas / clock / linreg, carrying ``method_params``) gets a
    single child config (betas / table / linreg). Both are marked ``is_run=True``.
    The tree is then passed to ``build_tree`` and ``calc_tree``.

    Every config receives its own deep copy of the input objects, so the
    caller's instances are not shared with the tree.

    :param data: dataset description, deep-copied into each config.
    :param annotations: annotations description, deep-copied into each config.
    :param attributes: attributes description, deep-copied into each config.
    :param method_params: parameters of the clock experiment (root node only).
    :return: None.
    """
    # --- root node: the clock itself ---
    clock_data = copy.deepcopy(data)
    clock_experiment = Experiment(
        data=DataType.betas,
        task=Task.clock,
        method=Method.linreg,
        method_params=copy.deepcopy(method_params)
    )
    clock_annotations = copy.deepcopy(annotations)
    clock_attributes = copy.deepcopy(attributes)
    clock_config = Config(
        data=clock_data,
        experiment=clock_experiment,
        annotations=clock_annotations,
        attributes=clock_attributes,
        is_run=True,
        is_root=True
    )
    clock_node = Node(name=str(clock_config), config=clock_config)

    # --- child node: the linreg table attached under the clock ---
    table_data = copy.deepcopy(data)
    table_experiment = Experiment(
        data=DataType.betas,
        task=Task.table,
        method=Method.linreg
    )
    table_annotations = copy.deepcopy(annotations)
    table_attributes = copy.deepcopy(attributes)
    table_config = Config(
        data=table_data,
        experiment=table_experiment,
        annotations=table_annotations,
        attributes=table_attributes,
        is_run=True,
        is_root=False
    )
    # Passing ``parent`` attaches the node to the tree; no local handle is needed.
    Node(name=str(table_config), config=table_config, parent=clock_node)

    build_tree(clock_node)
    calc_tree(clock_node)


def betas_clock_special(
    data,
    annotations,
    attributes,
    file,
    method_params=None,
):
    """
    Produce an epigenetic clock, using the best CpGs which are provided in the input file.

    The epigenetic clock is represented as a table:
    Each row corresponds to clocks, which are built on all CpGs from the previous rows including the current row.
    Columns:

    * item: CpG id.
    * aux: gene, on which CpG is mapped.
    * R2: determination coefficient of linear regression between real and predicted target observable.
      A statistical measure of how well the regression line approximates the data points.
    * r: correlation coefficient of linear regression between real and predicted target observable.
    * evs: explained variance regression score.
    * mae: mean absolute error regression loss.
    * rmse: root mean square error.

    Possible parameters of experiment:

    * ``'type'``: type of clocks. \n
      Possible options: \n
      ``'all'``: iterative building of clocks starting from one element in the model,
      ending with ``'size'`` elements in the model. \n
      ``'single'``: building of clocks only with ``'size'`` elements in the model. \n
      ``'deep'``: iterative building of clocks starting from one element in the model,
      ending with ``'size'`` elements in the model, but choosing all possible combinations from ``'size'`` elements.
    * ``'part'``: the proportion of considered number of subject in the test set. From ``0.0`` to ``1.0``.
    * ``'size'``: maximum number of exogenous variables in a model.
    * ``'runs'``: number of bootstrap runs in model.

    :param data: pdm.Data instance, which specifies information about dataset.
    :param annotations: pdm.Annotations instance, which specifies subset of CpGs.
    :param attributes: pdm.Attributes instance, which specifies information about subjects.
    :param file: path to an existing input file with the CpGs. Its base name (without
        extension) is passed to the child config as ``'file_name'``, and the file itself
        is copied into the child config's save path before the tree is calculated.
    :param method_params: parameters of experiment.
    :raises FileNotFoundError: if ``file`` is not an existing regular file.
    """
    # Fail fast: nothing below makes sense without the input file.
    if not os.path.isfile(file):
        raise FileNotFoundError(f'File {file} not found.')

    # Base name without extension identifies the table; the extension is
    # reused for the copy placed in the save directory.
    fn, ext = os.path.splitext(os.path.basename(file))

    config_root = Config(
        data=copy.deepcopy(data),
        experiment=Experiment(
            data=DataType.betas,
            task=Task.clock,
            method=Method.linreg,
            method_params=copy.deepcopy(method_params)
        ),
        annotations=copy.deepcopy(annotations),
        attributes=copy.deepcopy(attributes),
        is_run=True,
        is_root=True
    )
    root = Node(name=str(config_root), config=config_root)

    # The child is created with is_run=False -- presumably because its table
    # is supplied by the copied input file rather than computed (confirm
    # against calc_tree).
    config_child = Config(
        data=copy.deepcopy(data),
        experiment=Experiment(
            data=DataType.betas,
            task=Task.table,
            method=Method.special,
            method_params={'file_name': fn}
        ),
        annotations=copy.deepcopy(annotations),
        attributes=copy.deepcopy(attributes),
        is_run=False,
        is_root=False
    )

    Node(name=str(config_child), config=config_child, parent=root)

    build_tree(root)

    # The copy happens after build_tree and before calc_tree, as in the
    # original ordering; the destination is derived from the child config.
    new_file = get_save_path(config_child) + '/' + \
        get_file_name(config_child) + ext

    copyfile(file, new_file)

    calc_tree(root)