Skip to content

API

column_names(data, *args, **kwargs)

Returns the names of the columns in the data. Useful to investigate the dataset before running the actual algorithm.

Parameters:

Name Type Description Default
client

v6 client provided by the algorithm wrapper

required
data DataFrame

dataframe containing the data, provided by algorithm wrapper

required

Returns: a list of column names

Source code in python/verticox/vantage6.py
@data(1)
def column_names(data: pd.DataFrame, *args, **kwargs):
    """
    List the column names of the local dataset.

    Useful to investigate the dataset before running the actual algorithm.

    Args:
        data: dataframe containing the data, provided by algorithm wrapper
        *args: not used
        **kwargs: not used

    Returns: a list of column names

    """
    names = data.columns.tolist()
    return names

cross_validate(client, data, feature_columns, event_times_column, event_happened_column, include_value=True, datanode_ids=None, central_node_id=None, convergence_precision=DEFAULT_PRECISION, rho=DEFAULT_RHO, n_splits=DEFAULT_KFOLD_SPLITS, *_args, **_kwargs)

Fit a cox proportional hazards model using the Verticox+ algorithm using crossvalidation. Works similarly to the fit method, but trains multiple times on smaller subsets of the data using k-fold crossvalidation.

Parameters:

Name Type Description Default
client AlgorithmClient

v6 client provided by the algorithm wrapper

required
data DataFrame

dataframe containing the data, provided by algorithm wrapper

required
feature_columns List[str]

The columns to be used as features

required
event_times_column str

The name of the column that contains the event times

required
event_happened_column str

The name of the column that contains whether an event has happened, or whether the sample is right censored.

required
include_value

The value in the event_happened_column that means the record is NOT right-censored

True
datanode_ids List[int]

List of organization ids of the nodes that will be used as feature nodes

None
central_node_id int

Organization id of the node that will be used as the central node. This node should contain the outcome data.

None
convergence_precision float

Precision for the Cox model. The algorithm will stop when the difference between iterations falls below this number.

DEFAULT_PRECISION
rho float

Penalty parameter

DEFAULT_RHO
n_splits int

Number of splits for crossvalidation

DEFAULT_KFOLD_SPLITS
*_args
()
**_kwargs
{}

Returns: A tuple containing 3 lists: c_indices, coefs, baseline_hazards

Source code in python/verticox/vantage6.py
@data(1)
@algorithm_client
def cross_validate(client: AlgorithmClient,
                   data: pd.DataFrame,
                   feature_columns: List[str],
                   event_times_column: str,
                   event_happened_column: str,
                   include_value=True,
                   datanode_ids: List[int] = None,
                   central_node_id: int = None,
                   convergence_precision: float = DEFAULT_PRECISION,
                   rho: float = DEFAULT_RHO,
                   n_splits: int = DEFAULT_KFOLD_SPLITS,
                   *_args,
                   **_kwargs):
    """
    Fit a cox proportional hazards model using the Verticox+ algorithm using crossvalidation.

    Works similarly to the `fit` method, but trains multiple times on smaller subsets of the
    data using k-fold crossvalidation.

    Once the node manager has been created, `kill_all_algorithms()` is always called on it
    before this function finishes. An exception raised while running is logged together with
    its traceback and is NOT re-raised; in that case nothing is returned.

    Args:
        client: v6 client provided by the algorithm wrapper
        data: dataframe containing the data, provided by algorithm wrapper
        feature_columns: The columns to be used as features
        event_times_column: The name of the column that contains the event times
        event_happened_column: The name of the column that contains whether an event has
            happened, or whether the sample is right censored.
        include_value: The value in the event_happened_column that means the record is NOT
            right-censored
        datanode_ids: List of organization ids of the nodes that will be used as feature nodes
        central_node_id: Organization id of the node that will be used as the central node.
            This node should contain the outcome data.
        convergence_precision: Precision for the Cox model. The algorithm will stop when the
            difference between iterations falls below this number
        rho: Penalty parameter
        n_splits: Number of splits for crossvalidation
        *_args: not used
        **_kwargs: not used

    Returns: A tuple containing 3 lists: `c_indices`, `coefs`, `baseline_hazards`

    """
    manager = node_manager.V6NodeManager(
        client,
        data,
        datanode_ids,
        central_node_id,
        event_happened_column,
        event_times_column,
        feature_columns,
        include_value,
        convergence_precision=convergence_precision,
        rho=rho,
    )

    try:
        info(f"Start running verticox on features: {feature_columns}")
        manager.start_nodes()

        started = time.time()
        c_indices, coefs, hazard_functions = kfold_cross_validate(manager, n_splits=n_splits)
        elapsed = time.time() - started
        info(f"Verticox algorithm complete after {elapsed} seconds")

        info("Killing datanodes")
        # The step functions have to be converted before they can be serialized
        baseline_hazards = list(map(_stepfunction_to_tuple, hazard_functions))

        print(f'Returning c_indices: {c_indices}\ncoefs: {coefs}\nbaseline_hazards: {baseline_hazards}')
        return c_indices, coefs, baseline_hazards
    except Exception as exc:
        # Log only; the caller receives None when the run fails.
        info(f"Algorithm ended with exception {exc}")
        info(traceback.format_exc())
    finally:
        manager.kill_all_algorithms()

fit(client, data, feature_columns, event_times_column, event_happened_column, include_value=True, datanode_ids=None, central_node_id=None, precision=DEFAULT_PRECISION, rho=DEFAULT_RHO, database=None, *_args, **_kwargs)

Fit a cox proportional hazards model using the Verticox+ algorithm

Parameters:

Name Type Description Default
client AlgorithmClient

v6 client provided by the algorithm wrapper

required
data DataFrame

dataframe containing the data, provided by algorithm wrapper

required
feature_columns List[str]

The columns to be used as features

required
event_times_column str

The name of the column that contains the event times

required
event_happened_column str

The name of the column that contains whether an event has happened, or whether the sample is right censored.

required
include_value any

The value in the event_happened_column that means the record is NOT right-censored

True
datanode_ids List[int]

List of organization ids of the nodes that will be used as feature nodes

None
central_node_id int

Organization id of the node that will be used as the central node. This node should contain the outcome data.

None
precision float

Precision for the Cox model. The algorithm will stop when the difference between iterations falls below this number.

DEFAULT_PRECISION
rho float

Penalty parameter

DEFAULT_RHO
database str | None

Name of the database to be used (default is "default")

None
*_args
()
**_kwargs
{}

Returns: A dictionary containing the coefficients of the model ("coefs") and the baseline hazard function of the model ("baseline_hazard_x" and "baseline_hazard_y").

Source code in python/verticox/vantage6.py
@data(1)
@algorithm_client
def fit(
        client: AlgorithmClient,
        data: pd.DataFrame,
        feature_columns: List[str],
        event_times_column: str,
        event_happened_column: str,
        include_value: any = True,
        datanode_ids: List[int] = None,
        central_node_id: int = None,
        precision: float = DEFAULT_PRECISION,
        rho: float = DEFAULT_RHO,
        database: str|None = None,
        *_args,
        **_kwargs,
):
    """
    Fit a cox proportional hazards model using the Verticox+ algorithm

    The data is preprocessed first; the column names that come out of preprocessing are the
    ones handed to the node manager. Once the node manager has been created,
    `kill_all_algorithms()` is always called on it before this function finishes. An exception
    raised while running is logged together with its traceback and is NOT re-raised; in that
    case nothing is returned.

    Args:
        client: v6 client provided by the algorithm wrapper
        data: dataframe containing the data, provided by algorithm wrapper
        feature_columns: The columns to be used as features
        event_times_column: The name of the column that contains the event times
        event_happened_column: The name of the column that contains whether an event has
            happened, or whether the sample is right censored.
        include_value: The value in the event_happened_column that means the record is NOT
            right-censored
        datanode_ids: List of organization ids of the nodes that will be used as feature nodes
        central_node_id: Organization id of the node that will be used as the central node.
            This node should contain the outcome data.
        precision: Precision for the Cox model. The algorithm will stop when the difference
            between iterations falls below this number
        rho: Penalty parameter
        database: Name of the database to be used; passed on to the node manager
        *_args: not used
        **_kwargs: not used

    Returns: A dictionary containing the coefficients of the model ("coefs") and the baseline
        hazard function of the model ("baseline_hazard_x" and "baseline_hazard_y").
    """

    # Preprocessing data
    # TODO: This can be removed once we move to v6 version 5.x
    requested_columns = Columns(feature_columns, event_times_column, event_happened_column)
    data, columns, _ = preprocess_data(
        data, output_dir=DATABASE_DIR, columns=requested_columns
    )

    info(f"Columns: {columns}")

    manager = node_manager.V6NodeManager(
        client,
        data,
        datanode_ids,
        central_node_id,
        columns.event_happened_column,
        columns.event_times_column,
        columns.feature_columns,
        include_value,
        convergence_precision=precision,
        rho=rho,
        database=database,
    )

    try:
        info(f"Start running verticox on features: {feature_columns}")
        manager.start_nodes()

        started = time.time()
        manager.fit()
        elapsed = time.time() - started
        info(f"Verticox algorithm complete after {elapsed} seconds")

        info("Killing datanodes")
        # x and y of the baseline hazard are turned into plain lists for the result
        result = {"coefs": manager.coefs}
        result["baseline_hazard_x"] = list(manager.baseline_hazard.x)
        result["baseline_hazard_y"] = list(manager.baseline_hazard.y)
        return result
    except Exception as exc:
        # Log only; the caller receives None when the run fails.
        info(f"Algorithm ended with exception {exc}")
        info(traceback.format_exc())
    finally:
        manager.kill_all_algorithms()

no_op(*args, **kwargs)

A function that does nothing for a while. It is used as a partial algorithm within the verticox+ algorithm and should not be called by itself.

Parameters:

Name Type Description Default
*args
()
**kwargs
{}

Returns:

Source code in python/verticox/vantage6.py
@data(1)
def no_op(*args, **kwargs):
    """
    Sleep for `NO_OP_TIME` and then return.

    It is used as a partial algorithm within the verticox+ algorithm and should not be called
    by itself.

    Args:
        *args: not used
        **kwargs: not used

    Returns: None

    """
    nap_seconds = NO_OP_TIME
    info(f"Sleeping for {nap_seconds}")
    time.sleep(nap_seconds)
    info("Shutting down.")

run_datanode(data, *args, selected_columns=(), event_time_column=None, include_column=None, include_value=None, external_commodity_address=None, address=None, **kwargs)

Starts the datanode (feature node) as gRPC server. This function is a partial function called by the main verticox algorithm. It is not meant to be called by itself.

Parameters:

Name Type Description Default
data DataFrame

the entire dataset, provided by the algorithm wrapper

required
include_value bool | None

This value in the data means the record is NOT right-censored

None
selected_columns List[str]

the names of the columns that will be treated as features (covariates) in the analysis

()
event_time_column str | None

the name of the column that indicates event time

None
include_column str | None

the name of the column that indicates whether an event has taken place or whether the sample is right censored. If the value is False, the sample is right censored.

None
external_commodity_address str | None

Address of the n-party product protocol commodity server

None
address

The address where this server will be running.

None

Returns: None

Source code in python/verticox/vantage6.py
@data(1)
def run_datanode(
        data: pd.DataFrame,
        *args,
        selected_columns: List[str] = (),
        event_time_column: str|None = None,
        include_column: str|None = None,
        include_value: bool|None = None,
        external_commodity_address: str|None = None,
        address=None,
        **kwargs,
):
    """
    Starts the datanode (feature node) as gRPC server.

    This function is a partial function called by the main verticox algorithm. It is not
    meant to be called by itself.

    Args:
        data: the entire dataset, provided by the algorithm wrapper
        *args: not used
        selected_columns: the names of the columns that will be treated as features
            (covariates) in the analysis. Columns that are not present in `data` are dropped.
        event_time_column: the name of the column that indicates event time (only logged here)
        include_column: the name of the column that indicates whether an event has taken
            place or whether the sample is right censored. If the value is False, the sample
            is right censored.
        include_value: This value in the data means the record is NOT right-censored
        external_commodity_address: Address of the n-party product protocol commodity server
        address: The address where this server will be running.
        **kwargs: not used

    Returns: None

    """
    info(f"Selected columns: {selected_columns}")
    info(f"Columns present in dataset: {data.columns}")
    info(f"Event time column: {event_time_column}")
    info(f"Censor column: {include_column}")

    # Only the feature columns are given to preprocessing; both outcome columns are None.
    feature_spec = Columns(selected_columns, None, None)

    # NOTE(review): the frame returned by preprocessing is never used, only the column names
    # are. Other callers in this module unpack three values from preprocess_data -- confirm
    # that two is correct for this call.
    _preprocessed, new_columns = preprocess_data(data, feature_spec)

    # The current datanode might not have all the features
    available = [name for name in new_columns.feature_columns if name in data.columns]
    info(f"Feature columns after filtering: {available}")
    local_features = data[available]

    datanode.serve(
        data=local_features.values,
        feature_names=available,
        port=node_manager.PYTHON_PORT,
        include_column=include_column,
        include_value=include_value,
        commodity_address=external_commodity_address,
        address=address,
    )

run_java_server(_data, *_args, features=None, event_times_column=None, event_happened_column=None, **kwargs)

Partial function that starts the java server. This function is called by the main verticox+ algorithm (fit or cross_validate) and should not be called by itself.

Parameters:

- _data: data provided by the vantage6 algorithm wrapper
- *_args: not used
- features: list of column names that will be used as features
- event_times_column: name of the column that contains the event times
- event_happened_column: name of the column that contains whether an event has happened, or whether the sample is right-censored
- **kwargs: not used

Source code in python/verticox/vantage6.py
@data(1)
def run_java_server(_data: pd.DataFrame, *_args, features=None,
                    event_times_column=None,
                    event_happened_column=None, **kwargs):
    """
    Partial function that starts the java server.

    This function is called by the main verticox+ algorithm (`fit` or `cross_validate`) and
    should not be called by itself. The data is preprocessed first; the resulting data path is
    used to build the environment of the java subprocess.

    Args:
        _data: data provided by the vantage6 algorithm wrapper
        *_args: not used
        features: list of column names that will be used as features
        event_times_column: Name of the column that contains the event times
        event_happened_column: Name of the column that contains whether an event has
            happened, or whether the sample is right-censored
        **kwargs: not used

    """
    info("Starting java server")
    java_command = _get_java_command()
    info(f"Running command: {java_command}")

    column_spec = Columns(features, event_times_column, event_happened_column)
    # Only the location of the preprocessed data is needed below.
    _, _, data_path = preprocess_data(_data, column_spec, _get_data_dir())

    subprocess.run(java_command, env=_get_workaround_sysenv(data_path))

test_sum_local_features(data, features, mask, *args, **kwargs)

Obsolete

Parameters:

Name Type Description Default
data DataFrame
required
features List[str]
required
mask
required
*args
()
**kwargs
{}

Returns:

Source code in python/verticox/vantage6.py
@data(1)
def test_sum_local_features(
        data: pd.DataFrame, features: List[str], mask, *args, **kwargs
):
    """
    Obsolete. Sums the requested feature columns over the rows selected by `mask`.

    Args:
        data: dataframe to take the features from
        features: names of the columns to sum
        mask: row selector applied after the columns have been picked
            (presumably a boolean mask that excludes censored records -- verify with caller)
        *args: not used
        **kwargs: not used

    Returns: the `.values` of the per-column sums

    """
    # Only check requested features
    requested = data[features]

    # Exclude censored data
    included = requested[mask]

    column_totals = included.sum(axis=0)
    return column_totals.values

CrossValResult dataclass

CrossValResult contains the result of a cross-validation task. It contains the c-indices, coefficients and baseline hazard functions for each fold.

Source code in python/verticox/client.py
@dataclass
class CrossValResult:
    """
    CrossValResult contains the result of a cross-validation task. It contains the c-indices,
    coefficients and baseline hazard functions for each fold.
    """
    # One entry per fold in each of the three lists.
    c_indices: List[float]
    coefs: List[Dict[str, float]]
    baseline_hazards: List[HazardFunction]

    @staticmethod
    def parse(partialResults: list[dict]):
        """
        Build a CrossValResult from the raw task results.

        Args:
            partialResults: list of partial results; only the first one is read. Its
                "result" entry must be a JSON string that decodes to the three-element
                sequence `c_indices, coefs, baseline_hazards`.

        Returns: a `CrossValResult`
        """
        # Cross validation should only have one partial result
        result = json.loads(partialResults[0]["result"])
        c_indices, coefs, baseline_hazards = result
        # Every serialized hazard is unpacked into the HazardFunction constructor
        baseline_hazards = [HazardFunction(*h) for h in baseline_hazards]

        return CrossValResult(c_indices, coefs, baseline_hazards)

    def plot(self):
        """
        Plot the baseline hazard (left) and the coefficients (right) of every fold, one row
        per fold.
        """
        num_folds = len(self.c_indices)
        # squeeze=False keeps `ax` two-dimensional. Without it a single fold yields a 1-D
        # array of axes and `ax[fold][0]` fails.
        fig, ax = plt.subplots(num_folds, 2, constrained_layout=True, squeeze=False)

        for fold in range(num_folds):
            hazard = self.baseline_hazards[fold]
            fold_coefs = self.coefs[fold]

            ax[fold][0].plot(hazard.x, hazard.y)
            ax[fold][0].set_title(f"Baseline hazard fold {fold}")
            ax[fold][1].bar(fold_coefs.keys(), fold_coefs.values())
            ax[fold][1].set_title(f"Coefficients fold {fold}")

FitResult dataclass

FitResult contains the result of a fit task. It contains the coefficients and the baseline hazard function.

Source code in python/verticox/client.py
@dataclass
class FitResult:
    """
    Result of a fit task: the model coefficients together with the baseline hazard function.
    """
    coefs: Dict[str, float]
    baseline_hazard: HazardFunction

    @staticmethod
    def parse(results: List[Dict[str, any]]):
        """
        Build a FitResult from the raw task results.

        Args:
            results: list of partial results; only the first one is read. Its "result" entry
                must be a JSON string with the keys "coefs", "baseline_hazard_x" and
                "baseline_hazard_y".

        Returns: a `FitResult`
        """
        # Assume that there is only one "partial" result
        first = results[0]
        payload = json.loads(first["result"])

        coefficients = payload["coefs"]
        hazard = HazardFunction(payload["baseline_hazard_x"], payload["baseline_hazard_y"])

        return FitResult(coefficients, hazard)

    def plot(self):
        """
        Plot the baseline hazard (top) and the coefficients as a bar chart (bottom).
        """
        _, axes = plt.subplots(2, 1, constrained_layout=True)
        hazard_axis = axes[0]
        coef_axis = axes[1]

        hazard_axis.plot(self.baseline_hazard.x, self.baseline_hazard.y)
        hazard_axis.set_title("Baseline hazard")
        hazard_axis.set_xlabel("time")
        hazard_axis.set_ylabel("hazard score")

        coef_axis.bar(self.coefs.keys(), self.coefs.values(), label="coefficients")
        coef_axis.set_title("Coefficients")

Task

Task is a wrapper around the vantage6 task object.

Source code in python/verticox/client.py
class Task:
    """
    Wrapper around the vantage6 task object that can wait for and fetch the task's results.
    """
    def __init__(self, client: Client, task_data):
        """
        Args:
            client: client that is used to wait for the results
            task_data: the task as returned on creation; its "id" entry becomes `task_id`
        """
        self.client = client
        self._raw_data = task_data
        self.task_id = task_data["id"]

    def get_results(self) -> PartialResult:
        """
        Get the results of the task. This will block until the task is finished.

        Returns: the "data" entry of the response, after it went through `_parse_results`

        """
        response = self.client.wait_for_results(self.task_id)
        payload = response["data"]
        return self._parse_results(payload)

    @staticmethod
    def _parse_results(results) -> FitResult | CrossValResult:
        # A plain task hands the results back untouched.
        parsed = results
        return parsed

get_results()

Get the results of the task. This will block until the task is finished.

Returns:

Source code in python/verticox/client.py
def get_results(self) -> PartialResult:
    """
    Get the results of the task. This will block until the task is finished.

    Returns: the "data" entry of the response, after it went through `_parse_results`

    """
    response = self.client.wait_for_results(self.task_id)
    payload = response["data"]
    return self._parse_results(payload)

VerticoxClient

Client for running verticox. This client is a wrapper around the vantage6 client to simplify use.

Source code in python/verticox/client.py
class VerticoxClient:
    """
    Client for running verticox. This client is a wrapper around the vantage6 client to simplify
    use.
    """
    def __init__(
            self,
            v6client: Client,
            collaboration=None,
            log_level=logging.INFO,
            image=DOCKER_IMAGE,
    ):
        self._logger = logging.getLogger(__name__)
        self._logger.setLevel(log_level)
        self._v6client = v6client
        self._image = image
        collaborations = self._v6client.collaboration.list()["data"]
        if len(collaborations) > 1:
            raise VerticoxClientException(
                f"You are in multiple collaborations, please specify "
                f"one of:\n {collaborations}"
            )

        self.collaboration_id = collaborations[0]["id"]

    def get_active_node_organizations(self) -> List[int]:
        """
        Get the organization ids of the active nodes in the collaboration.

        Returns: a list of organization ids

        """
        nodes = self._v6client.node.list(is_online=True)

        # TODO: Add pagination support
        nodes = nodes["data"]
        return [n["organization"]["id"] for n in nodes]

    def get_column_names(self, **kwargs):
        """
        Get the column names of the dataset at all active nodes.

        Args:
            **kwargs:

        Returns:

        """
        active_nodes = self.get_active_node_organizations()
        self._logger.debug(f"There are currently {len(active_nodes)} active nodes")

        task = self._run_task(
            "column_names", organizations=active_nodes, master=False, **kwargs
        )
        return task

    def fit(
            self,
            feature_columns,
            outcome_time_column,
            right_censor_column,
            feature_nodes,
            outcome_node,
            precision=_DEFAULT_PRECISION,
            database="default",
    ):
        """
        Run cox proportional hazard analysis on the entire dataset.

        Args:
            feature_columns: a list of column names that you want to use as features
            outcome_time_column: the column name of the outcome time
            right_censor_column: the column name of the binary value that indicates if an event
            happened.
            feature_nodes: A list of node ids from the datasources that contain the feature columns
            outcome_node: The node id of the datasource that contains the outcome
            precision: precision of the verticox algorithm. The smaller the number, the more
            precise the result. Smaller precision will take longer to compute though. The default is
            1e-5
            database: If the nodes have multiple datasources, indicate the label of the datasource
            you would like to use. Otherwise the default will be used.

        Returns: a `Task` object containing info about the task.

        """
        input_params = {
            "feature_columns": feature_columns,
            "event_times_column": outcome_time_column,
            "event_happened_column": right_censor_column,
            "datanode_ids": feature_nodes,
            "central_node_id": outcome_node,
            "precision": precision,
        }

        return self._run_task(
            "fit", True, [outcome_node], kwargs=input_params, database=database
        )

    def cross_validate(self,
                       feature_columns,
                       outcome_time_column,
                       right_censor_column,
                       feature_nodes,
                       outcome_node,
                       precision=_DEFAULT_PRECISION,
                       n_splits = 10,
                       database="default"):
        """
        Run cox proportional hazard analysis on the entire dataset using cross-validation. Uses 10
        fold by default.

        Args:
            feature_columns: a list of column names that you want to use as features
            outcome_time_column: the column name of the outcome time
            right_censor_column: the column name of the binary value that indicates if an event
            happened.
            feature_nodes: A list of node ids from the datasources that contain the feature columns
            outcome_node: The node id of the datasource that contains the outcome
            precision: precision of the verticox algorithm. The smaller the number, the more
            precise the result. Smaller precision will take longer to compute though. The default is
            1e-5
            n_splits: The number of folds to use for cross-validation. Default is 10.
            database: If the nodes have multiple datasources, indicate the label of the datasource
            you would like to use. Otherwise the default will be used.

        Returns: a `Task` object containing info about the task.
        """
        input_params = {
            "feature_columns": feature_columns,
            "event_times_column": outcome_time_column,
            "event_happened_column": right_censor_column,
            "datanode_ids": feature_nodes,
            "central_node_id": outcome_node,
            "convergence_precision": precision,
            "n_splits": n_splits,
        }

        return self._run_task(
            "cross_validate", True, [outcome_node], kwargs=input_params, database=database
        )

    def _run_task(
            self, method, master, organizations: List[int], kwargs=None, database="default"
    ):
        if kwargs is None:
            kwargs = {}
        kwargs["database"] = database
        # TODO: Construct description out of parameters
        description = ""
        name = "method"
        task_input = {"method": method, "master": master, "kwargs": kwargs}

        print(
            f"""
                    task = self.v6client.task.create(collaboration={self.collaboration_id},
                                             organizations={organizations},
                                             name={name},
                                             image={self._image},
                                             description={description},
                                             input={task_input},
                                             data_format={_DATA_FORMAT},
                                             database={database}
                                             )
            """
        )
        task = self._v6client.task.create(
            collaboration=self.collaboration_id,
            organizations=organizations,
            name=name,
            image=self._image,
            description=description,
            input_=task_input,
            databases=database,
        )

        match method:
            case "fit":
                return FitTask(self._v6client, task)
            case "cross_validate":
                return CrossValTask(self._v6client, task)
            case _:
                return Task(self._v6client, task)

cross_validate(feature_columns, outcome_time_column, right_censor_column, feature_nodes, outcome_node, precision=_DEFAULT_PRECISION, n_splits=10, database='default')

Run cox proportional hazard analysis on the entire dataset using cross-validation. Uses 10 fold by default.

Parameters:

Name Type Description Default
feature_columns

a list of column names that you want to use as features

required
outcome_time_column

the column name of the outcome time

required
right_censor_column

the column name of the binary value that indicates if an event happened.

required
feature_nodes

A list of node ids from the datasources that contain the feature columns

required
outcome_node

The node id of the datasource that contains the outcome

required
precision

precision of the verticox algorithm. The smaller the number, the more precise the result. Smaller precision will take longer to compute though. The default is 1e-5

_DEFAULT_PRECISION
n_splits

The number of folds to use for cross-validation. Default is 10.

10
database

If the nodes have multiple datasources, indicate the label of the datasource you would like to use. Otherwise the default will be used.

'default'

Returns: a Task object containing info about the task.

Source code in python/verticox/client.py
def cross_validate(self,
                   feature_columns,
                   outcome_time_column,
                   right_censor_column,
                   feature_nodes,
                   outcome_node,
                   precision=_DEFAULT_PRECISION,
                   n_splits=10,
                   database="default"):
    """
    Run cox proportional hazard analysis on the entire dataset using cross-validation. Uses 10
    fold by default.

    The task is created for the outcome node only, with the "master" flag set.

    Args:
        feature_columns: a list of column names that you want to use as features
        outcome_time_column: the column name of the outcome time
        right_censor_column: the column name of the binary value that indicates if an event
            happened.
        feature_nodes: A list of node ids from the datasources that contain the feature
            columns
        outcome_node: The node id of the datasource that contains the outcome
        precision: precision of the verticox algorithm. The smaller the number, the more
            precise the result. Smaller precision will take longer to compute though.
            Defaults to `_DEFAULT_PRECISION`.
        n_splits: The number of folds to use for cross-validation. Default is 10.
        database: If the nodes have multiple datasources, indicate the label of the
            datasource you would like to use. Otherwise the default will be used.

    Returns: a `Task` object containing info about the task.
    """
    # Client-side argument names are translated to the names the algorithm expects.
    algorithm_kwargs = dict(
        feature_columns=feature_columns,
        event_times_column=outcome_time_column,
        event_happened_column=right_censor_column,
        datanode_ids=feature_nodes,
        central_node_id=outcome_node,
        convergence_precision=precision,
        n_splits=n_splits,
    )
    target_organizations = [outcome_node]

    return self._run_task(
        "cross_validate",
        True,
        target_organizations,
        kwargs=algorithm_kwargs,
        database=database,
    )

fit(feature_columns, outcome_time_column, right_censor_column, feature_nodes, outcome_node, precision=_DEFAULT_PRECISION, database='default')

Run cox proportional hazard analysis on the entire dataset.

Parameters:

Name Type Description Default
feature_columns

a list of column names that you want to use as features

required
outcome_time_column

the column name of the outcome time

required
right_censor_column

the column name of the binary value that indicates if an event happened.

required
feature_nodes

A list of node ids from the datasources that contain the feature columns

required
outcome_node

The node id of the datasource that contains the outcome

required
precision

precision of the verticox algorithm. The smaller the number, the more precise the result. Smaller precision will take longer to compute though. The default is 1e-5

_DEFAULT_PRECISION
database

If the nodes have multiple datasources, indicate the label of the datasource you would like to use. Otherwise the default will be used.

'default'

Returns: a Task object containing info about the task.

Source code in python/verticox/client.py
def fit(
        self,
        feature_columns,
        outcome_time_column,
        right_censor_column,
        feature_nodes,
        outcome_node,
        precision=_DEFAULT_PRECISION,
        database="default",
):
    """
    Run cox proportional hazard analysis on the entire dataset.

    The task is created for the outcome node only, with the "master" flag set.

    Args:
        feature_columns: a list of column names that you want to use as features
        outcome_time_column: the column name of the outcome time
        right_censor_column: the column name of the binary value that indicates if an event
            happened.
        feature_nodes: A list of node ids from the datasources that contain the feature
            columns
        outcome_node: The node id of the datasource that contains the outcome
        precision: precision of the verticox algorithm. The smaller the number, the more
            precise the result. Smaller precision will take longer to compute though.
            Defaults to `_DEFAULT_PRECISION`.
        database: If the nodes have multiple datasources, indicate the label of the
            datasource you would like to use. Otherwise the default will be used.

    Returns: a `Task` object containing info about the task.

    """
    # Client-side argument names are translated to the names the algorithm expects.
    algorithm_kwargs = dict(
        feature_columns=feature_columns,
        event_times_column=outcome_time_column,
        event_happened_column=right_censor_column,
        datanode_ids=feature_nodes,
        central_node_id=outcome_node,
        precision=precision,
    )
    target_organizations = [outcome_node]

    return self._run_task(
        "fit",
        True,
        target_organizations,
        kwargs=algorithm_kwargs,
        database=database,
    )

get_active_node_organizations()

Get the organization ids of the active nodes in the collaboration.

Returns: a list of organization ids

Source code in python/verticox/client.py
def get_active_node_organizations(self) -> List[int]:
    """
    Get the organization ids of the nodes that are currently online.

    Returns: a list of organization ids, one per node in the response

    """
    # TODO: Add pagination support
    online_nodes = self._v6client.node.list(is_online=True)["data"]

    organization_ids = []
    for node in online_nodes:
        organization_ids.append(node["organization"]["id"])
    return organization_ids

get_column_names(**kwargs)

Get the column names of the dataset at all active nodes.

Parameters:

Name Type Description Default
**kwargs
{}

Returns:

Source code in python/verticox/client.py
def get_column_names(self, **kwargs):
    """
    Get the column names of the dataset at all active nodes.

    Args:
        **kwargs: forwarded as keyword arguments to `_run_task`

    Returns: whatever `_run_task` returns for the "column_names" method

    """
    online_organizations = self.get_active_node_organizations()
    self._logger.debug(f"There are currently {len(online_organizations)} active nodes")

    return self._run_task(
        "column_names", organizations=online_organizations, master=False, **kwargs
    )
    return task