API

`column_names(data, *args, **kwargs)`

Returns the names of the columns in the data. Useful to investigate the dataset before running the actual algorithm.

Parameters:

Name	Type	Description	Default
`client`		v6 client provided by the algorithm wrapper	required
`data`	`DataFrame`	dataframe containing the data, provided by algorithm wrapper	required

Returns: a list of column names

Source code in python/verticox/vantage6.py

@data(1)
def column_names(data: pd.DataFrame, *args, **kwargs):
    """
    List the column names of the local dataset.

    Handy for inspecting what a node holds before launching the actual algorithm.

    Args:
        data: dataframe containing the data, provided by algorithm wrapper
        *args: not used
        **kwargs: not used

    Returns: a list of column names

    """
    names = data.columns.tolist()
    return names

`cross_validate(client, data, feature_columns, event_times_column, event_happened_column, include_value=True, datanode_ids=None, central_node_id=None, convergence_precision=DEFAULT_PRECISION, rho=DEFAULT_RHO, n_splits=DEFAULT_KFOLD_SPLITS, *_args, **_kwargs)`

Fit a cox proportional hazards model using the Verticox+ algorithm using crossvalidation. Works similarly to the fit method, but trains multiple times on smaller subsets of the data using k-fold crossvalidation.

Parameters:

Name	Type	Description	Default
`client`	`AlgorithmClient`	v6 client provided by the algorithm wrapper	required
`data`	`DataFrame`	dataframe containing the data, provided by algorithm wrapper	required
`feature_columns`	`List[str]`	The columns to be used as features	required
`event_times_column`	`str`	The name of the column that contains the event times	required
`event_happened_column`	`str`	The name of the column that contains whether an event has happened, or whether the sample is right censored.	required
`include_value`		The value in the event_happened_column that means the record is NOT right-censored	`True`
`datanode_ids`	`List[int]`	List of organization ids of the nodes that will be used as feature nodes	`None`
`central_node_id`	`int`	Organization id of the node that will be used as the central node. This node should contain the outcome data.	`None`
`convergence_precision`	`float`	Precision for the Cox model. The algorithm will stop when the difference between iterations falls below this number	`DEFAULT_PRECISION`
`rho`	`float`	Penalty parameter	`DEFAULT_RHO`
`n_splits`	`int`	Number of splits for crossvalidation	`DEFAULT_KFOLD_SPLITS`
`*_args`			`()`
`**_kwargs`			`{}`

Returns: A tuple containing 3 lists: c_indices, coefs, baseline_hazards

Source code in python/verticox/vantage6.py

@data(1)
@algorithm_client
def cross_validate(client: AlgorithmClient,
                   data: pd.DataFrame,
                   feature_columns: List[str],
                   event_times_column: str,
                   event_happened_column: str,
                   include_value=True,
                   datanode_ids: List[int] = None,
                   central_node_id: int = None,
                   convergence_precision: float = DEFAULT_PRECISION,
                   rho: float = DEFAULT_RHO,
                   n_splits: int = DEFAULT_KFOLD_SPLITS,
                   *_args,
                   **_kwargs):
    """
    Run the Verticox+ cox proportional hazards fit under k-fold crossvalidation.

    Behaves like `fit`, except that the model is trained several times, each time on a
    smaller subset of the data.

    Args:
        client: v6 client provided by the algorithm wrapper
        data: dataframe containing the data, provided by algorithm wrapper
        feature_columns: The columns to be used as features
        event_times_column: The name of the column that contains the event times
        event_happened_column: The name of the column that contains whether an event has
            happened, or whether the sample is right censored.
        include_value: The value in the event_happened_column that means the record is NOT
            right-censored
        datanode_ids: List of organization ids of the nodes that will be used as feature nodes
        central_node_id: Organization id of the node that will be used as the central node.
            This node should contain the outcome data.
        convergence_precision: Precision for the Cox model. The algorithm will stop when the
            difference between iterations falls below this number
        rho: Penalty parameter
        n_splits: Number of splits for crossvalidation
        *_args: not used
        **_kwargs: not used

    Returns:  A tuple containing 3 lists: `c_indices`, `coefs`, `baseline_hazards`. When an
    exception occurs it is logged and the function falls through, returning `None`.

    """
    manager = node_manager.V6NodeManager(
        client, data, datanode_ids, central_node_id,
        event_happened_column, event_times_column, feature_columns, include_value,
        convergence_precision=convergence_precision,
        rho=rho,
    )

    try:
        info(f"Start running verticox on features: {feature_columns}")
        manager.start_nodes()

        started_at = time.time()
        c_indices, coefs, hazard_functions = kfold_cross_validate(manager, n_splits=n_splits)
        duration = time.time() - started_at
        info(f"Verticox algorithm complete after {duration} seconds")

        # The nodes themselves are stopped in the `finally` clause below.
        info("Killing datanodes")

        # Step functions are converted with _stepfunction_to_tuple so they can be serialized
        baseline_hazards = list(map(_stepfunction_to_tuple, hazard_functions))

        print(f'Returning c_indices: {c_indices}\ncoefs: {coefs}\nbaseline_hazards: {baseline_hazards}')
        return c_indices, coefs, baseline_hazards
    except Exception as error:
        # Failures are logged rather than propagated; the caller receives None.
        info(f"Algorithm ended with exception {error}")
        info(traceback.format_exc())
    finally:
        manager.kill_all_algorithms()

`fit(client, data, feature_columns, event_times_column, event_happened_column, include_value=True, datanode_ids=None, central_node_id=None, precision=DEFAULT_PRECISION, rho=DEFAULT_RHO, database=None, *_args, **_kwargs)`

Fit a cox proportional hazards model using the Verticox+ algorithm

Parameters:

Name	Type	Description	Default
`client`	`AlgorithmClient`	v6 client provided by the algorithm wrapper	required
`data`	`DataFrame`	dataframe containing the data, provided by algorithm wrapper	required
`feature_columns`	`List[str]`	The columns to be used as features	required
`event_times_column`	`str`	The name of the column that contains the event times	required
`event_happened_column`	`str`	The name of the column that contains whether an event has happened, or whether the sample is right censored.	required
`include_value`	`any`	The value in the event_happened_column that means the record is NOT right-censored	`True`
`datanode_ids`	`List[int]`	List of organization ids of the nodes that will be used as feature nodes	`None`
`central_node_id`	`int`	Organization id of the node that will be used as the central node. This node should contain the outcome data.	`None`
`precision`	`float`	Precision for the Cox model. The algorithm will stop when the difference between iterations falls below this number	`DEFAULT_PRECISION`
`rho`	`float`	Penalty parameter	`DEFAULT_RHO`
`database`	`str \| None`	Name of the database to be used (default is "default")	`None`
`*_args`			`()`
`**_kwargs`			`{}`

Returns: A dictionary containing the coefficients of the model ("coefs") and the baseline hazard function of the model ("baseline_hazard_x" and "baseline_hazard_y").

Source code in python/verticox/vantage6.py

@data(1)
@algorithm_client
def fit(
        client: AlgorithmClient,
        data: pd.DataFrame,
        feature_columns: List[str],
        event_times_column: str,
        event_happened_column: str,
        include_value: any = True,
        datanode_ids: List[int] = None,
        central_node_id: int = None,
        precision: float = DEFAULT_PRECISION,
        rho: float = DEFAULT_RHO,
        database: str|None = None,
        *_args,
        **_kwargs,
):
    """
    Train a cox proportional hazards model with the Verticox+ algorithm.

    The data is preprocessed first, after which a `V6NodeManager` starts the nodes, runs the
    fit and finally kills all started algorithms again.

    Args:
        client: v6 client provided by the algorithm wrapper
        data: dataframe containing the data, provided by algorithm wrapper
        feature_columns: The columns to be used as features
        event_times_column: The name of the column that contains the event times
        event_happened_column: The name of the column that contains whether an event has
            happened, or whether the sample is right censored.
        include_value: The value in the event_happened_column that means the record is NOT
            right-censored
        datanode_ids: List of organization ids of the nodes that will be used as feature nodes
        central_node_id: Organization id of the node that will be used as the central node.
            This node should contain the outcome data.
        precision: Precision for the Cox model. The algorithm will stop when the difference
            between iterations falls below this number
        rho: Penalty parameter
        database: Name of the database to be used (default is "default")
        *_args: not used
        **_kwargs: not used

    Returns: A dictionary containing the coefficients of the model ("coefs") and the baseline
    hazard function of the model ("baseline_hazard_x" and "baseline_hazard_y"). When an
    exception occurs it is logged and the function falls through, returning `None`.
    """

    # Preprocessing data
    # TODO: This can be removed once we move to v6 version 5.x
    requested = Columns(feature_columns, event_times_column, event_happened_column)
    data, columns, _ = preprocess_data(data, output_dir=DATABASE_DIR, columns=requested)

    info(f"Columns: {columns}")

    # From here on the column names reported by preprocessing are used.
    manager = node_manager.V6NodeManager(
        client, data, datanode_ids, central_node_id,
        columns.event_happened_column, columns.event_times_column, columns.feature_columns,
        include_value,
        convergence_precision=precision,
        rho=rho,
        database=database,
    )

    try:
        info(f"Start running verticox on features: {feature_columns}")
        manager.start_nodes()

        started_at = time.time()
        manager.fit()
        duration = time.time() - started_at
        info(f"Verticox algorithm complete after {duration} seconds")

        # The nodes themselves are stopped in the `finally` clause below.
        info("Killing datanodes")

        outcome = {
            "coefs": manager.coefs,
            "baseline_hazard_x": list(manager.baseline_hazard.x),
            "baseline_hazard_y": list(manager.baseline_hazard.y),
        }
        return outcome
    except Exception as error:
        # Failures are logged rather than propagated; the caller receives None.
        info(f"Algorithm ended with exception {error}")
        info(traceback.format_exc())
    finally:
        manager.kill_all_algorithms()

`no_op(*args, **kwargs)`

A function that does nothing for a while. It is used as a partial algorithm within the verticox+ algorithm and should not be called by itself.

Parameters:

Name	Type	Description	Default
`*args`			`()`
`**kwargs`			`{}`

Returns:

Source code in python/verticox/vantage6.py

@data(1)
def no_op(*args, **kwargs):
    """
    Sleep for `NO_OP_TIME` and then return without doing anything else.

    It is used as a partial algorithm within the verticox+ algorithm and should not be called
    by itself.

    Args:
        *args: not used
        **kwargs: not used

    Returns: None

    """
    pause = NO_OP_TIME
    info(f"Sleeping for {pause}")
    time.sleep(pause)
    info("Shutting down.")

`run_datanode(data, *args, selected_columns=(), event_time_column=None, include_column=None, include_value=None, external_commodity_address=None, address=None, **kwargs)`

Starts the datanode (feature node) as gRPC server. This function is a partial function called by the main verticox algorithm. It is not meant to be called by itself.

Parameters:

Name	Type	Description	Default
`data`	`DataFrame`	the entire dataset, provided by the algorithm wrapper	required
`include_value`	`bool \| None`	This value in the data means the record is NOT right-censored	`None`
`selected_columns`	`List[str]`	the names of the columns that will be treated as features (covariates) in the analysis	`()`
`event_time_column`	`str \| None`	the name of the column that indicates event time	`None`
`include_column`	`str \| None`	the name of the column that indicates whether an event has taken place or whether the sample is right censored. If the value is False, the sample is right censored.	`None`
`external_commodity_address`	`str \| None`	Address of the n-party product protocol commodity server	`None`
`address`		The address where this server will be running.	`None`

Returns: None

Source code in python/verticox/vantage6.py

@data(1)
def run_datanode(
        data: pd.DataFrame,
        *args,
        selected_columns: List[str] = (),
        event_time_column: str|None = None,
        include_column: str|None = None,
        include_value: bool|None = None,
        external_commodity_address: str|None = None,
        address=None,
        **kwargs,
):
    """
    Starts the datanode (feature node) as gRPC server. This function is a partial function called by
    the main verticox algorithm. It is not meant to be called by itself.

    Args:
        data: the entire dataset, provided by the algorithm wrapper
        *args: not used
        selected_columns: the names of the columns that will be treated as features (covariates)
            in the analysis
        event_time_column: the name of the column that indicates event time. Only logged here.
        include_column: the name of the column that indicates whether an event has taken
            place or whether the sample is right censored. If the value is False, the sample
            is right censored.
        include_value: This value in the data means the record is NOT right-censored
        external_commodity_address: Address of the n-party product protocol commodity server
        address: The address where this server will be running.
        **kwargs: not used

    Returns: None


    """
    info(f"Selected columns: {selected_columns}")
    info(f"Columns present in dataset: {data.columns}")
    info(f"Event time column: {event_time_column}")
    info(f"Censor column: {include_column}")

    columns = Columns(selected_columns, None, None)

    # The other call sites in this module unpack three values from preprocess_data
    # (frame, columns, data location). Star-unpacking accepts that shape as well as a
    # two-value return, so this call no longer depends on the exact arity.
    features, new_columns, *_ = preprocess_data(data, columns)

    # The current datanode might not have all the features. Filter against, and select
    # from, the preprocessed frame so that the preprocessing result is what gets served
    # (previously it was computed and then discarded in favour of the raw dataframe).
    # NOTE(review): assumes the first value returned by preprocess_data is a DataFrame
    # holding the columns named in new_columns.feature_columns -- confirm.
    selected_columns = [f for f in new_columns.feature_columns if f in features.columns]
    info(f"Feature columns after filtering: {selected_columns}")
    features = features[selected_columns]

    datanode.serve(
        data=features.values,
        feature_names=selected_columns,
        port=node_manager.PYTHON_PORT,
        include_column=include_column,
        include_value=include_value,
        commodity_address=external_commodity_address,
        address=address,
    )

`run_java_server(_data, *_args, features=None, event_times_column=None, event_happened_column=None, **kwargs)`

Partial function that starts the Java server. This function is called by the main verticox+ algorithm (`fit` or `cross_validate`) and should not be called by itself. Args: `_data`: data provided by the vantage6 algorithm wrapper; `*_args`: unused; `features`: list of column names that will be used as features; `event_times_column`: name of the column that contains the event times; `event_happened_column`: name of the column that contains whether an event has happened, or whether the sample is right-censored; `**kwargs`: unused.

Source code in python/verticox/vantage6.py

@data(1)
def run_java_server(_data: pd.DataFrame, *_args, features=None,
                    event_times_column=None,
                    event_happened_column=None, **kwargs):
    """
    Launch the java server as a subprocess.

    This is a partial function that is called by the main verticox+ algorithm (`fit` or
    `cross_validate`) and should not be called by itself. The data is preprocessed first and
    the resulting data path is used to build the environment of the subprocess.

    Args:
        _data: data provided by the vantage6 algorithm wrapper
        *_args: not used
        features: list of column names that will be used as features
        event_times_column: Name of the column that contains the event times
        event_happened_column: Name of the column that contains whether an event has happened,
            or whether the sample is right-censored
        **kwargs: not used

    """
    info("Starting java server")
    java_command = _get_java_command()
    info(f"Running command: {java_command}")

    requested = Columns(features, event_times_column, event_happened_column)
    # Only the location of the preprocessed data is needed here.
    _, _, data_path = preprocess_data(_data, requested, _get_data_dir())

    server_env = _get_workaround_sysenv(data_path)
    subprocess.run(java_command, env=server_env)

`test_sum_local_features(data, features, mask, *args, **kwargs)`

Obsolete

Parameters:

Name	Type	Default
`data`	`DataFrame`	required
`features`	`List[str]`	required
`mask`		required
`*args`		`()`
`**kwargs`		`{}`

Returns:

Source code in python/verticox/vantage6.py

@data(1)
def test_sum_local_features(
        data: pd.DataFrame, features: List[str], mask, *args, **kwargs
):
    """
    Obsolete

    Sums each requested feature column over the rows selected by `mask`.

    Args:
        data: dataframe to take the features from
        features: names of the columns to sum
        mask: row selector applied after the column selection (used to exclude censored data)
        *args: not used
        **kwargs: not used

    Returns: the `.values` of the per-column sums

    """
    # Restrict to the requested features first, then drop the censored rows
    requested = data[features]
    uncensored = requested[mask]

    column_sums = uncensored.sum(axis=0)
    return column_sums.values

`CrossValResult` `dataclass`

CrossValResult contains the result of a cross-validation task. It contains the c-indices, coefficients and baseline hazard functions for each fold.

Source code in python/verticox/client.py

@dataclass
class CrossValResult:
    """
    CrossValResult contains the result of a cross-validation task. It contains the c-indices,
    coefficients and baseline hazard functions for each fold.
    """
    # One entry per fold in each of the three lists.
    c_indices: List[float]
    coefs: List[Dict[str, float]]
    baseline_hazards: List[HazardFunction]

    @staticmethod
    def parse(partialResults: list[dict]):
        """
        Build a CrossValResult from the raw task results.

        Args:
            partialResults: list of result entries; only the first one is read. Its "result"
                value must be a JSON document holding the triple
                (c_indices, coefs, baseline_hazards).

        Returns: a `CrossValResult`

        """
        # Cross validation should only have one partial result
        result = partialResults[0]["result"]
        result = json.loads(result)
        c_indices, coefs, baseline_hazards = result
        # Every serialized hazard is expanded into the HazardFunction constructor arguments
        baseline_hazards = [HazardFunction(*h) for h in baseline_hazards]

        return CrossValResult(c_indices, coefs, baseline_hazards)

    def plot(self):
        """
        Draw one row of plots per fold: the baseline hazard on the left and a bar chart of the
        coefficients on the right.
        """
        num_folds = len(self.c_indices)
        # squeeze=False keeps `ax` two-dimensional even when there is a single fold;
        # with the default squeezing a 1x2 grid comes back as a 1-D array and
        # ax[fold][0] would fail.
        fig, ax = plt.subplots(num_folds, 2, constrained_layout=True, squeeze=False)

        for fold in range(num_folds):
            ax[fold][0].plot(self.baseline_hazards[fold].x, self.baseline_hazards[fold].y)
            ax[fold][0].set_title(f"Baseline hazard fold {fold}")
            ax[fold][1].bar(self.coefs[fold].keys(), self.coefs[fold].values())
            ax[fold][1].set_title(f"Coefficients fold {fold}")

`FitResult` `dataclass`

FitResult contains the result of a fit task. It contains the coefficients and the baseline hazard function.

Source code in python/verticox/client.py

@dataclass
class FitResult:
    """
    Outcome of a fit task: the model coefficients together with the baseline hazard function.
    """
    coefs: Dict[str, float]
    baseline_hazard: HazardFunction

    @staticmethod
    def parse(results: List[Dict[str, any]]):
        """
        Build a FitResult from the raw task results.

        Args:
            results: list of result entries; only the first one is read. Its "result" value
                must be a JSON document with the keys "coefs", "baseline_hazard_x" and
                "baseline_hazard_y".

        Returns: a `FitResult`

        """
        # Assume that there is only one "partial" result
        first = results[0]
        content = json.loads(first["result"])

        hazard = HazardFunction(content["baseline_hazard_x"], content["baseline_hazard_y"])
        return FitResult(content["coefs"], hazard)

    def plot(self):
        """
        Draw the baseline hazard (top) and a bar chart of the coefficients (bottom).
        """
        fig, (hazard_ax, coef_ax) = plt.subplots(2, 1, constrained_layout=True)

        hazard_ax.plot(self.baseline_hazard.x, self.baseline_hazard.y)
        hazard_ax.set_title("Baseline hazard")
        hazard_ax.set_xlabel("time")
        hazard_ax.set_ylabel("hazard score")

        coef_ax.bar(self.coefs.keys(), self.coefs.values(), label="coefficients")
        coef_ax.set_title("Coefficients")
`Task`

Task is a wrapper around the vantage6 task object.

Source code in python/verticox/client.py

class Task:
    """
    Wrapper around the vantage6 task object. Keeps the client, the raw task data and the
    task id together.
    """
    def __init__(self, client: Client, task_data):
        self.client = client
        self._raw_data = task_data
        self.task_id = task_data["id"]

    def get_results(self) -> PartialResult:
        """
        Get the results of the task. This will block until the task is finished.

        Returns: the "data" entry of the client's response, passed through `_parse_results`

        """
        response = self.client.wait_for_results(self.task_id)
        payload = response["data"]
        return self._parse_results(payload)


    @staticmethod
    def _parse_results(results) -> FitResult| CrossValResult:
        # The base implementation hands the results back untouched.
        return results

`get_results()`

Get the results of the task. This will block until the task is finished.

Returns:

Source code in python/verticox/client.py

def get_results(self) -> PartialResult:
    """
    Get the results of the task. This will block until the task is finished.

    Returns: the "data" entry of the client's response, passed through `_parse_results`

    """
    response = self.client.wait_for_results(self.task_id)
    payload = response["data"]
    return self._parse_results(payload)

`VerticoxClient`

Client for running verticox. This client is a wrapper around the vantage6 client to simplify use.

Source code in python/verticox/client.py

class VerticoxClient:
    """
    Client for running verticox. This client is a wrapper around the vantage6 client to simplify
    use.
    """
    def __init__(
            self,
            v6client: Client,
            collaboration=None,
            log_level=logging.INFO,
            image=DOCKER_IMAGE,
    ):
        self._logger = logging.getLogger(__name__)
        self._logger.setLevel(log_level)
        self._v6client = v6client
        self._image = image
        collaborations = self._v6client.collaboration.list()["data"]
        if len(collaborations) > 1:
            raise VerticoxClientException(
                f"You are in multiple collaborations, please specify "
                f"one of:\n {collaborations}"
            )

        self.collaboration_id = collaborations[0]["id"]

    def get_active_node_organizations(self) -> List[int]:
        """
        Get the organization ids of the active nodes in the collaboration.

        Returns: a list of organization ids

        """
        nodes = self._v6client.node.list(is_online=True)

        # TODO: Add pagination support
        nodes = nodes["data"]
        return [n["organization"]["id"] for n in nodes]

    def get_column_names(self, **kwargs):
        """
        Get the column names of the dataset at all active nodes.

        Args:
            **kwargs:

        Returns:

        """
        active_nodes = self.get_active_node_organizations()
        self._logger.debug(f"There are currently {len(active_nodes)} active nodes")

        task = self._run_task(
            "column_names", organizations=active_nodes, master=False, **kwargs
        )
        return task

    def fit(
            self,
            feature_columns,
            outcome_time_column,
            right_censor_column,
            feature_nodes,
            outcome_node,
            precision=_DEFAULT_PRECISION,
            database="default",
    ):
        """
        Run cox proportional hazard analysis on the entire dataset.

        Args:
            feature_columns: a list of column names that you want to use as features
            outcome_time_column: the column name of the outcome time
            right_censor_column: the column name of the binary value that indicates if an event
            happened.
            feature_nodes: A list of node ids from the datasources that contain the feature columns
            outcome_node: The node id of the datasource that contains the outcome
            precision: precision of the verticox algorithm. The smaller the number, the more
            precise the result. Smaller precision will take longer to compute though. The default is
            1e-5
            database: If the nodes have multiple datasources, indicate the label of the datasource
            you would like to use. Otherwise the default will be used.

        Returns: a `Task` object containing info about the task.

        """
        input_params = {
            "feature_columns": feature_columns,
            "event_times_column": outcome_time_column,
            "event_happened_column": right_censor_column,
            "datanode_ids": feature_nodes,
            "central_node_id": outcome_node,
            "precision": precision,
        }

        return self._run_task(
            "fit", True, [outcome_node], kwargs=input_params, database=database
        )

    def cross_validate(self,
                       feature_columns,
                       outcome_time_column,
                       right_censor_column,
                       feature_nodes,
                       outcome_node,
                       precision=_DEFAULT_PRECISION,
                       n_splits = 10,
                       database="default"):
        """
        Run cox proportional hazard analysis on the entire dataset using cross-validation. Uses 10
        fold by default.

        Args:
            feature_columns: a list of column names that you want to use as features
            outcome_time_column: the column name of the outcome time
            right_censor_column: the column name of the binary value that indicates if an event
            happened.
            feature_nodes: A list of node ids from the datasources that contain the feature columns
            outcome_node: The node id of the datasource that contains the outcome
            precision: precision of the verticox algorithm. The smaller the number, the more
            precise the result. Smaller precision will take longer to compute though. The default is
            1e-5
            n_splits: The number of folds to use for cross-validation. Default is 10.
            database: If the nodes have multiple datasources, indicate the label of the datasource
            you would like to use. Otherwise the default will be used.

        Returns: a `Task` object containing info about the task.
        """
        input_params = {
            "feature_columns": feature_columns,
            "event_times_column": outcome_time_column,
            "event_happened_column": right_censor_column,
            "datanode_ids": feature_nodes,
            "central_node_id": outcome_node,
            "convergence_precision": precision,
            "n_splits": n_splits,
        }

        return self._run_task(
            "cross_validate", True, [outcome_node], kwargs=input_params, database=database
        )

    def _run_task(
            self, method, master, organizations: List[int], kwargs=None, database="default"
    ):
        if kwargs is None:
            kwargs = {}
        kwargs["database"] = database
        # TODO: Construct description out of parameters
        description = ""
        name = "method"
        task_input = {"method": method, "master": master, "kwargs": kwargs}

        print(
            f"""
                    task = self.v6client.task.create(collaboration={self.collaboration_id},
                                             organizations={organizations},
                                             name={name},
                                             image={self._image},
                                             description={description},
                                             input={task_input},
                                             data_format={_DATA_FORMAT},
                                             database={database}
                                             )
            """
        )
        task = self._v6client.task.create(
            collaboration=self.collaboration_id,
            organizations=organizations,
            name=name,
            image=self._image,
            description=description,
            input_=task_input,
            databases=database,
        )

        match method:
            case "fit":
                return FitTask(self._v6client, task)
            case "cross_validate":
                return CrossValTask(self._v6client, task)
            case _:
                return Task(self._v6client, task)

`cross_validate(feature_columns, outcome_time_column, right_censor_column, feature_nodes, outcome_node, precision=_DEFAULT_PRECISION, n_splits=10, database='default')`

Run cox proportional hazard analysis on the entire dataset using cross-validation. Uses 10 fold by default.

Parameters:

Name	Description	Default
`feature_columns`	a list of column names that you want to use as features	required
`outcome_time_column`	the column name of the outcome time	required
`right_censor_column`	the column name of the binary value that indicates if an event happened.	required
`feature_nodes`	A list of node ids from the datasources that contain the feature columns	required
`outcome_node`	The node id of the datasource that contains the outcome	required
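`precision`	precision of the verticox algorithm. The smaller the number, the more precise the result. Smaller precision will take longer to compute though. The default is 1e-5	`_DEFAULT_PRECISION`
`n_splits`	The number of folds to use for cross-validation. Default is 10.	`10`
`database`	If the nodes have multiple datasources, indicate the label of the datasource you would like to use. Otherwise the default will be used.	`'default'`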

Returns: a Task object containing info about the task.

Source code in python/verticox/client.py

def cross_validate(self,
                   feature_columns,
                   outcome_time_column,
                   right_censor_column,
                   feature_nodes,
                   outcome_node,
                   precision=_DEFAULT_PRECISION,
                   n_splits = 10,
                   database="default"):
    """
    Start a cross-validated cox proportional hazard analysis on the entire dataset. Ten folds
    are used unless `n_splits` says otherwise.

    Args:
        feature_columns: a list of column names that you want to use as features
        outcome_time_column: the column name of the outcome time
        right_censor_column: the column name of the binary value that indicates if an event
            happened.
        feature_nodes: A list of node ids from the datasources that contain the feature columns
        outcome_node: The node id of the datasource that contains the outcome
        precision: precision of the verticox algorithm. The smaller the number, the more
            precise the result. Smaller precision will take longer to compute though.
            Defaults to `_DEFAULT_PRECISION`.
        n_splits: The number of folds to use for cross-validation. Default is 10.
        database: If the nodes have multiple datasources, indicate the label of the datasource
            you would like to use. Otherwise the default will be used.

    Returns: the task object created by `_run_task`, containing info about the task.
    """
    # The client-side argument names are translated to the names the algorithm expects
    input_params = dict(
        feature_columns=feature_columns,
        event_times_column=outcome_time_column,
        event_happened_column=right_censor_column,
        datanode_ids=feature_nodes,
        central_node_id=outcome_node,
        convergence_precision=precision,
        n_splits=n_splits,
    )

    # The task is sent to the outcome node only
    target_organizations = [outcome_node]
    task = self._run_task(
        "cross_validate", True, target_organizations, kwargs=input_params, database=database
    )
    return task

`fit(feature_columns, outcome_time_column, right_censor_column, feature_nodes, outcome_node, precision=_DEFAULT_PRECISION, database='default')`

Run cox proportional hazard analysis on the entire dataset.

Parameters:

Name	Description	Default
`feature_columns`	a list of column names that you want to use as features	required
`outcome_time_column`	the column name of the outcome time	required
`right_censor_column`	the column name of the binary value that indicates if an event happened.	required
`feature_nodes`	A list of node ids from the datasources that contain the feature columns	required
`outcome_node`	The node id of the datasource that contains the outcome	required
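`precision`	precision of the verticox algorithm. The smaller the number, the more precise the result. Smaller precision will take longer to compute though. The default is 1e-5	`_DEFAULT_PRECISION`
`database`	If the nodes have multiple datasources, indicate the label of the datasource you would like to use. Otherwise the default will be used.	`'default'`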

Returns: a Task object containing info about the task.

Source code in python/verticox/client.py

def fit(
        self,
        feature_columns,
        outcome_time_column,
        right_censor_column,
        feature_nodes,
        outcome_node,
        precision=_DEFAULT_PRECISION,
        database="default",
):
    """
    Start a cox proportional hazard analysis that uses the entire dataset.

    Args:
        feature_columns: names of the columns that should serve as features
        outcome_time_column: name of the column holding the outcome time
        right_censor_column: name of the binary column that indicates whether an event
        happened
        feature_nodes: node ids of the datasources that hold the feature columns
        outcome_node: node id of the datasource that holds the outcome
        precision: precision of the verticox algorithm. A smaller number gives a more
        precise result but takes longer to compute. Defaults to `_DEFAULT_PRECISION`.
        database: label of the datasource to use when the nodes have more than one.
        When omitted, "default" is passed on.

    Returns: a `Task` object containing info about the task.

    """
    # Translate the client-facing argument names into the keyword names that are
    # sent along with the "fit" task.
    task_arguments = dict(
        feature_columns=feature_columns,
        event_times_column=outcome_time_column,
        event_happened_column=right_censor_column,
        datanode_ids=feature_nodes,
        central_node_id=outcome_node,
        precision=precision,
    )
    # The task is addressed to the outcome node only.
    target_organizations = [outcome_node]

    return self._run_task(
        "fit", True, target_organizations, kwargs=task_arguments, database=database
    )

`get_active_node_organizations()`

Get the organization ids of the active nodes in the collaboration.

Returns: a list of organization ids

Source code in python/verticox/client.py

def get_active_node_organizations(self) -> List[int]:
    """
    Collect the organization id of every node in the collaboration that is online.

    Only the "data" entry of the node listing response is read.

    Returns: a list of organization ids

    """
    # TODO: Add pagination support
    online_nodes = self._v6client.node.list(is_online=True)["data"]

    organization_ids = []
    for node in online_nodes:
        organization_ids.append(node["organization"]["id"])
    return organization_ids

`get_column_names(**kwargs)`

Get the column names of the dataset at all active nodes.

Parameters:

Name	Type	Description	Default
`**kwargs`		additional keyword arguments that are passed on when the `column_names` task is created	`{}`

Returns: a `Task` object containing info about the task.

Source code in python/verticox/client.py

def get_column_names(self, **kwargs):
    """
    Request the column names of the dataset from all active nodes.

    The organizations of the currently active nodes are looked up first; a
    "column_names" task with `master=False` is then created for those organizations.

    Args:
        **kwargs: extra keyword arguments, handed to `_run_task` unchanged

    Returns: the result of `_run_task` for the created task

    """
    active_nodes = self.get_active_node_organizations()
    self._logger.debug(f"There are currently {len(active_nodes)} active nodes")

    return self._run_task(
        "column_names", master=False, organizations=active_nodes, **kwargs
    )

API

column_names(data, *args, **kwargs)

cross_validate(client, data, feature_columns, event_times_column, event_happened_column, include_value=True, datanode_ids=None, central_node_id=None, convergence_precision=DEFAULT_PRECISION, rho=DEFAULT_RHO, n_splits=DEFAULT_KFOLD_SPLITS, *_args, **_kwargs)

fit(client, data, feature_columns, event_times_column, event_happened_column, include_value=True, datanode_ids=None, central_node_id=None, precision=DEFAULT_PRECISION, rho=DEFAULT_RHO, database=None, *_args, **_kwargs)

no_op(*args, **kwargs)

run_datanode(data, *args, selected_columns=(), event_time_column=None, include_column=None, include_value=None, external_commodity_address=None, address=None, **kwargs)

run_java_server(_data, *_args, features=None, event_times_column=None, event_happened_column=None, **kwargs)

test_sum_local_features(data, features, mask, *args, **kwargs)

CrossValResult dataclass

FitResult dataclass

Task

get_results()

VerticoxClient

cross_validate(feature_columns, outcome_time_column, right_censor_column, feature_nodes, outcome_node, precision=_DEFAULT_PRECISION, n_splits=10, database='default')

fit(feature_columns, outcome_time_column, right_censor_column, feature_nodes, outcome_node, precision=_DEFAULT_PRECISION, database='default')

get_active_node_organizations()

get_column_names(**kwargs)

`column_names(data, *args, **kwargs)`

`cross_validate(client, data, feature_columns, event_times_column, event_happened_column, include_value=True, datanode_ids=None, central_node_id=None, convergence_precision=DEFAULT_PRECISION, rho=DEFAULT_RHO, n_splits=DEFAULT_KFOLD_SPLITS, *_args, **_kwargs)`

`fit(client, data, feature_columns, event_times_column, event_happened_column, include_value=True, datanode_ids=None, central_node_id=None, precision=DEFAULT_PRECISION, rho=DEFAULT_RHO, database=None, *_args, **_kwargs)`

`no_op(*args, **kwargs)`

`run_datanode(data, *args, selected_columns=(), event_time_column=None, include_column=None, include_value=None, external_commodity_address=None, address=None, **kwargs)`

`run_java_server(_data, *_args, features=None, event_times_column=None, event_happened_column=None, **kwargs)`

`test_sum_local_features(data, features, mask, *args, **kwargs)`

`CrossValResult` `dataclass`

`FitResult` `dataclass`

`Task`

`get_results()`

`VerticoxClient`

`cross_validate(feature_columns, outcome_time_column, right_censor_column, feature_nodes, outcome_node, precision=_DEFAULT_PRECISION, n_splits=10, database='default')`

`fit(feature_columns, outcome_time_column, right_censor_column, feature_nodes, outcome_node, precision=_DEFAULT_PRECISION, database='default')`

`get_active_node_organizations()`

`get_column_names(**kwargs)`