ADLStream.evaluation.BaseEvaluator

Abstract base evaluator

This is the base class for implementing a custom evaluator.

Every `Evaluator` must have the properties below and implement `evaluate` with the signature `(new_results, instances) = evaluate()`. The `evaluate` function should contain the logic to:

- Get validation metrics from the validation data (`self.y_eval`, `self.o_eval` and `self.x_eval`).
- Save the metrics in `self.metric_history`.
- Remove already evaluated data (`y_eval`, `o_eval` and `x_eval`) to free memory.
- Return the newly computed metric values and the number of instances evaluated.

Examples:

    class MinimalEvaluator(BaseEvaluator):

        def __init__(self, metric='kappa', **kwargs):
            self.metric = metric
            super().__init__(**kwargs)

        def evaluate(self):
            new_results = []
            instances = []
            current_instance = len(self.metric_history)

            while self.y_eval and self.o_eval:
                # Get metric
                new_metric = metrics.evaluate(
                    self.metric,
                    self.y_eval[0],
                    self.o_eval[0],
                )

                # Save metric and report it back to run()
                self.metric_history.append(new_metric)
                new_results.append(new_metric)

                # Remove evaluated data
                self.y_eval = self.y_eval[1:]
                self.o_eval = self.o_eval[1:]
                self.x_eval = self.x_eval[1:]

                # Add number of instances evaluated
                current_instance += 1
                instances.append(current_instance)

            return new_results, instances

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `results_file` | `str` | Name of the csv file where results are written. If None, no csv file is created. | `'ADLStream.csv'` |
| `dataset_name` | `str` | Name of the data to validate. | `None` |
| `show_plot` | `bool` | Whether to plot the evolution of the metric. | `True` |
| `plot_file` | `str` | Name of the plot image file. If None, no image is saved. | `None` |
| `xlabel` | `str` | x-axis label of the evolution plot. | `''` |
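
For context, here is how a subclass such as the `MinimalEvaluator` sketched in the Examples section might be constructed with these arguments. The file names and dataset name below are illustrative placeholders, not values required by the library.

```python
# Illustrative only: MinimalEvaluator is the example subclass shown above;
# the file names and the dataset name are placeholders.
evaluator = MinimalEvaluator(
    metric="kappa",               # forwarded to the subclass
    results_file="results.csv",   # computed metrics are appended to this csv
    dataset_name="my_stream",     # shown in the plot title
    show_plot=True,               # draw the live metric-evolution plot
    plot_file="results.png",      # save the final figure to this file
    xlabel="kappa",               # metric label used in the plot legend
)
```
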
Source code in ADLStream/evaluation/base_evaluator.py
class BaseEvaluator(ABC):
    """Abstract base evaluator

    This is the base class for implementing a custom evaluator.

    Every `Evaluator` must have the properties below and implement `evaluate` with the
    signature `(new_results, instances) = evaluate()`. The `evaluate` function should 
    contain the logic to: 
        - Get validation metrics from validation data (`self.y_eval`, `self.o_eval`
          and `self.x_eval`).
        - Save metrics in `self.metric_history`.
        - Remove already evaluated data (`y_eval`, `o_eval` and `x_eval`) to keep memory
          free.
        - Return new computed accuracy and count of number of instances evaluated.

    Examples:
    ```python
        class MinimalEvaluator(BaseEvaluator):

            def __init__(self, metric='kappa', **kwargs):
                self.metric = metric
                super().__init__(**kwargs)

            def evaluate(self):
                new_results = []
                instances = []
                current_instance = len(self.metric_history)

                while self.y_eval and self.o_eval:
                    # Get metric
                    new_metric = metrics.evaluate(
                        self.metric,
                        self.y_eval[0],
                        self.o_eval[0],
                    )

                    # Save metric and report it back to run()
                    self.metric_history.append(new_metric)
                    new_results.append(new_metric)

                    # Remove evaluated data
                    self.y_eval = self.y_eval[1:]
                    self.o_eval = self.o_eval[1:]
                    self.x_eval = self.x_eval[1:]

                    # Add number of instances evaluated
                    current_instance += 1
                    instances.append(current_instance)

                return new_results, instances
    ```

    Arguments:
        results_file (str, optional): Name of the csv file where to write results.
            If None, no csv file is created.
            Defaults to "ADLStream.csv".
        dataset_name (str, optional): Name of the data to validate.
            Defaults to None.
        show_plot (bool, optional): Whether to plot the evolution of the metric.
            Defaults to True.
        plot_file (str, optional): Name of the plot image file.
            If None, no image is saved.
            Defaults to None.
        xlabel (str, optional): x-axis label of the evolution plot.
            Defaults to "".
    """

    def __init__(
        self,
        results_file="ADLStream.csv",
        dataset_name=None,
        show_plot=True,
        plot_file=None,
        xlabel="",
    ):
        self.results_file = results_file
        self.dataset_name = dataset_name
        self.show_plot = show_plot
        self.plot_file = plot_file
        self.xlabel = xlabel

        self.x_eval = []
        self.y_eval = []
        self.o_eval = []
        self.metric_history = []

        self._create_results_file()

        self.fig = None
        self.ax = None
        self.line = None
        self.xlim = (0, 1)
        self.ylim = (0, 0.00001)
        self.xdata = []
        self.ydata = []

        self._initialize_plot()

    def _create_results_file(self):
        if self.results_file is not None:
            with open(self.results_file, "w") as f:
                f.write("timestamp,instances,metric\n")

    def _initialize_plot(self):
        if self.show_plot or self.plot_file is not None:
            fig, ax = plt.subplots()
            (line,) = ax.plot([], [], lw=2, label=self.xlabel)

            ax.grid()
            ax.set_title("ADLStream - {}".format(self.dataset_name))
            ax.set_ylabel(self.xlabel)
            ax.set_xlabel("Instances")

            ax.set_xlim(self.xlim)
            ax.set_ylim(self.ylim)
            ax.legend()

            self.fig = fig
            self.ax = ax
            self.line = line

    @abstractmethod
    def evaluate(self):
        """Function that contains the main logic of the evaluator.
        In a generic scheme, this function should:  
            - Get validation metrics from validation data (`self.y_eval`, `self.o_eval`
              and `self.x_eval`).
            - Save metrics in `self.metric_history`.
            - Remove already evaluated data (`y_eval`, `o_eval` and `x_eval`) to keep 
              memory free.
            - Return new computed metrics and count of number of instances evaluated.

        Raises:
            NotImplementedError: This is an abstract method which should be implemented.

        Returns:
            new_metrics (list)
            instances(list)
        """
        raise NotImplementedError("Abstract method")

    def write_results(self, new_results, instances):
        if self.results_file is not None:
            with open(self.results_file, "a") as f:
                for i, value in enumerate(new_results):
                    f.write(
                        "{},{},{}\n".format(str(datetime.now()), instances[i], value,)
                    )

    def update_plot(self, new_results, instances):
        if self.show_plot or self.plot_file is not None:
            self.ydata += new_results
            self.xdata += instances

            self.xlim = (
                self.xlim[0],
                self.xlim[1] if self.xdata[-1] < self.xlim[1] else self.xdata[-1] + 1,
            )
            self.ylim = (
                self.ylim[0]
                if min(new_results) >= self.ylim[0]
                else min(new_results) - (min(new_results)) * 0.1,
                self.ylim[1]
                if max(new_results) <= self.ylim[1]
                else max(new_results) + max(new_results) * 0.1,
            )
            self.ax.set_xlim(self.xlim)
            self.ax.set_ylim(self.ylim)

            self.line.set_data(self.xdata, self.ydata)
            self.line.set_label("{} ({:.4f})".format(self.xlabel, self.ydata[-1]))
            self.ax.legend(labels=["{} ({:.4f})".format(self.xlabel, self.ydata[-1])])
            plt.pause(0.0001)

    def update_predictions(self, context):
        """Gets new predictions from ADLStream context

        Args:
            context (ADLStreamContext)
        """
        x, y, o = context.get_predictions()
        self.x_eval += x
        self.y_eval += y
        self.o_eval += o

    def run(self, context):
        """Run evaluator

        This function updates the predictions from the context, evaluates them and
        updates the results file and the result plot.

        Args:
            context (ADLStreamContext)
        """
        while not context.is_finished():
            self.update_predictions(context)
            new_results, instances = self.evaluate()
            if new_results:
                self.write_results(new_results, instances)
                self.update_plot(new_results, instances)

        if self.plot_file is not None:
            self.fig.savefig(self.plot_file)
        if self.show_plot:
            plt.show()

evaluate(self)

Function that contains the main logic of the evaluator. In a generic scheme, this function should:

- Get validation metrics from the validation data (`self.y_eval`, `self.o_eval` and `self.x_eval`).
- Save metrics in `self.metric_history`.
- Remove already evaluated data (`y_eval`, `o_eval` and `x_eval`) to free memory.
- Return new computed metrics and the number of instances evaluated.

Exceptions:

| Type | Description |
| --- | --- |
| `NotImplementedError` | This is an abstract method which should be implemented. |

Returns:

| Type | Description |
| --- | --- |
| `list` | `new_metrics` |
| `list` | `instances` |

Source code in ADLStream/evaluation/base_evaluator.py
@abstractmethod
def evaluate(self):
    """Function that contains the main logic of the evaluator.
    In a generic scheme, this function should:  
        - Get validation metrics from validation data (`self.y_eval`, `self.o_eval`
          and `self.x_eval`).
        - Save metrics in `self.metric_history`.
        - Remove already evaluated data (`y_eval`, `o_eval` and `x_eval`) to keep 
          memory free.
        - Return new computed metrics and count of number of instances evaluated.

    Raises:
        NotImplementedError: This is an abstract method which should be implemented.

    Returns:
        new_metrics (list)
        instances(list)
    """
    raise NotImplementedError("Abstract method")
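
As a variation on the per-instance example in the class docstring, the following hedged sketch evaluates predictions in fixed-size chunks. It assumes the same `metrics.evaluate(metric_name, y_true, y_pred)` helper used there, that this helper accepts lists of targets and predictions, and that it lives at the import path shown; adjust those details to the actual library.

```python
# Hedged sketch of a chunked evaluate() implementation (not part of ADLStream).
from ADLStream.evaluation import metrics  # assumed import path for the helper


class ChunkedEvaluator(BaseEvaluator):
    """Hypothetical evaluator that emits one metric value per chunk of predictions."""

    def __init__(self, metric="kappa", chunk_size=50, **kwargs):
        self.metric = metric
        self.chunk_size = chunk_size
        super().__init__(**kwargs)

    def evaluate(self):
        new_results, instances = [], []
        # Instances already evaluated = chunks evaluated so far * chunk size.
        current_instance = len(self.metric_history) * self.chunk_size

        while len(self.y_eval) >= self.chunk_size and len(self.o_eval) >= self.chunk_size:
            # Get the metric for the next chunk of targets and predictions.
            new_metric = metrics.evaluate(
                self.metric,
                self.y_eval[: self.chunk_size],
                self.o_eval[: self.chunk_size],
            )

            # Save the metric and report it back to run().
            self.metric_history.append(new_metric)
            new_results.append(new_metric)
            current_instance += self.chunk_size
            instances.append(current_instance)

            # Remove evaluated data to keep memory free.
            self.x_eval = self.x_eval[self.chunk_size :]
            self.y_eval = self.y_eval[self.chunk_size :]
            self.o_eval = self.o_eval[self.chunk_size :]

        return new_results, instances
```
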

run(self, context)

Run evaluator

This function updates the predictions from the context, evaluates them and updates the results file and the result plot.

Source code in ADLStream/evaluation/base_evaluator.py
def run(self, context):
    """Run evaluator

    This function updates the predictions from the context, evaluates them and
    updates the results file and the result plot.

    Args:
        context (ADLStreamContext)
    """
    while not context.is_finished():
        self.update_predictions(context)
        new_results, instances = self.evaluate()
        if new_results:
            self.write_results(new_results, instances)
            self.update_plot(new_results, instances)

    if self.plot_file is not None:
        self.fig.savefig(self.plot_file)
    if self.show_plot:
        plt.show()

update_predictions(self, context)

Gets new predictions from ADLStream context

Source code in ADLStream/evaluation/base_evaluator.py
def update_predictions(self, context):
    """Gets new predictions from ADLStream context

    Args:
        context (ADLStreamContext)
    """
    x, y, o = context.get_predictions()
    self.x_eval += x
    self.y_eval += y
    self.o_eval += o
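
Because `run` only relies on the context's `is_finished` and `get_predictions` methods, a custom evaluator can be exercised offline with a simple stand-in object. The `FakeContext` class and the data below are hypothetical test scaffolding, not part of ADLStream; whether 'kappa' is meaningful for these placeholder values depends on the metrics helper assumed by the `MinimalEvaluator` example.

```python
# Hypothetical test double for ADLStreamContext: it implements only the two
# methods the evaluator calls, is_finished() and get_predictions(), the latter
# returning (x, y, o) lists.
class FakeContext:
    def __init__(self, batches):
        # `batches` is a list of (x, y, o) tuples handed out one per call.
        self.batches = list(batches)

    def is_finished(self):
        return not self.batches

    def get_predictions(self):
        return self.batches.pop(0) if self.batches else ([], [], [])


# Feed two placeholder batches through the MinimalEvaluator sketched above.
context = FakeContext([
    ([[0.1], [0.2]], [[0], [1]], [[0], [1]]),
    ([[0.3]], [[1]], [[1]]),
])
evaluator = MinimalEvaluator(metric="kappa", show_plot=False, results_file=None)
evaluator.run(context)
print(evaluator.metric_history)
```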