Source code for tsfresh.examples.robot_execution_failures

# -*- coding: utf-8 -*-
# This file as well as the whole tsfresh package are licenced under the MIT licence (see the LICENCE.txt)
# Maximilian Christ (maximilianchrist.com), Blue Yonder Gmbh, 2016

"""
This module implements functions to download the Robot Execution Failures LP1 Data Set[1] and load it as a DataFrame.

*Important:* You need to download the data set yourself, either manually or via the function
:func:`~tsfresh.examples.robot_execution_failures.download_robot_execution_failures`

References
----------
.. [1] http://mlr.cs.umass.edu/ml/datasets/Robot+Execution+Failures
.. [2] Lichman, M. (2013).
    UCI Machine Learning Repository [http://mlr.cs.umass.edu/ml].
    Irvine, CA: University of California, School of Information and Computer Science.
.. [3] Camarinha-Matos, L.M., L. Seabra Lopes, and J. Barata (1996).
    Integration and Learning in Supervision of Flexible Assembly Systems.
    "IEEE Transactions on Robotics and Automation", 12 (2), 202-219

"""

from __future__ import absolute_import, division

from builtins import map
import os
import pandas as pd
import requests
import logging


# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)


# Message of the RuntimeError raised by load_robot_execution_failures when the local data file is missing.
UCI_MLD_REF_MSG = ("The example data could not be found. You need to download the Robot Execution Failures "
                   "LP1 Data Set from the UCI Machine Learning Repository. To do so, you can call the function "
                  "tsfresh.examples.robot_execution_failures.download_robot_execution_failures")
# URL fetched by download_robot_execution_failures (note: a raw.githubusercontent.com address).
UCI_MLD_REF_URL = "https://raw.githubusercontent.com/MaxBenChrist/robot-failure-dataset/master/lp1.data.txt"


# Directory that contains this module; the data file is stored beneath it.
module_path = os.path.dirname(__file__)
# Local path of the data file: <module_path>/data/robotfailure-mld/lp1.data
data_file_name = os.path.join(module_path, 'data', 'robotfailure-mld', 'lp1.data')


def download_robot_execution_failures():
    """
    Download the Robot Execution Failures LP1 Data Set[1] from the UCI Machine Learning Repository[2] and store it
    locally.

    If the data file already exists, a warning is logged and nothing is downloaded.

    :return: None
    :raises RuntimeError: if the module path is not writable, or if the HTTP request does not return status 200

    Examples
    ========

    >>> from tsfresh.examples import download_robot_execution_failures
    >>> download_robot_execution_failures()
    """
    if os.path.exists(data_file_name):
        _logger.warning("You have already downloaded the Robot Execution Failures LP1 Data Set.")
        return

    if not os.access(module_path, os.W_OK):
        raise RuntimeError("You don't have the necessary permissions to download the Robot Execution Failures LP1 Data "
                           "Set into the module path. Consider installing the module in a virtualenv you "
                           "own or run this function with appropriate permissions.")

    # The target directory may already exist without the data file in it (for example after an earlier
    # call that failed during the download). An unguarded os.makedirs would raise in that case.
    data_dir = os.path.dirname(data_file_name)
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)

    r = requests.get(UCI_MLD_REF_URL)

    if r.status_code != 200:
        raise RuntimeError("Could not download the Robot Execution Failures LP1 Data Set from the UCI Machine Learning "
                           "Repository. HTTP status code: {}".format(r.status_code))

    with open(data_file_name, "w") as f:
        f.write(r.text)


def load_robot_execution_failures():
    """
    Read the locally stored Robot Execution Failures LP1 Data Set[1] and return it in flat form.

    Every line of the data file that starts with a tab is one time step of the current sample. Every other
    non-blank line starts a new sample and carries its class label. The label ``normal`` is encoded as 0 and
    every other label as 1.

    Examples
    ========

    >>> from tsfresh.examples import load_robot_execution_failures
    >>> df, y = load_robot_execution_failures()
    >>> print(df.shape)
    (1320, 8)

    :return: time series data as :class:`pandas.DataFrame` (columns ``id``, ``time``, ``a`` to ``f``) and target
             vector as :class:`pandas.Series` indexed by sample id
    :rtype: tuple
    :raises RuntimeError: if the data file has not been downloaded yet
    """
    if not os.path.exists(data_file_name):
        raise RuntimeError(UCI_MLD_REF_MSG)

    column_names = ['id', 'time', 'a', 'b', 'c', 'd', 'e', 'f']
    targets = {}
    records = []
    sample_id = 0
    step = 0

    with open(data_file_name) as data_file:
        for raw_line in data_file:
            first_char = raw_line[0]

            # Blank separator line: nothing to record
            if first_char == '\n':
                continue

            # Measurement line: drop the empty field before the leading tab and convert the rest to int
            if first_char == '\t':
                readings = [int(field) for field in raw_line.split('\t')[1:]]
                records.append([sample_id, step] + readings)
                step += 1
                continue

            # Any other line is a label line that opens a new sample
            sample_id += 1
            step = 0
            targets[sample_id] = 0 if raw_line.strip() == 'normal' else 1

    df = pd.DataFrame(records, columns=column_names)
    y = pd.Series(targets)
    return df, y