Source code for tsfresh.examples.robot_execution_failures

# -*- coding: utf-8 -*-
# This file as well as the whole tsfresh package are licenced under the MIT licence (see the LICENCE.txt)
# Maximilian Christ (maximilianchrist.com), Blue Yonder Gmbh, 2016

"""
This module implements functions to download the Robot Execution Failures LP1 Data Set [1]_, [2]_, [3]_ and load it as
as DataFrame.

*Important:* You need to download the data set yourself, either manually or via the function
:func:`~tsfresh.examples.robot_execution_failures.download_robot_execution_failures`

References
----------
.. [1] https://archive.ics.uci.edu/ml/datasets/Robot+Execution+Failures
.. [2] Lichman, M. (2013).
    UCI Machine Learning Repository [https://archive.ics.uci.edu/ml].
    Irvine, CA: University of California, School of Information and Computer Science.
.. [3] Camarinha-Matos, L.M., L. Seabra Lopes, and J. Barata (1996).
    Integration and Learning in Supervision of Flexible Assembly Systems.
    "IEEE Transactions on Robotics and Automation", 12 (2), 202-219

"""

import logging
import os
from builtins import map

import pandas as pd
import requests

_logger = logging.getLogger(__name__)

UCI_MLD_REF_MSG = (
    "The example data could not be found. You need to download the Robot Execution Failures "
    "LP1 Data Set from the UCI Machine Learning Repository. To do so, you can call the function "
    "tsfresh.examples.robot_execution_failures.download_robot_execution_failures"
)
UCI_MLD_REF_URL = "https://raw.githubusercontent.com/MaxBenChrist/robot-failure-dataset/master/lp1.data.txt"

module_path = os.path.dirname(__file__)
data_file_name = os.path.join(module_path, "data", "robotfailure-mld", "lp1.data")


[docs] def download_robot_execution_failures(file_name=data_file_name): """ Download the Robot Execution Failures LP1 Data Set[#1] from the UCI Machine Learning Repository [#2] and store it locally. :return: Examples ======== >>> from tsfresh.examples import download_robot_execution_failures >>> download_robot_execution_failures() """ if os.path.exists(file_name): _logger.warning( "You have already downloaded the Robot Execution Failures LP1 Data Set." ) return os.makedirs(os.path.dirname(file_name), exist_ok=True) if not os.access(os.path.dirname(file_name), os.W_OK): raise RuntimeError( "You don't have the necessary permissions to download the Robot Execution Failures LP1 Data " "Set into the module path. Consider installing the module in a virtualenv you " "own or run this function with appropriate permissions." ) r = requests.get(UCI_MLD_REF_URL) if r.status_code != 200: raise RuntimeError( "Could not download the Robot Execution Failures LP1 Data Set from the UCI Machine Learning " "Repository. HTTP status code: {}".format(r.status_code) ) with open(file_name, "w") as f: f.write(r.text)
[docs] def load_robot_execution_failures(multiclass=False, file_name=data_file_name): """ Load the Robot Execution Failures LP1 Data Set[1]. The Time series are passed as a flat DataFrame. Examples ======== >>> from tsfresh.examples import load_robot_execution_failures >>> df, y = load_robot_execution_failures() >>> print(df.shape) (1320, 8) :param multiclass: If True, return all target labels. The default returns only "normal" vs all other labels. :type multiclass: bool :return: time series data as :class:`pandas.DataFrame` and target vector as :class:`pandas.Series` :rtype: tuple """ if not os.path.exists(file_name): raise RuntimeError(UCI_MLD_REF_MSG) id_to_target = {} df_rows = [] with open(file_name) as f: cur_id = 0 time = 0 for line in f.readlines(): # New sample --> increase id, reset time and determine target if line[0] not in ["\t", "\n"]: cur_id += 1 time = 0 if multiclass: id_to_target[cur_id] = line.strip() else: id_to_target[cur_id] = line.strip() == "normal" # Data row --> split and convert values, create complete df row elif line[0] == "\t": values = list(map(int, line.split("\t")[1:])) df_rows.append([cur_id, time] + values) time += 1 df = pd.DataFrame( df_rows, columns=["id", "time", "F_x", "F_y", "F_z", "T_x", "T_y", "T_z"] ) y = pd.Series(id_to_target) return df, y