Source code for tsfresh.scripts.run_tsfresh

# -*- coding: utf-8 -*-
# This file as well as the whole tsfresh package are licenced under the MIT licence (see the LICENCE.txt)
# Maximilian Christ (maximilianchrist.com), Blue Yonder Gmbh, 2017
"""
This script can be run with:


.. code-block:: bash

   python run_tsfresh.py path_to_your_csv.csv

A corresponding csv containing time series features will be
saved as features_path_to_your_csv.csv

There are a few limitations though

- Currently this only samples to first 50 values.
- Your csv must be space delimited.
- Output is saved as path_to_your_csv.features.csv

"""

import argparse
import os
import sys

import pandas as pd

from tsfresh import extract_features


def _preprocess(df):
    """
    given a DataFrame where records are stored row-wise, rearrange it
    such that records are stored column-wise.
    """

    df = df.stack()

    df.index.rename(["id", "time"], inplace=True)  # .reset_index()
    df.name = "value"
    df = df.reset_index()

    return df


[docs] def main(console_args=None): parser = argparse.ArgumentParser( description="Extract features from time series stored in a CSV file and " "write them back into another CSV file. The time series in the CSV " "file should either have one of the dataframe-formats described in " "http://tsfresh.readthedocs.io/en/latest/text/data_formats.html, " "which means you have to supply the --csv-with-headers flag " "or should be in the form " "[time series 1 values ..., time series 2 values ...] " "where you should not add the --csv-with-headers flag. " "The CSV is expected to be space-separated." ) parser.add_argument( "input_file_name", help="File name of the input CSV file to read in." ) parser.add_argument( "--output-file-name", help="File name of the output CSV file to write to. " "Defaults to input_file_name.features.csv", default=None, ) parser.add_argument( "--column-sort", help="Column name to be used to sort the rows. " "Only available when --csv-with-headers is enabled.", default=None, ) parser.add_argument( "--column-kind", help="Column name where the kind column can be found." "Only available when --csv-with-headers is enabled.", default=None, ) parser.add_argument( "--column-value", help="Column name where the values can be found." "Only available when --csv-with-headers is enabled.", default=None, ) parser.add_argument( "--column-id", help="Column name where the ids can be found." "Only available when --csv-with-headers is enabled.", default=None, ) parser.add_argument("--csv-with-headers", action="store_true", help="") print(console_args) args = parser.parse_args(console_args) if ( args.column_id or args.column_kind or args.column_sort or args.column_value ) and (not args.csv_with_headers): raise AttributeError( "You can only pass in column-value, column-kind, column-id or column-sort if " "--csv-with-headers is enabled." ) if args.csv_with_headers: column_kind = args.column_kind column_sort = args.column_sort column_value = args.column_value column_id = args.column_id header = 0 else: column_kind = None column_sort = "time" column_value = "value" column_id = "id" header = None # Read in CSV file input_file_name = args.input_file_name df = pd.read_csv(input_file_name, delim_whitespace=True, header=header) if not args.csv_with_headers: df = _preprocess(df) df_features = extract_features( df, column_kind=column_kind, column_sort=column_sort, column_value=column_value, column_id=column_id, ) # re-cast index from float to int df_features.index = df_features.index.astype("int") # write to disk default_out_file_name = os.path.splitext(input_file_name)[0] + ".features.csv" output_file_name = args.output_file_name or default_out_file_name df_features.to_csv(output_file_name)
if __name__ == "__main__": main()