Source code for tsfresh.scripts.run_tsfresh

"""
Run the script with:
```
python run_tsfresh.py path_to_your_csv.csv

- Currently this only samples to first 50 values.
- Your csv must be space delimited.
- Output is saved as path_to_your_csv.features.csv

```
e.g.:
```
python run_tsfresh.py data.txt
```

A corresponding csv containing time series features will be 
saved as features_path_to_your_csv.csv
"""

import pandas as pd
import sys
from tsfresh import extract_features
import argparse
import os


def _preprocess(df):
    """
    given a dataframe where records are stored row-wise, rearrange it
    such that records are stored column-wise.
    """

    df = df.stack()

    df.index.rename(["id", "time"], inplace=True)  # .reset_index()
    df.name = "value"
    df = df.reset_index()

    return df


[docs]def main(console_args=None):
    parser = argparse.ArgumentParser(description="Extract features from time series stored in a CSV file and "
                                                 "write them back into another CSV file. The time series in the CSV "
                                                 "file should either have one of the dataframe-formats described in "
                                                 "http://tsfresh.readthedocs.io/en/latest/text/data_formats.html, "
                                                 "which means you have to supply the --csv-with-headers flag "
                                                 "or should be in the form "
                                                 "[time series 1 values ..., time series 2 values ...] "
                                                 "where you should not add the --csv-with-headers flag. "
                                                 "The CSV is expected to be space-separated.")
    parser.add_argument("input_file_name", help="File name of the input CSV file to read in.")
    parser.add_argument("--output-file-name", help="File name of the output CSV file to write to. "
                                                   "Defaults to input_file_name.features.csv",
                        default=None)

    parser.add_argument("--column-sort", help="Column name to be used to sort the rows. "
                                              "Only available when --csv-with-headers is enabled.",
                        default=None)
    parser.add_argument("--column-kind", help="Column name where the kind column can be found."
                                              "Only available when --csv-with-headers is enabled.",
                        default=None)
    parser.add_argument("--column-value", help="Column name where the values can be found."
                                               "Only available when --csv-with-headers is enabled.",
                        default=None)
    parser.add_argument("--column-id", help="Column name where the ids can be found."
                                            "Only available when --csv-with-headers is enabled.",
                        default=None)

    parser.add_argument('--csv-with-headers', action='store_true', help="")
    print(console_args)
    args = parser.parse_args(console_args)

    if (args.column_id or args.column_kind or args.column_sort or args.column_value) and (not args.csv_with_headers):
        raise AttributeError("You can only pass in column-value, column-kind, column-id or column-sort if "
                             "--csv-with-headers is enabled.")

    if args.csv_with_headers:
        column_kind = args.column_kind
        column_sort = args.column_sort
        column_value = args.column_value
        column_id = args.column_id
        header = 0
    else:
        column_kind = None
        column_sort = "time"
        column_value = "value"
        column_id = "id"
        header = None

    # Read in CSV file
    input_file_name = args.input_file_name
    df = pd.read_csv(input_file_name, delim_whitespace=True, header=header)

    if not args.csv_with_headers:
        df = _preprocess(df)

    df_features = extract_features(df, column_kind=column_kind,
                                   column_sort=column_sort, column_value=column_value,
                                   column_id=column_id)

    # re-cast index from float to int
    df_features.index = df_features.index.astype('int')

    # write to disk
    default_out_file_name = os.path.splitext(input_file_name)[0] + '.features.csv'
    output_file_name = args.output_file_name or default_out_file_name
    df_features.to_csv(output_file_name)


if __name__ == '__main__':
    main()