Source code for fireant.tests.slicer.widgets.test_pandas

import copy
from unittest import TestCase

import numpy as np
import pandas as pd
import pandas.testing

from fireant.slicer.widgets.pandas import Pandas
from fireant.tests.slicer.mocks import (
    CumSum,
    ElectionOverElection,
    cat_dim_df,
    cont_cat_dim_df,
    cont_dim_df,
    cont_dim_operation_df,
    cont_uni_dim_df,
    cont_uni_dim_ref_df,
    multi_metric_df,
    no_index_df,
    single_metric_df,
    slicer,
    uni_dim_df,
)
from fireant.utils import (
    format_dimension_key as fd,
    format_metric_key as fm,
)


class PandasTransformerTests(TestCase):
    """Tests comparing the output of ``Pandas(...).transform(...)`` with hand-built expected frames.

    Every test builds the expected frame from the same mock data frame that is
    passed to the widget, relabels its index and columns, and compares both
    frames with ``pandas.testing.assert_frame_equal``.
    """

    maxDiff = None

    def test_single_metric(self):
        """With no dimensions, the votes column is expected back labelled 'Votes'."""
        result = Pandas(slicer.metrics.votes) \
            .transform(single_metric_df, slicer, [], [])

        expected = single_metric_df.copy()[[fm('votes')]]
        expected.columns = ['Votes']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_multiple_metrics(self):
        """Two metrics are expected as two labelled columns in the order given to the widget."""
        result = Pandas(slicer.metrics.votes, slicer.metrics.wins) \
            .transform(multi_metric_df, slicer, [], [])

        expected = multi_metric_df.copy()[[fm('votes'), fm('wins')]]
        expected.columns = ['Votes', 'Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_multiple_metrics_reversed(self):
        """Passing the metrics in reverse order is expected to reverse the output columns."""
        result = Pandas(slicer.metrics.wins, slicer.metrics.votes) \
            .transform(multi_metric_df, slicer, [], [])

        expected = multi_metric_df.copy()[[fm('wins'), fm('votes')]]
        expected.columns = ['Wins', 'Votes']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_time_series_dim(self):
        """With the timestamp dimension, the index is expected to be named 'Timestamp'."""
        result = Pandas(slicer.metrics.wins) \
            .transform(cont_dim_df, slicer, [slicer.dimensions.timestamp], [])

        expected = cont_dim_df.copy()[[fm('wins')]]
        expected.index.names = ['Timestamp']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_time_series_dim_with_operation(self):
        """A ``CumSum`` operation on votes is expected under the column label 'CumSum(Votes)'."""
        result = Pandas(CumSum(slicer.metrics.votes)) \
            .transform(cont_dim_operation_df, slicer, [slicer.dimensions.timestamp], [])

        expected = cont_dim_operation_df.copy()[[fm('cumsum(votes)')]]
        expected.index.names = ['Timestamp']
        expected.columns = ['CumSum(Votes)']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_cat_dim(self):
        """With the political party dimension, the index is expected to hold the party labels."""
        result = Pandas(slicer.metrics.wins) \
            .transform(cat_dim_df, slicer, [slicer.dimensions.political_party], [])

        expected = cat_dim_df.copy()[[fm('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_uni_dim(self):
        """With the candidate dimension, the display column is expected to replace the key in the index."""
        result = Pandas(slicer.metrics.wins) \
            .transform(uni_dim_df, slicer, [slicer.dimensions.candidate], [])

        # Swap the candidate key level of the index for the display values.
        expected = uni_dim_df.copy() \
            .set_index(fd('candidate_display'), append=True) \
            .reset_index(fd('candidate'), drop=True) \
            [[fm('wins')]]
        expected.index.names = ['Candidate']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_uni_dim_no_display_definition(self):
        """Without a display definition, the original candidate index is expected to be kept."""
        # Work on copies so the shared mock dimension and data frame stay untouched.
        candidate = copy.copy(slicer.dimensions.candidate)

        uni_dim_df_copy = uni_dim_df.copy()
        del uni_dim_df_copy[fd(slicer.dimensions.candidate.display.key)]
        del candidate.display

        result = Pandas(slicer.metrics.wins) \
            .transform(uni_dim_df_copy, slicer, [candidate], [])

        expected = uni_dim_df_copy.copy()[[fm('wins')]]
        expected.index.names = ['Candidate']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_multi_dims_time_series_and_uni(self):
        """With timestamp and state dimensions, a ('Timestamp', 'State') index is expected."""
        result = Pandas(slicer.metrics.wins) \
            .transform(cont_uni_dim_df, slicer, [slicer.dimensions.timestamp, slicer.dimensions.state], [])

        # drop=False moves the state key into a column, which the column selection then discards.
        expected = cont_uni_dim_df.copy() \
            .set_index(fd('state_display'), append=True) \
            .reset_index(fd('state'), drop=False)[[fm('wins')]]
        expected.index.names = ['Timestamp', 'State']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_transpose_single_dimension(self):
        """With ``transpose=True``, the transposed labelled frame is expected."""
        result = Pandas(slicer.metrics.wins, transpose=True) \
            .transform(cat_dim_df, slicer, [slicer.dimensions.political_party], [])

        expected = cat_dim_df.copy()[[fm('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.transpose()

        pandas.testing.assert_frame_equal(expected, result)

    def test_pivoted_single_dimension_transposes_data_frame(self):
        """Pivoting on the only dimension is expected to give the transposed labelled frame."""
        result = Pandas(slicer.metrics.wins, pivot=[slicer.dimensions.political_party]) \
            .transform(cat_dim_df, slicer, [slicer.dimensions.political_party], [])

        expected = cat_dim_df.copy()[[fm('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.transpose()

        pandas.testing.assert_frame_equal(expected, result)

    def test_pivoted_multi_dims_time_series_and_cat(self):
        """Pivoting on political party is expected to give one column per party, with gaps as ''."""
        result = Pandas(slicer.metrics.wins, pivot=[slicer.dimensions.political_party]) \
            .transform(cont_cat_dim_df, slicer, [slicer.dimensions.timestamp, slicer.dimensions.political_party], [])

        expected = cont_cat_dim_df.copy()[[fm('wins')]]
        expected = expected.unstack(level=[1]).fillna(value='')
        expected.index.names = ['Timestamp']
        expected.columns = ['Democrat', 'Independent', 'Republican']
        expected.columns.names = ['Party']

        pandas.testing.assert_frame_equal(expected, result)

    def test_pivoted_multi_dims_time_series_and_uni(self):
        """Pivoting on state is expected to give one votes column per state display value."""
        result = Pandas(slicer.metrics.votes, pivot=[slicer.dimensions.state]) \
            .transform(cont_uni_dim_df, slicer, [slicer.dimensions.timestamp, slicer.dimensions.state], [])

        expected = cont_uni_dim_df.copy() \
            .set_index(fd('state_display'), append=True) \
            .reset_index(fd('state'), drop=True)[[fm('votes')]]
        expected = expected.unstack(level=[1])
        expected.index.names = ['Timestamp']
        expected.columns = ['California', 'Texas']
        expected.columns.names = ['State']

        pandas.testing.assert_frame_equal(expected, result)

    def test_time_series_ref(self):
        """With an ``ElectionOverElection`` reference, a 'Votes (EoE)' column is expected beside 'Votes'."""
        result = Pandas(slicer.metrics.votes) \
            .transform(cont_uni_dim_ref_df,
                       slicer,
                       [
                           slicer.dimensions.timestamp,
                           slicer.dimensions.state
                       ], [
                           ElectionOverElection(slicer.dimensions.timestamp)
                       ])

        expected = cont_uni_dim_ref_df.copy() \
            .set_index(fd('state_display'), append=True) \
            .reset_index(fd('state'), drop=True)[[fm('votes'), fm('votes_eoe')]]
        expected.index.names = ['Timestamp', 'State']
        expected.columns = ['Votes', 'Votes (EoE)']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_metric_format(self):
        """Prefix, suffix and precision set on a metric are expected in the rendered values."""
        # Copy the metric so the formatting settings do not leak into the shared mock slicer.
        votes = copy.copy(slicer.metrics.votes)
        votes.prefix = '$'
        votes.suffix = '€'
        votes.precision = 2

        # divide the data frame by 3 to get a repeating decimal so we can check precision
        result = Pandas(votes) \
            .transform(cont_dim_df / 3, slicer, [slicer.dimensions.timestamp], [])

        expected = cont_dim_df.copy()[[fm('votes')]]
        expected[fm('votes')] = ['${0:,.2f}€'.format(x)
                                 for x in expected[fm('votes')] / 3]
        expected.index.names = ['Timestamp']
        expected.columns = ['Votes']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_nan_in_metrics(self):
        """A NaN metric value is expected to come back unchanged."""
        cat_dim_df_with_nan = cat_dim_df.copy()
        cat_dim_df_with_nan['$m$wins'] = cat_dim_df_with_nan['$m$wins'].apply(float)
        cat_dim_df_with_nan.iloc[2, 1] = np.nan

        result = Pandas(slicer.metrics.wins) \
            .transform(cat_dim_df_with_nan, slicer, [slicer.dimensions.political_party], [])

        expected = cat_dim_df_with_nan.copy()[[fm('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_inf_in_metrics(self):
        """A positive-infinity metric value is expected to come back unchanged."""
        cat_dim_df_with_inf = cat_dim_df.copy()
        cat_dim_df_with_inf['$m$wins'] = cat_dim_df_with_inf['$m$wins'].apply(float)
        cat_dim_df_with_inf.iloc[2, 1] = np.inf

        result = Pandas(slicer.metrics.wins) \
            .transform(cat_dim_df_with_inf, slicer, [slicer.dimensions.political_party], [])

        expected = cat_dim_df_with_inf.copy()[[fm('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_neginf_in_metrics(self):
        """A negative-infinity metric value is expected to come back unchanged."""
        cat_dim_df_with_neginf = cat_dim_df.copy()
        cat_dim_df_with_neginf['$m$wins'] = cat_dim_df_with_neginf['$m$wins'].apply(float)
        # Must be *negative* infinity; the positive case is covered by test_inf_in_metrics.
        cat_dim_df_with_neginf.iloc[2, 1] = -np.inf

        result = Pandas(slicer.metrics.wins) \
            .transform(cat_dim_df_with_neginf, slicer, [slicer.dimensions.political_party], [])

        expected = cat_dim_df_with_neginf.copy()[[fm('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)

    def test_inf_in_metrics_with_precision_zero(self):
        """With precision 0, the values are expected as the strings '6', '0' and 'Inf'."""
        cat_dim_df_with_inf = cat_dim_df.copy()
        cat_dim_df_with_inf['$m$wins'] = cat_dim_df_with_inf['$m$wins'].apply(float)
        cat_dim_df_with_inf.iloc[2, 1] = np.inf

        # Deep copy so the precision override does not affect the shared mock slicer.
        slicer_modified = copy.deepcopy(slicer)
        slicer_modified.metrics.wins.precision = 0

        result = Pandas(slicer_modified.metrics.wins) \
            .transform(cat_dim_df_with_inf, slicer_modified, [slicer_modified.dimensions.political_party], [])

        expected = cat_dim_df_with_inf.copy()[[fm('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected['$m$wins'] = ['6', '0', 'Inf']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)
class PandasTransformerSortTests(TestCase):
    """Tests for the ``sort`` and ``ascending`` arguments of the ``Pandas`` widget.

    Each test obtains the widget output first, then builds the expected frame
    from the same mock data and orders it with plain pandas calls before
    comparing both with ``pandas.testing.assert_frame_equal``.
    """

    @staticmethod
    def _wins_by_timestamp(source_df):
        """Return the wins column of a copy of ``source_df``, labelled 'Wins' with a 'Timestamp' index."""
        frame = source_df.copy()[[fm('wins')]]
        frame.index.names = ['Timestamp']
        frame.columns = ['Wins']
        frame.columns.name = 'Metrics'
        return frame

    @staticmethod
    def _votes_pivoted_by_state():
        """Return the votes of ``cont_uni_dim_df`` unstacked into one column per state display value."""
        frame = cont_uni_dim_df.copy() \
            .set_index(fd('state_display'), append=True) \
            .reset_index(fd('state'), drop=True)[[fm('votes')]]
        frame = frame.unstack(level=[1])
        frame.index.names = ['Timestamp']
        frame.columns = ['California', 'Texas']
        frame.columns.names = ['State']
        return frame

    @staticmethod
    def _wins_by_timestamp_and_state():
        """Return the wins of ``cont_uni_dim_df`` indexed by ('Timestamp', 'State')."""
        frame = cont_uni_dim_df.copy() \
            .set_index(fd('state_display'), append=True) \
            .reset_index(fd('state'), drop=False)[[fm('wins')]]
        frame.index.names = ['Timestamp', 'State']
        frame.columns = ['Wins']
        frame.columns.name = 'Metrics'
        return frame

    def test_multiple_metrics_sort_index_asc(self):
        """``sort=[0]`` is expected to match an ascending index sort."""
        widget = Pandas(slicer.metrics.wins, sort=[0])
        actual = widget.transform(cont_dim_df, slicer, [slicer.dimensions.timestamp], [])

        wanted = self._wins_by_timestamp(cont_dim_df).sort_index()

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_multiple_metrics_sort_index_desc(self):
        """``sort=[0]`` with ``ascending=[False]`` is expected to match a descending index sort."""
        widget = Pandas(slicer.metrics.wins, sort=[0], ascending=[False])
        actual = widget.transform(cont_dim_df, slicer, [slicer.dimensions.timestamp], [])

        wanted = self._wins_by_timestamp(cont_dim_df).sort_index(ascending=False)

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_multiple_metrics_sort_value_asc(self):
        """``sort=[1]`` is expected to match an ascending sort on the 'Wins' column."""
        widget = Pandas(slicer.metrics.wins, sort=[1])
        actual = widget.transform(cont_dim_df, slicer, [slicer.dimensions.timestamp], [])

        wanted = self._wins_by_timestamp(cont_dim_df).sort_values(['Wins'])

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_multiple_metrics_sort_value_desc(self):
        """``sort=[1]`` with ``ascending=[False]`` is expected to match a descending sort on 'Wins'."""
        widget = Pandas(slicer.metrics.wins, sort=[1], ascending=[False])
        actual = widget.transform(cont_dim_df, slicer, [slicer.dimensions.timestamp], [])

        wanted = self._wins_by_timestamp(cont_dim_df).sort_values(['Wins'], ascending=False)

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_multiple_metrics_sort_index_and_value(self):
        """Sorting on index and value is expected to match a sort on 'Timestamp' then 'Wins'."""
        # NOTE: the literal -0 evaluates to plain 0 in Python.
        widget = Pandas(slicer.metrics.wins, sort=[-0, 1])
        actual = widget.transform(cont_dim_df, slicer, [slicer.dimensions.timestamp], [])

        wanted = self._wins_by_timestamp(cont_dim_df) \
            .reset_index() \
            .sort_values(['Timestamp', 'Wins'], ascending=[True, False]) \
            .set_index('Timestamp')

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_uni_with_sort_index_asc(self):
        """Pivoted on state with ``sort=[0]``: expected to match an ascending index sort."""
        widget = Pandas(slicer.metrics.votes, pivot=[slicer.dimensions.state], sort=[0])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._votes_pivoted_by_state().sort_index()

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_uni_with_sort_index_desc(self):
        """Pivoted on state with ``sort=[0]`` descending: expected to match a descending index sort."""
        widget = Pandas(slicer.metrics.votes, pivot=[slicer.dimensions.state], sort=[0], ascending=[False])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._votes_pivoted_by_state().sort_index(ascending=False)

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_uni_with_sort_first_metric_asc(self):
        """Pivoted on state with ``sort=[1]``: expected to match an ascending sort on 'California'."""
        widget = Pandas(slicer.metrics.votes, pivot=[slicer.dimensions.state], sort=[1])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._votes_pivoted_by_state() \
            .reset_index() \
            .sort_values(['California']) \
            .set_index('Timestamp')

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_uni_with_sort_first_metric_desc(self):
        """Pivoted on state with ``sort=[1]`` descending: expected to match a descending sort on 'California'."""
        widget = Pandas(slicer.metrics.votes, pivot=[slicer.dimensions.state], sort=[1], ascending=[False])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._votes_pivoted_by_state() \
            .reset_index() \
            .sort_values(['California'], ascending=[False]) \
            .set_index('Timestamp')

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_uni_with_sort_second_metric_asc(self):
        """Pivoted on state with ``sort=[2]``: expected to match an ascending sort on 'Texas'."""
        widget = Pandas(slicer.metrics.votes, pivot=[slicer.dimensions.state], sort=[2])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._votes_pivoted_by_state() \
            .reset_index() \
            .sort_values(['Texas'], ascending=[True]) \
            .set_index('Timestamp')

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_uni_with_sort_second_metric_desc(self):
        """Pivoted on state with ``sort=[2]`` descending: expected to match a descending sort on 'Texas'."""
        widget = Pandas(slicer.metrics.votes, pivot=[slicer.dimensions.state], sort=[2], ascending=[False])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._votes_pivoted_by_state() \
            .reset_index() \
            .sort_values(['Texas'], ascending=[False]) \
            .set_index('Timestamp')

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_uni_with_sort_index_and_columns(self):
        """Pivoted on state with ``sort=[0, 2]``: expected to match 'Timestamp' ascending, then 'Texas' descending."""
        widget = Pandas(slicer.metrics.votes, pivot=[slicer.dimensions.state],
                        sort=[0, 2], ascending=[True, False])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._votes_pivoted_by_state() \
            .reset_index() \
            .sort_values(['Timestamp', 'Texas'], ascending=[True, False]) \
            .set_index('Timestamp')

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_multi_dims_time_series_and_cat_sort_index_level_0_asc(self):
        """Two index levels with ``sort=[0]``: expected to match an ascending sort on 'Timestamp'."""
        widget = Pandas(slicer.metrics.wins, sort=[0])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._wins_by_timestamp_and_state() \
            .reset_index() \
            .sort_values(['Timestamp']) \
            .set_index(['Timestamp', 'State'])

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_cat_sort_index_level_1_desc(self):
        """Two index levels with ``sort=[1]`` descending: expected to match a descending sort on 'State'."""
        widget = Pandas(slicer.metrics.wins, sort=[1], ascending=[False])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._wins_by_timestamp_and_state() \
            .reset_index() \
            .sort_values(['State'], ascending=[False]) \
            .set_index(['Timestamp', 'State'])

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_pivoted_multi_dims_time_series_and_cat_sort_index_and_values(self):
        """Two index levels with ``sort=[0, 2]``: expected to match 'Timestamp' descending, then 'Wins' ascending."""
        widget = Pandas(slicer.metrics.wins, sort=[0, 2], ascending=[False, True])
        dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
        actual = widget.transform(cont_uni_dim_df, slicer, dimensions, [])

        wanted = self._wins_by_timestamp_and_state() \
            .reset_index() \
            .sort_values(['Timestamp', 'Wins'], ascending=[False, True]) \
            .set_index(['Timestamp', 'State'])

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_empty_sort_array_is_ignored(self):
        """``sort=[]`` is expected to leave the row order as in the input frame."""
        widget = Pandas(slicer.metrics.wins, sort=[])
        actual = widget.transform(cont_dim_df, slicer, [slicer.dimensions.timestamp], [])

        wanted = self._wins_by_timestamp(cont_dim_df)

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_sort_value_greater_than_number_of_columns_is_ignored(self):
        """``sort=[5]`` on this frame is expected to leave the row order as in the input frame."""
        widget = Pandas(slicer.metrics.wins, sort=[5])
        actual = widget.transform(cont_dim_df, slicer, [slicer.dimensions.timestamp], [])

        wanted = self._wins_by_timestamp(cont_dim_df)

        pandas.testing.assert_frame_equal(wanted, actual)

    def test_sort_with_no_index(self):
        """``sort=[0]`` on ``no_index_df`` is expected to return the relabelled frame in its original order."""
        widget = Pandas(slicer.metrics.wins, sort=[0])
        actual = widget.transform(no_index_df, slicer, [slicer.dimensions.timestamp], [])

        wanted = self._wins_by_timestamp(no_index_df)

        pandas.testing.assert_frame_equal(wanted, actual)