Source code for nilmtk.feature_detectors.steady_states

from __future__ import print_function, division
import numpy as np
import pandas as pd 
import sys


# Fix the seed for repeatability of experiments
SEED = 42
np.random.seed(SEED)


def find_steady_states_transients(metergroup, cols, noise_level,
                                  state_threshold, **load_kwargs):
    """Finds steady states and transients for each chunk loaded from a
    MeterGroup.

    Returns
    -------
    [steady_states, transients] : list of two pd.DataFrames
    """
    steady_states_list = []
    transients_list = []

    for power_df in metergroup.load(cols=cols, **load_kwargs):
        # if len(power_df.columns) <= 2:
        #     # Use whatever is available
        #     power_dataframe = power_df
        # else:
        #     # Active, reactive and apparent are available
        #     power_dataframe = power_df[[('power', 'active'),
        #                                 ('power', 'reactive')]]
        power_dataframe = power_df.dropna()

        x, y = find_steady_states(power_dataframe, noise_level=noise_level,
                                  stateThreshold=state_threshold)
        steady_states_list.append(x)
        transients_list.append(y)
    return [pd.concat(steady_states_list), pd.concat(transients_list)]
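
A minimal usage sketch, assuming a dataset already converted to NILMTK's
HDF5 format (the file name 'redd.h5', the building number and the load
kwargs are hypothetical):

    from nilmtk import DataSet

    dataset = DataSet('redd.h5')  # hypothetical converted dataset
    mains = dataset.buildings[1].elec.mains()
    steady_states, transients = find_steady_states_transients(
        mains, cols=[('power', 'active')], noise_level=70,
        state_threshold=15, sample_period=60)
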
def find_steady_states(dataframe, min_n_samples=2, stateThreshold=15,
                       noise_level=70):
    """Finds steady states given a DataFrame of power.

    Parameters
    ----------
    dataframe : pd.DataFrame with DateTimeIndex
    min_n_samples : int
        Number of samples which constitute a steady state.
    stateThreshold : int or float
        Maximum difference between the highest and lowest value in a
        steady state.
    noise_level : int or float
        The level used to define significant appliances; transitions
        below this level will be ignored.  See Hart 1985, p. 27.

    Returns
    -------
    steady_states, transitions : pd.DataFrame
    """
    # Tells whether we have both real and reactive power or only real power
    num_measurements = len(dataframe.columns)
    estimated_steady_power = np.array([0] * num_measurements)
    last_steady_power = np.array([0] * num_measurements)
    previous_measurement = np.array([0] * num_measurements)

    # These flags store the state of the power signal
    instantaneous_change = False  # power changing this second
    ongoing_change = False  # power change in progress over multiple seconds

    index_transitions = []  # indices to use in the returned DataFrames
    index_steady_states = []
    transitions = []  # holds information on transitions
    steady_states = []  # steady states to store in the returned DataFrame
    N = 0  # number of samples in the current state
    time = dataframe.iloc[0].name  # first state starts at the beginning

    # Iterate over the rows performing the algorithm
    print("Finding Edges, please wait ...", end="\n")
    sys.stdout.flush()

    for row in dataframe.itertuples():

        # Test if either active or reactive power moved more than the
        # threshold.
        # http://stackoverflow.com/questions/17418108/elegant-way-to-perform-tuple-arithmetic
        # http://stackoverflow.com/questions/13168943/expression-for-elements-greater-than-x-and-less-than-y-in-python-all-in-one-ret

        # Step 2: perform the threshold test, then sum the boolean array
        this_measurement = row[1:3]
        state_change = np.fabs(
            np.subtract(this_measurement, previous_measurement))

        if np.sum(state_change > stateThreshold):
            instantaneous_change = True
        else:
            instantaneous_change = False

        # Step 3: identify if a transition is just starting; if so, process it
        if instantaneous_change and (not ongoing_change):

            # Calculate transition size
            last_transition = np.subtract(estimated_steady_power,
                                          last_steady_power)

            # Sum boolean array to verify the transition is above noise level
            if np.sum(np.fabs(last_transition) > noise_level):
                # 3A, C: if so, add the index of the transition start and
                # the power information.  This also avoids outputting the
                # first transition from zero.
                index_transitions.append(time)
                transitions.append(last_transition)

                # I think we want this, though it is not specifically in
                # Hart's algorithm notes.  We don't want to append a steady
                # state if it lasts fewer than min_n_samples:
                # if N > min_n_samples:
                index_steady_states.append(time)
                # last state's steady power
                steady_states.append(estimated_steady_power)

            # 3B
            last_steady_power = estimated_steady_power
            # 3C
            time = row[0]

        # Step 4: if a new steady state is starting, zero the counter
        if instantaneous_change:
            N = 0

        # Hart step 5: update our estimate of the steady state's power
        # (incremental mean: fold this sample into the running average)
        estimated_steady_power = np.divide(
            np.add(np.multiply(N, estimated_steady_power),
                   this_measurement), (N + 1))

        # Step 6: increment counter
        N += 1

        # Step 7
        ongoing_change = instantaneous_change

        # Step 8
        previous_measurement = this_measurement

    # Append the last edge
    last_transition = np.subtract(estimated_steady_power, last_steady_power)
    if np.sum(np.fabs(last_transition) > noise_level):
        index_transitions.append(time)
        transitions.append(last_transition)
        index_steady_states.append(time)
        steady_states.append(estimated_steady_power)

    # Remove the first edge if the starting steady state power is more
    # than the noise threshold (the emptiness check guards against the
    # no-events case).
    # https://github.com/nilmtk/nilmtk/issues/400
    if (index_transitions
            and np.sum(steady_states[0] > noise_level)
            and index_transitions[0] == index_steady_states[0]
            == dataframe.iloc[0].name):
        transitions = transitions[1:]
        index_transitions = index_transitions[1:]
        steady_states = steady_states[1:]
        index_steady_states = index_steady_states[1:]

    print("Edge detection complete.")

    print("Creating transition frame ...")
    sys.stdout.flush()

    cols_transition = {1: ['active transition'],
                       2: ['active transition', 'reactive transition']}
    cols_steady = {1: ['active average'],
                   2: ['active average', 'reactive average']}

    if len(index_transitions) == 0:
        # No events
        return pd.DataFrame(), pd.DataFrame()
    else:
        transitions = pd.DataFrame(data=transitions,
                                   index=index_transitions,
                                   columns=cols_transition[num_measurements])
        print("Transition frame created.")

        print("Creating states frame ...")
        sys.stdout.flush()
        steady_states = pd.DataFrame(data=steady_states,
                                     index=index_steady_states,
                                     columns=cols_steady[num_measurements])
        print("States frame created.")
        print("Finished.")
        return steady_states, transitions
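
A self-contained sketch on synthetic data (timestamps and wattages chosen
purely for illustration; only the number of columns matters to the
algorithm):

    import pandas as pd

    idx = pd.date_range('2015-01-01', periods=12, freq='1S')
    power = pd.DataFrame(
        {'active': [0, 0, 0, 100, 102, 101, 100, 0, 0, 0, 0, 0]},
        index=idx)
    steady_states, transitions = find_steady_states(
        power, noise_level=70, stateThreshold=15)
    print(transitions)
    # Expect one positive and one negative 'active transition' of
    # roughly +/- 101 W, indexed by the times the edges start.
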
def cluster(x, max_num_clusters=3):
    """Applies clustering on reduced data, i.e. data where power is
    greater than the threshold.

    Parameters
    ----------
    x : pd.Series or single-column pd.DataFrame
    max_num_clusters : int

    Returns
    -------
    centroids : ndarray of int32s
        Power in the different states of an appliance, sorted
    """
    # Find where power consumption is greater than 10 watts
    data = _transform_data(x)

    # Find clusters
    centroids = _apply_clustering(data, max_num_clusters)
    centroids = np.append(centroids, 0)  # add 'off' state
    centroids = np.round(centroids).astype(np.int32)
    centroids = np.unique(centroids)  # np.unique also sorts
    # TODO: merge similar clusters
    return centroids
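
A hedged sketch of calling cluster on a synthetic appliance signal (the
wattages are arbitrary; with levels this well separated the centroids
land near the true values):

    import numpy as np
    import pandas as pd

    power = pd.Series(np.concatenate([np.zeros(500),
                                      np.random.normal(100, 2, 500),
                                      np.random.normal(2000, 5, 500)]))
    print(cluster(power, max_num_clusters=3))
    # e.g. array([   0,  100, 2000], dtype=int32) -- 0 is the added
    # 'off' state; the zero readings themselves fall below the 10 W
    # threshold and are discarded by _transform_data.
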
def _transform_data(data):
    """Subsamples if needed and converts to a column vector (which is
    what scikit-learn requires).

    Parameters
    ----------
    data : pd.Series or single-column pd.DataFrame

    Returns
    -------
    data_above_thresh : ndarray
        column vector
    """
    MAX_NUMBER_OF_SAMPLES = 2000
    MIN_NUMBER_OF_SAMPLES = 20
    DATA_THRESHOLD = 10

    data_above_thresh = data[data > DATA_THRESHOLD].dropna().values
    n_samples = len(data_above_thresh)

    if n_samples < MIN_NUMBER_OF_SAMPLES:
        return np.zeros((MAX_NUMBER_OF_SAMPLES, 1))
    elif n_samples > MAX_NUMBER_OF_SAMPLES:
        # Randomly subsample (we don't want to smoothly downsample
        # because that is likely to change the values)
        random_indices = np.random.randint(0, n_samples,
                                           MAX_NUMBER_OF_SAMPLES)
        resampled = data_above_thresh[random_indices]
        return resampled.reshape(MAX_NUMBER_OF_SAMPLES, 1)
    else:
        return data_above_thresh.reshape(n_samples, 1)


def _apply_clustering(X, max_num_clusters):
    """
    Parameters
    ----------
    X : ndarray
    max_num_clusters : int

    Returns
    -------
    centroids : list of numbers
        List of power in the different states of an appliance
    """
    # If we import sklearn at the top of the file then it makes autodoc fail
    from sklearn.cluster import KMeans
    from sklearn import metrics

    # sklearn produces lots of DeprecationWarnings with PyTables
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Try each candidate number of clusters; whichever gives the highest
    # silhouette coefficient serves as the number of clusters for this
    # appliance.
    num_clus = -1
    sh = -1
    k_means_labels = {}
    k_means_cluster_centers = {}
    k_means_labels_unique = {}
    for n_clusters in range(1, max_num_clusters):
        try:
            k_means = KMeans(init='k-means++', n_clusters=n_clusters)
            k_means.fit(X)
            k_means_labels[n_clusters] = k_means.labels_
            k_means_cluster_centers[n_clusters] = k_means.cluster_centers_
            k_means_labels_unique[n_clusters] = np.unique(
                k_means_labels[n_clusters])
            try:
                sh_n = metrics.silhouette_score(
                    X, k_means_labels[n_clusters], metric='euclidean')
                if sh_n > sh:
                    sh = sh_n
                    num_clus = n_clusters
            except Exception:
                num_clus = n_clusters
        except Exception:
            if num_clus > -1:
                return k_means_cluster_centers[num_clus]
            else:
                return np.array([0])
    return k_means_cluster_centers[num_clus].flatten()
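
For completeness, a sketch of the private helpers in isolation (shapes
are the point here; the distribution parameters are arbitrary):

    import numpy as np
    import pandas as pd

    data = pd.Series(np.random.normal(150, 10, 5000))
    X = _transform_data(data)
    print(X.shape)  # (2000, 1): randomly subsampled down to
                    # MAX_NUMBER_OF_SAMPLES, reshaped to a column vector
    print(_apply_clustering(X, max_num_clusters=3))  # centres near 150
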