import pandas as pd
import numpy as np
import sys
from os import listdir, getcwd
from os.path import isdir, join, dirname, abspath
from pandas.tools.merge import concat
from nilmtk.utils import get_module_directory, check_directory_exists
from nilmtk.datastore import Key
from nilmtk.measurement import LEVEL_NAMES
from nilm_metadata import convert_yaml_to_hdf5
from inspect import currentframe, getfile, getsourcefile
from sys import getfilesystemencoding
"""
DATASET STRUCTURE:
------------------
On extracting all the dataset values, we should arrive at a similar directory structure as
mentioned.
ECO Dataset will have a folder '<i>_sm_csv' and '<i>_plug_csv' where i is the building no.
<i>_sm_csv has a folder 01
<i>_plug_csv has a folder 01, 02,....<n> where n is the plug numbers.
Each folder has a CSV file as per each day, with each day csv file containing
86400 entries.
"""
plugs_column_name = {1:('power', 'active'),
};
[docs]def convert_eco(dataset_loc, hdf_filename, timezone):
"""
Parameters:
-----------
dataset_loc: str
The root directory where the dataset is located.
hdf_filename: str
The location where the hdf_filename is present.
The directory location has to contain the
hdf5file name for the converter to work.
timezone: str
specifies the timezone of the dataset.
"""
# Creating a new HDF File
store = pd.HDFStore(hdf_filename, 'w', complevel=9, complib='blosc')
check_directory_exists(dataset_loc)
directory_list = [i for i in listdir(dataset_loc) if '.txt' not in i]
directory_list.sort()
print directory_list
# Traversing every folder
for folder in directory_list:
if folder[0] == '.' or folder[-3:] == '.h5':
print 'Skipping ', folder
continue
print 'Computing for folder',folder
#Building number and meter_flag
building_no = int(folder[:2])
meter_flag = 'sm' if 'sm_csv' in folder else 'plugs'
dir_list = [i for i in listdir(join(dataset_loc, folder)) if isdir(join(dataset_loc,folder,i))]
dir_list.sort()
print 'Current dir list:',dir_list
for fl in dir_list:
print 'Computing for folder ',fl
fl_dir_list = [i for i in listdir(join(dataset_loc,folder,fl)) if '.csv' in i]
fl_dir_list.sort()
if meter_flag == 'sm':
for fi in fl_dir_list:
df = pd.read_csv(join(dataset_loc,folder,fl,fi), names=[i for i in range(1,17)], dtype=np.float32)
for phase in range(1,4):
key = str(Key(building=building_no, meter=phase))
df_phase = df.ix[:,[1+phase, 5+phase, 8+phase, 13+phase]]
# get reactive power
power = df_phase.as_matrix([1+phase, 13+phase])
reactive = power[:,0] * np.tan(power[:,1] * np.pi / 180)
df_phase['Q'] = reactive
df_phase.index = pd.DatetimeIndex(start=fi[:-4], freq='s', periods=86400, tz='GMT')
df_phase = df_phase.tz_convert(timezone)
sm_column_name = {1+phase:('power', 'active'),
5+phase:('current', ''),
8+phase:('voltage', ''),
13+phase:('phase_angle', ''),
'Q': ('power', 'reactive'),
};
df_phase.rename(columns=sm_column_name, inplace=True)
tmp_before = np.size(df_phase.power.active)
df_phase = df_phase[df_phase.power.active != -1]
tmp_after = np.size(df_phase.power.active)
if (tmp_before != tmp_after):
print('Removed missing measurements - Size before: ' + str(tmp_before) + ', size after: ' + str(tmp_after))
df_phase.columns.set_names(LEVEL_NAMES, inplace=True)
if not key in store:
store.put(key, df_phase, format='Table')
else:
store.append(key, df_phase, format='Table')
store.flush()
print 'Building',building_no,', Meter no.',phase,'=> Done for ',fi[:-4]
else:
#Meter number to be used in key
meter_num = int(fl) + 3
key = str(Key(building=building_no, meter=meter_num))
#Getting dataframe for each csv file seperately
for fi in fl_dir_list:
df = pd.read_csv(join(dataset_loc,folder,fl ,fi), names=[1], dtype=np.float64)
df.index = pd.DatetimeIndex(start=fi[:-4], freq='s', periods=86400, tz = 'GMT')
df.rename(columns=plugs_column_name, inplace=True)
df = df.tz_convert(timezone)
df.columns.set_names(LEVEL_NAMES, inplace=True)
tmp_before = np.size(df.power.active)
df = df[df.power.active != -1]
tmp_after = np.size(df.power.active)
if (tmp_before != tmp_after):
print('Removed missing measurements - Size before: ' + str(tmp_before) + ', size after: ' + str(tmp_after))
# If table not present in hdf5, create or else append to existing data
if not key in store:
store.put(key, df, format='Table')
print 'Building',building_no,', Meter no.',meter_num,'=> Done for ',fi[:-4]
else:
store.append(key, df, format='Table')
store.flush()
print 'Building',building_no,', Meter no.',meter_num,'=> Done for ',fi[:-4]
print "Data storage completed."
store.close()
# Adding the metadata to the HDF5file
print "Proceeding to Metadata conversion..."
meta_path = join(_get_module_directory(), 'metadata')
convert_yaml_to_hdf5(meta_path, hdf_filename)
print "Completed Metadata conversion."
def _get_module_directory():
# Taken from http://stackoverflow.com/a/6098238/732596
path_to_this_file = dirname(getfile(currentframe()))
if not isdir(path_to_this_file):
encoding = getfilesystemencoding()
path_to_this_file = dirname(unicode(__file__, encoding))
if not isdir(path_to_this_file):
abspath(getsourcefile(lambda _: None))
if not isdir(path_to_this_file):
path_to_this_file = getcwd()
assert isdir(path_to_this_file), path_to_this_file + ' is not a directory'
return path_to_this_file