Module BML.transform.dataset_transformation
Expand source code
#!/usr/bin/env python3
import os, random, time
import json
from BML.utils import utils
from BML.transform.base_transform import transform
def getFolders(folder):
    """Return the sample folders found below ``folder``.

    The tree rooted at ``folder`` is walked with ``os.walk``. Whenever a
    directory holds at least one file whose name contains ``"routes.json"``,
    the *parent* of that directory is reported.

    Args:
        folder: Root directory to scan.

    Returns:
        A list of parent-directory paths, without duplicates, in the order
        in which ``os.walk`` first reached them.
    """
    folders = []
    # Set mirror of ``folders``: O(1) duplicate detection while the list
    # keeps the discovery order.
    seen = set()
    for root, _dirs, files in os.walk(folder):
        # One matching file is enough; the reported path depends only on
        # ``root``, not on which file matched.
        if not any("routes.json" in filename for filename in files):
            continue
        sample_dir = os.path.dirname(root)
        if sample_dir not in seen:
            seen.add(sample_dir)
            folders.append(sample_dir)
    return folders
def transformSample(folder, transform_module, transform_name, params, logFiles=None):
    """Apply one transformation to a single sample folder.

    Input paths are derived from ``folder``:

    * priming file: ``<folder>/priming_data/routes.json.gz``
    * data file:    ``<folder>/data/updates.csv.gz``
    * output:       ``<folder>/transform/<transform_name>`` (passed through
      ``utils.mkdirPath``; presumably created there -- TODO confirm)

    Args:
        folder: Path of the sample folder.
        transform_module: Dotted name of the module providing the
            transformation.
        transform_name: Attribute (or submodule) of ``transform_module`` to
            use as the transformation.
        params: Parameters forwarded to ``transform`` as ``params``.
        logFiles: Forwarded to ``transform`` as ``logFiles``. Defaults to a
            fresh empty list on every call.

    Raises:
        ImportError: If ``transform_module`` or ``transform_name`` cannot be
            imported.
    """
    # Local import so this function does not depend on the file's import block.
    import importlib

    # A ``[]`` default would be one list shared by every call.
    if logFiles is None:
        logFiles = []

    outputfolder = utils.mkdirPath(os.sep.join([folder, "transform", transform_name]))
    primingFile = os.sep.join([folder, "priming_data", "routes.json.gz"])
    dataFile = os.sep.join([folder, "data", "updates.csv.gz"])

    # The previous ``exec("from ... import ...")`` + ``locals()[...]`` lookup
    # relied on exec() writing into the function's locals, which no longer
    # happens on Python 3.13+ (PEP 667), and it executed caller-supplied
    # strings as code. importlib performs the same import without either issue.
    module = importlib.import_module(transform_module, package=__package__)
    if hasattr(module, transform_name):
        transformation = getattr(module, transform_name)
    else:
        # Mirror ``from pkg import name``: fall back to the submodule pkg.name.
        transformation = importlib.import_module(module.__name__ + "." + transform_name)

    transform(transformation, primingFile, dataFile, params=params,
              outFolder=outputfolder, logFiles=logFiles)
class DatasetTransformation():
    """Describe one transformation to run over every sample of a dataset.

    ``getJobs()`` turns the configuration into a list of job dictionaries,
    one per sample folder, each describing a call to ``transformSample``.
    """

    def __init__(self, folder, transform_module, transform_name):
        """Store the dataset root and the transformation to apply.

        Args:
            folder: Dataset root, scanned by ``getFolders`` when no explicit
                folder list has been set.
            transform_module: Module name forwarded to ``transformSample``.
            transform_name: Transformation name forwarded to
                ``transformSample``.
        """
        self.folder = folder
        self.transform_module = transform_module
        self.transform_name = transform_name
        self.folders = None
        # Start with "no parameters" so getJobs() works even when
        # setParams() has not been called.
        self.params = {}

    def setParams(self, params):
        """Set the parameter mapping used by ``getJobs()``.

        ``params["global"]`` (optional) is the base for every sample;
        ``params[label][name]`` (optional) overrides it for the sample whose
        path ends in ``<label>/<name>``.
        """
        self.params = params

    def setFolders(self, folders):
        """Set an explicit list of sample folders, bypassing the scan of ``self.folder``."""
        self.folders = folders

    def getJobs(self):
        """Build one job description per sample folder.

        Returns:
            A list of dicts with the keys ``'includes'``, ``'target'``,
            ``'args'`` and ``'kwargs'``, in random order (the list is
            shuffled before being returned).
        """
        all_params = getattr(self, "params", None) or {}
        folders = getFolders(self.folder) if self.folders is None else self.folders

        jobs = []
        for f in folders:
            # Copy the global section so per-sample overrides never leak
            # into the shared configuration.
            params = all_params["global"].copy() if "global" in all_params else {}

            # The last two path components select the per-sample overrides.
            parts = f.split(os.sep)
            name = parts[-1]
            # A folder without any separator has no label component.
            label = parts[-2] if len(parts) > 1 else None

            if label in all_params and name in all_params[label]:
                params.update(all_params[label][name])

            jobs.append({
                'includes' : "from BML.transform.dataset_transformation import transformSample",
                'target': transformSample.__name__,
                'args': (f, self.transform_module, self.transform_name, params),
                'kwargs': {'logFiles':["LOG_ONLY"]}
            })

        random.shuffle(jobs)
        return jobs
Functions
def getFolders(folder)
-
Expand source code
def getFolders(folder):
    """Return the sample folders found below ``folder``.

    The tree rooted at ``folder`` is walked with ``os.walk``. Whenever a
    directory holds at least one file whose name contains ``"routes.json"``,
    the *parent* of that directory is reported.

    Args:
        folder: Root directory to scan.

    Returns:
        A list of parent-directory paths, without duplicates, in the order
        in which ``os.walk`` first reached them.
    """
    folders = []
    # Set mirror of ``folders``: O(1) duplicate detection while the list
    # keeps the discovery order.
    seen = set()
    for root, _dirs, files in os.walk(folder):
        # One matching file is enough; the reported path depends only on
        # ``root``, not on which file matched.
        if not any("routes.json" in filename for filename in files):
            continue
        sample_dir = os.path.dirname(root)
        if sample_dir not in seen:
            seen.add(sample_dir)
            folders.append(sample_dir)
    return folders
def transformSample(folder, transform_module, transform_name, params, logFiles=[])
-
Expand source code
def transformSample(folder, transform_module, transform_name, params, logFiles=None):
    """Apply one transformation to a single sample folder.

    Input paths are derived from ``folder``:

    * priming file: ``<folder>/priming_data/routes.json.gz``
    * data file:    ``<folder>/data/updates.csv.gz``
    * output:       ``<folder>/transform/<transform_name>`` (passed through
      ``utils.mkdirPath``; presumably created there -- TODO confirm)

    Args:
        folder: Path of the sample folder.
        transform_module: Dotted name of the module providing the
            transformation.
        transform_name: Attribute (or submodule) of ``transform_module`` to
            use as the transformation.
        params: Parameters forwarded to ``transform`` as ``params``.
        logFiles: Forwarded to ``transform`` as ``logFiles``. Defaults to a
            fresh empty list on every call.

    Raises:
        ImportError: If ``transform_module`` or ``transform_name`` cannot be
            imported.
    """
    # Local import so this function does not depend on the file's import block.
    import importlib

    # A ``[]`` default would be one list shared by every call.
    if logFiles is None:
        logFiles = []

    outputfolder = utils.mkdirPath(os.sep.join([folder, "transform", transform_name]))
    primingFile = os.sep.join([folder, "priming_data", "routes.json.gz"])
    dataFile = os.sep.join([folder, "data", "updates.csv.gz"])

    # exec() + locals() lookup breaks on Python 3.13+ (PEP 667) and runs
    # caller-supplied strings as code; importlib does the same import safely.
    module = importlib.import_module(transform_module, package=__package__)
    if hasattr(module, transform_name):
        transformation = getattr(module, transform_name)
    else:
        # Mirror ``from pkg import name``: fall back to the submodule pkg.name.
        transformation = importlib.import_module(module.__name__ + "." + transform_name)

    transform(transformation, primingFile, dataFile, params=params,
              outFolder=outputfolder, logFiles=logFiles)
Classes
class DatasetTransformation (folder, transform_module, transform_name)
-
Expand source code
class DatasetTransformation():
    """Describe one transformation to run over every sample of a dataset.

    ``getJobs()`` turns the configuration into a list of job dictionaries,
    one per sample folder, each describing a call to ``transformSample``.
    """

    def __init__(self, folder, transform_module, transform_name):
        """Store the dataset root and the transformation to apply.

        Args:
            folder: Dataset root, scanned by ``getFolders`` when no explicit
                folder list has been set.
            transform_module: Module name forwarded to ``transformSample``.
            transform_name: Transformation name forwarded to
                ``transformSample``.
        """
        self.folder = folder
        self.transform_module = transform_module
        self.transform_name = transform_name
        self.folders = None
        # Start with "no parameters" so getJobs() works even when
        # setParams() has not been called.
        self.params = {}

    def setParams(self, params):
        """Set the parameter mapping used by ``getJobs()``.

        ``params["global"]`` (optional) is the base for every sample;
        ``params[label][name]`` (optional) overrides it for the sample whose
        path ends in ``<label>/<name>``.
        """
        self.params = params

    def setFolders(self, folders):
        """Set an explicit list of sample folders, bypassing the scan of ``self.folder``."""
        self.folders = folders

    def getJobs(self):
        """Build one job description per sample folder.

        Returns:
            A list of dicts with the keys ``'includes'``, ``'target'``,
            ``'args'`` and ``'kwargs'``, in random order (the list is
            shuffled before being returned).
        """
        all_params = getattr(self, "params", None) or {}
        folders = getFolders(self.folder) if self.folders is None else self.folders

        jobs = []
        for f in folders:
            # Copy the global section so per-sample overrides never leak
            # into the shared configuration.
            params = all_params["global"].copy() if "global" in all_params else {}

            # The last two path components select the per-sample overrides.
            parts = f.split(os.sep)
            name = parts[-1]
            # A folder without any separator has no label component.
            label = parts[-2] if len(parts) > 1 else None

            if label in all_params and name in all_params[label]:
                params.update(all_params[label][name])

            jobs.append({
                'includes' : "from BML.transform.dataset_transformation import transformSample",
                'target': transformSample.__name__,
                'args': (f, self.transform_module, self.transform_name, params),
                'kwargs': {'logFiles':["LOG_ONLY"]}
            })

        random.shuffle(jobs)
        return jobs
Methods
def getJobs(self)
-
Expand source code
def getJobs(self):
    """Build one job description per sample folder.

    Folders come from ``self.folders`` when set, otherwise from
    ``getFolders(self.folder)``. Each sample's parameters start as a copy of
    ``self.params["global"]`` (if present) and are then overridden by
    ``self.params[label][name]`` (if present), where ``label`` and ``name``
    are the last two components of the folder path.

    Returns:
        A list of dicts with the keys ``'includes'``, ``'target'``,
        ``'args'`` and ``'kwargs'``, in random order (the list is shuffled
        before being returned).
    """
    # ``params`` only exists after setParams(); treat "never set" as empty
    # instead of failing with AttributeError.
    all_params = getattr(self, "params", None) or {}
    folders = getFolders(self.folder) if self.folders is None else self.folders

    jobs = []
    for f in folders:
        # Copy the global section so per-sample overrides never leak into
        # the shared configuration.
        params = all_params["global"].copy() if "global" in all_params else {}

        parts = f.split(os.sep)
        name = parts[-1]
        # A folder without any separator has no label component.
        label = parts[-2] if len(parts) > 1 else None

        if label in all_params and name in all_params[label]:
            params.update(all_params[label][name])

        jobs.append({
            'includes' : "from BML.transform.dataset_transformation import transformSample",
            'target': transformSample.__name__,
            'args': (f, self.transform_module, self.transform_name, params),
            'kwargs': {'logFiles':["LOG_ONLY"]}
        })

    random.shuffle(jobs)
    return jobs
def setFolders(self, folders)
-
Expand source code
def setFolders(self, folders):
    """Set an explicit list of sample folders.

    When this is not ``None``, ``getJobs()`` uses it instead of scanning
    ``self.folder`` with ``getFolders``.
    """
    self.folders = folders
def setParams(self, params)
-
Expand source code
def setParams(self, params):
    """Set the parameter mapping read by ``getJobs()``.

    ``getJobs()`` copies ``params["global"]`` (if present) as the base for
    each sample and then applies ``params[label][name]`` (if present), where
    ``label`` and ``name`` are the last two components of the sample path.
    """
    self.params = params