How to use the scitime._utils.get_path function in scitime

To help you get started, we’ve selected a few scitime examples based on popular ways it is used in public projects.
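
Judging from the snippets below, get_path resolves a file or directory name relative to the installed scitime package directory, so bundled artifacts (pickled models, csv results) can be located regardless of the current working directory. A minimal sketch of the pattern, assuming that behavior; the estimator filename is hypothetical:

from scitime._utils import get_path

# get_path("models") should point at the models directory shipped with scitime
models_dir = get_path("models")

# hypothetical filename following the {meta_algo}_{algo}_estimator.pkl
# convention used in the snippets below
model_path = f'{models_dir}/NN_RandomForestRegressor_estimator.pkl'
print(model_path)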

github nathan-toubiana / scitime / scitime / _model.py View on Github
def _scale_data(self, X_train, X_test, save_model):
        """
        fits a StandardScaler on X_train, scales X_train and X_test, and
        saves the scaler as a pkl file if specified

        :param X_train: pd.DataFrame chosen as input for the training set
        :param X_test: pd.DataFrame chosen as input for the test set
        :param save_model: boolean set to True if the model needs to be saved
        :return: X_train and X_test data scaled
        :rtype: np.ndarray
        """
        scaler = StandardScaler()
        scaler.fit(X_train)

        if save_model:
            if self.verbose >= 2:
                self.logger.info(f'''Saving scaler model to scaler_{self.algo}_estimator.pkl''')

            model_path = f'''{get_path("models")}/scaler_{self.algo}_estimator.pkl'''
            joblib.dump(scaler, model_path)

        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        return X_train_scaled, X_test_scaled
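
Because the scaler is persisted with joblib under get_path("models"), it can be reloaded later through the same path convention. A minimal sketch, with a hypothetical algo name:

import joblib

from scitime._utils import get_path

# hypothetical algo name; the file is saved as scaler_{algo}_estimator.pkl above
algo = 'RandomForestRegressor'
scaler_path = f'{get_path("models")}/scaler_{algo}_estimator.pkl'
scaler = joblib.load(scaler_path)

# new inputs must have the same columns, in the same order, as X_train:
# X_new_scaled = scaler.transform(X_new)
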
github nathan-toubiana / scitime / scitime / _model.py View on Github
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=0.20, random_state=42)
        if self.meta_algo == 'NN':
            X_train_scaled, X_test_scaled = self._scale_data(X_train, X_test, save_model)
            meta_algo.fit(X_train_scaled, y_train)

        else:
            meta_algo.fit(X_train, y_train)

        if save_model:
            if self.verbose >= 2:
                self.logger.info(f'Saving {self.meta_algo} to {self.meta_algo}_{self.algo}_estimator.pkl')

            model_path = f'{get_path("models")}/{self.meta_algo}_{self.algo}_estimator.pkl'
            joblib.dump(meta_algo, model_path)

            json_path = f'{get_path("models")}/{self.meta_algo}_{self.algo}_estimator.json'

            with open(json_path, 'w') as outfile:
                json.dump({"dummy": list(cols), "original": list(original_cols)}, outfile)

        if self.meta_algo == 'NN':
            if self.verbose >= 2:
                self.logger.info(f'R squared on train set is {r2_score(y_train, meta_algo.predict(X_train_scaled))}')

            # MAPE is the mean absolute percentage error https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
            y_pred_test = np.array([max(i, 0) for i in meta_algo.predict(X_test_scaled)])
            y_pred_train = np.array([max(i, 0) for i in meta_algo.predict(X_train_scaled)])

        else:
            if self.verbose >= 2:
                self.logger.info(f'R squared on train set is {r2_score(y_train, meta_algo.predict(X_train))}')
            y_pred_test = meta_algo.predict(X_test)
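
The estimator persisted by joblib.dump above can be reloaded the same way to serve runtime predictions. A sketch with hypothetical meta_algo/algo names:

import joblib

from scitime._utils import get_path

# hypothetical names following the {meta_algo}_{algo}_estimator.pkl convention
meta_algo, algo = 'NN', 'RandomForestRegressor'
model_path = f'{get_path("models")}/{meta_algo}_{algo}_estimator.pkl'
estimator = joblib.load(model_path)

# estimator.predict(...) then yields the fitted meta-model's runtime estimates
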
github nathan-toubiana / scitime / scitime / estimate.py View on Github
def _fetch_inputs(json_path):
        """
        retrieves estimation inputs (made dummy)

        :param json_path: path to the json file listing the columns
        :return: list of inputs
        """
        return json.load(open(get_path(json_path)))
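
The json file read here is the one written by the json.dump call in model_fit above, so it holds a dict with "dummy" and "original" column lists. A minimal sketch of consuming it, with a hypothetical filename:

import json

from scitime._utils import get_path

# hypothetical filename following the {meta_algo}_{algo}_estimator.json convention
json_path = 'models/NN_RandomForestRegressor_estimator.json'
with open(get_path(json_path)) as f:
    inputs = json.load(f)

dummy_cols = inputs['dummy']        # dummified feature columns
original_cols = inputs['original']  # columns before dummification
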
github nathan-toubiana / scitime / scitime / _model.py View on Github
def _transform_from_csv(self, csv_name):
        """
        takes data from a csv and returns inputs and outputs
        in the right format for model_fit
        (needed because pd.read_csv can change the format of ambiguous columns)

        :param csv_name: name of the csv from the generate data step
        :return: inputs and outputs
        """
        df = pd.read_csv(get_path(csv_name))

        meta_params = self.params
        parameters_list = list(meta_params['internal_params'].keys())
        external_parameters_list = list(meta_params['external_params'].keys())
        df.columns = meta_params['other_params'] + external_parameters_list + parameters_list + ['output']

        semi_dummy_inputs = self.params['semi_dummy_inputs']
        for col in semi_dummy_inputs:
            df[col] = df[col].apply(self._str_to_float)

        inputs = df.drop(['output'], axis=1)
        outputs = df[['output']]

        return inputs, outputs
github nathan-toubiana / scitime / scitime / estimate.py View on Github
transforms the dataframe of the params of the estimated
        model before predicting runtime

        :param df: dataframe of all inputted parameters
        :param algo: algo whose runtime the user wants to predict
        :param scaled: scales the input if set to True
        :return: np array of all relevant algo parameters
        and system features used to estimate algo training time
        :rtype: np.ndarray
        """
        param_dic = self._fetch_algo_metadata(algo)
        algo_name = param_dic['name']
        algo_params = param_dic['params']
        params = param_dic['config']

        json_path = f'''{get_path("models")}/{self.meta_algo}_{algo_name}_estimator.json'''
        inputs = self._fetch_inputs(json_path)
        estimation_inputs = inputs['dummy']
        estimation_original_inputs = inputs['original']

        # first we transform semi dummy features
        semi_dummy_inputs = params['semi_dummy_inputs']
        # we add columns for each semi dummy features
        # (times the number of potential dummy values)
        df = self._add_semi_dummy(df, semi_dummy_inputs)

        forgotten_inputs = \
            list(set(estimation_original_inputs) - set(df.columns))

        if len(forgotten_inputs) > 0:
            # if some params that we use to train the underlying
            # meta model do not appear, we can't predict the runtime
            raise NameError(f'{forgotten_inputs} parameters missing')
github nathan-toubiana / scitime / scitime / _model.py View on Github
def _add_row_to_csv(self, row_input, row_output):
        """
        writes a row into the csv results file - parameters (X) and number of seconds (y)

        :param row_input: row inputs
        :param row_output: row output
        :return:
        """
        csv_name = f'{self.algo}_result.csv'
        with open(get_path(csv_name), 'a+', newline='') as file:
            writer = csv.writer(file)
            row = list(row_input) + [row_output]
            writer.writerow(row)
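
Rows appended this way accumulate in {algo}_result.csv inside the package directory, which _transform_from_csv above reads back via pd.read_csv(get_path(csv_name)). A sketch of inspecting that file, assuming a hypothetical algo name:

import pandas as pd

from scitime._utils import get_path

# hypothetical algo name; rows were appended to {algo}_result.csv above
algo = 'RandomForestRegressor'
df = pd.read_csv(get_path(f'{algo}_result.csv'))
print(df.tail())  # last rows: parameters (X) plus the observed seconds (y)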

scitime: Training time estimator for scikit-learn algorithms (BSD-3-Clause)