def create_labels(entityset,
                  min_training_data='28 days',
                  lead='7 days',
                  window='28 days',
                  reduce='sum',
                  binarize=None,
                  iterate_by=None):
    label_cols = ['quantity', 'price']
    time_index = "order_date"
    index = "customer_id"
    df = entityset['orders'].df.merge(
        entityset['order_products'].df, how='outer')

    tqdm.pandas(desc="Creating Labels", unit="customer")

    # Only use data after one of the label columns has been non-null
    # for i, v in df[label_cols].iterrows():
    #     if v.dropna(how='all').shape[0] > 0:
    #         df = df.loc[slice(i, None), :]
    #         break

    grouped = df.groupby(index, as_index=True)
    project_cutoff_dates = grouped.progress_apply(
        lambda df: make_labels_from_windows(
            df,
            cols=label_cols,
            min_training_data=min_training_data,
            lead=lead, window=window,
            index_col=index,
            date_col=time_index))  # call truncated in the source snippet
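# A minimal, self-contained sketch of the tqdm + pandas pattern driving the
# function above: tqdm.pandas() patches pandas with progress_apply, which
# then ticks once per group (toy data; make_labels_from_windows and the
# entity set are not needed here):
import pandas as pd
from tqdm import tqdm

tqdm.pandas(desc="Creating Labels", unit="customer")
orders = pd.DataFrame({
    "customer_id": [1, 1, 2, 2],
    "quantity": [3, 1, 2, 5],
    "price": [9.5, 4.0, 7.25, 3.1],
})
totals = orders.groupby("customer_id").progress_apply(
    lambda g: g[["quantity", "price"]].sum())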
def _apply_on_text_right(self, func, rename, verbose=1):
    name = rename or 'text_right'
    if verbose:
        tqdm.pandas(desc="Processing " + name + " with " + func.__name__)
        self._right[name] = self._right['text_right'].progress_apply(func)
    else:
        self._right[name] = self._right['text_right'].apply(func)
"""
import os
import pandas as pd
import re
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
import logging
from .utils.misc_utils import save_as_pickle, load_pickle
from .utils.word_char_level_vocab import vocab_mapper
from .models.BERT.tokenization_bert import BertTokenizer
tqdm.pandas(desc="prog_bar")
logging.basicConfig(format='%(asctime)s [%(levelname)s]: %(message)s',
                    datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)
logger = logging.getLogger(__file__)
def pad_sos_eos(x, sos, eos):
    return [sos] + x + [eos]
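# pad_sos_eos simply brackets a token-id list with start/end markers; the
# ids below are illustrative, not from the source:
assert pad_sos_eos([17, 23, 42], sos=1, eos=2) == [1, 17, 23, 42, 2]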
class args():
    def __init__(self):
        self.batch_size = 5
def clean_and_tokenize_text(text, table, tokenizer, clean_only=False):
    if isinstance(text, str):
        text = text.replace("(CNN) -- ","").replace("U.N.", "UN").replace("U.S.", "USA")
        text = text.replace(".", ". ").replace(",", ", ").replace("?", "? ").replace("!", "! ")
        text = text.translate(table)
        # snippet truncated in the source; the original presumably goes on to
        # tokenize the cleaned text with `tokenizer` unless clean_only is set
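# The `table` parameter above is a str.translate mapping; for example, one
# built with str.maketrans to delete stray characters (illustrative, not the
# source's actual table):
table = str.maketrans('', '', '#$%&*')
print("A#B$C".translate(table))  # -> ABC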
def _apply_on_text_left(self, func, rename, verbose=1):
    name = rename or 'text_left'
    if verbose:
        tqdm.pandas(desc="Processing " + name + " with " + func.__name__)
        self._left[name] = self._left['text_left'].progress_apply(func)
    else:
        self._left[name] = self._left['text_left'].apply(func)
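# A self-contained sketch of what the two helpers above do, with a toy frame
# standing in for self._left / self._right (an assumption, not MatchZoo's
# actual data structures):
import pandas as pd
from tqdm import tqdm

def lowercase(text):
    return text.lower()

frame = pd.DataFrame({"text_left": ["Hello World", "FOO Bar"]})
tqdm.pandas(desc="Processing text_left with " + lowercase.__name__)
frame["text_left"] = frame["text_left"].progress_apply(lowercase)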
def __call__(self, input_dataframe):
    tqdm.pandas(desc=f"Building {self.__class__.__name__} at {self.cache}")
    return tf.data.experimental.choose_from_datasets(
        datasets=(
            input_dataframe.assign(
                label_one_hot=lambda df: pd.get_dummies(df.label).values.tolist(),
                crop_window=lambda df: df[["crop_y", "crop_x", "crop_height", "crop_width"]].values.tolist(),
            )
            .groupby("label")
            .progress_apply(self.transform_group_to_shuffled_dataset)
        ),
        choice_dataset=(
            tf.data.Dataset.range(len(input_dataframe.label.unique()))
            .shuffle(buffer_size=len(input_dataframe.label.unique()), reshuffle_each_iteration=True)
            .flat_map(self.repeat_k_shot)
        ),
    ).map(
        lambda annotation: (self.preprocessing(annotation["image"]), tf.cast(annotation[self.label_column], tf.float32)),
    )  # .map() call truncated in the source snippet
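# A minimal, self-contained sketch of the choose_from_datasets pattern used
# above: one dataset per class, interleaved by a dataset of class indices
# (toy data; TF 2.x, where this API lives under tf.data.experimental):
import tensorflow as tf

per_class = [
    tf.data.Dataset.from_tensor_slices([0, 0, 0]),
    tf.data.Dataset.from_tensor_slices([1, 1, 1]),
]
choice = tf.data.Dataset.range(2).repeat(3)  # yields 0, 1, 0, 1, 0, 1
mixed = tf.data.experimental.choose_from_datasets(per_class, choice)
print(list(mixed.as_numpy_iterator()))  # [0, 1, 0, 1, 0, 1]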
"""Bert Preprocessor."""
from tqdm import tqdm
from . import units
from .chain_transform import chain_transform
from matchzoo import DataPack
from matchzoo.engine.base_preprocessor import BasePreprocessor
from .build_vocab_unit import built_bert_vocab_unit
from .build_unit_from_data_pack import build_unit_from_data_pack
tqdm.pandas()
class BertPreprocessor(BasePreprocessor):
    """Bert-base Model preprocessor."""

    def __init__(self, bert_vocab_path: str,
                 fixed_length_left: int = 30,
                 fixed_length_right: int = 30,
                 filter_mode: str = 'df',
                 filter_low_freq: float = 2,
                 filter_high_freq: float = float('inf'),
                 remove_stop_words: bool = False,
                 lower_case: bool = True,
                 chinese_version: bool = False,
                 ):
"""
"""
import os
import gc
import numpy as np
import pandas as pd
from sklearn.preprocessing import minmax_scale
from keras.layers import (Input, Embedding, SpatialDropout1D, Bidirectional,
                          LSTM, GRU, GlobalMaxPool1D, Concatenate, Dropout,
                          Dense)
from keras.models import Model
from neural_networks import Attention, DropConnect
from neural_networks import NeuralNetworkClassifier
from tqdm import tqdm
tqdm.pandas()
# model configs
MAX_FEATURES = int(2.5e5) # total word count = 227,538; clean word count = 186,551 # noqa
MAX_LEN = 75 # mean_len = 12; Q99_len = 40; max_len = 189;
RNN_UNITS = 40
F_DROPOUT = 0.05
DENSE_UNITS_1 = 128
DENSE_UNITS_2 = 16
# file configs
MODEL_FILEPATH = os.path.join(
    os.environ['DATA_PATH'],
    'models',
    'model_v50.hdf5'
)
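# A sketch of how MAX_FEATURES and MAX_LEN above are typically consumed by a
# Keras text pipeline (an assumption based on the surrounding imports;
# `texts` is a stand-in corpus, not the project's data):
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

texts = ["how do i learn python", "why is the sky blue"]
tokenizer = Tokenizer(num_words=MAX_FEATURES)  # cap the vocabulary size
tokenizer.fit_on_texts(texts)
padded = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=MAX_LEN)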
from sklearn.metrics import f1_score
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras import backend as K
from keras import initializers, regularizers, constraints
from keras.engine.topology import Layer
from keras.layers import (Input, Embedding, SpatialDropout1D, Bidirectional,
                          CuDNNLSTM, CuDNNGRU, GlobalMaxPool1D, Concatenate,
                          Dropout, Dense)
from keras.models import Model
from keras.callbacks import (EarlyStopping, ModelCheckpoint,
                             ReduceLROnPlateau)
from tqdm import tqdm
tqdm.pandas()
"""
utils
"""
def load_data(datapath):
    print("loading data ......")
    df_train = pd.read_csv(os.path.join(datapath, "train.csv"))
    df_test = pd.read_csv(os.path.join(datapath, "test.csv"))
    print("train data with shape : ", df_train.shape)
    print("test data with shape : ", df_test.shape)
    return df_train, df_test
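# Typical call site for load_data, reusing the DATA_PATH environment variable
# seen in the config block above (assumes train.csv and test.csv live under
# that directory):
df_train, df_test = load_data(os.environ['DATA_PATH'])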
"""
"""DIIN Preprocessor."""
from tqdm import tqdm
import pandas as pd
from matchzoo.engine.base_preprocessor import BasePreprocessor
from matchzoo import DataPack
from .build_vocab_unit import build_vocab_unit
from .chain_transform import chain_transform
from . import units
tqdm.pandas()
class DIINPreprocessor(BasePreprocessor):
    """DIIN Model preprocessor."""

    def __init__(self,
                 truncated_mode: str = 'pre',
                 truncated_length_left: int = 30,
                 truncated_length_right: int = 50):
        """
        DIIN Model preprocessor.

        :param truncated_mode: String, mode used by :class:`TruncatedLength`.
            Can be 'pre' or 'post'.
        :param truncated_length_left: Integer, maximum length of :attr:`left`
            in the data_pack.
        """
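# Hedged usage sketch: constructing the preprocessor with the defaults shown
# above and fitting/transforming a matchzoo DataPack via the BasePreprocessor
# interface (`train_pack` is an assumed DataPack, not defined in the source):
preprocessor = DIINPreprocessor(truncated_mode='pre',
                                truncated_length_left=30,
                                truncated_length_right=50)
# processed = preprocessor.fit_transform(train_pack)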