new changes to add spc

This commit is contained in:
2024-01-01 09:35:34 +01:00
parent cf20979b1c
commit 8546301ce5
3 changed files with 1932 additions and 0 deletions

1198
src/app.py Normal file

File diff suppressed because it is too large Load Diff

80
src/data/spc_loader.py Normal file
View File

@@ -0,0 +1,80 @@
import pandas as pd
class spc_DataSchema:
    """String constants describing the fields of the SPC CSV file.

    Every attribute is a plain ``str``.  The earlier definition ended most
    assignments with a trailing comma, which silently turned each value into
    a one-element tuple (``("batch",)``) instead of a string.
    """

    # NOTE(review): BATCH, SPEED and timestamp read like column names, while
    # the repeated "numeric" / "character" values read like type labels.
    # Because many constants share the same value they cannot serve as
    # distinct dictionary keys -- confirm the real CSV header names.
    BATCH = "batch"
    SPEED = "speed"
    BATCHQUANTITY = "numeric"
    PARA1 = "numeric"
    PARA2 = "character"
    PARA3 = "numeric"
    PARA4 = "numeric"
    PARA5 = "numeric"
    PARA6 = "numeric"
    PARA7 = "numeric"
    PARA8 = "numeric"
    PARA9 = "numeric"
    PARA10 = "numeric"
    PARA11 = "numeric"
    PARA12 = "numeric"
    PARA13 = "numeric"
    PARA14 = "numeric"
    PARA15 = "numeric"
    PARA16 = "numeric"
    PARA17 = "numeric"
    PARA18 = "numeric"
    PARA19 = "numeric"
    PARA20 = "numeric"
    PARA21 = "numeric"
    PARA22 = "character"
    PARA23 = "numeric"
    timestamp = "date"
class DataSchema:
    """Column-name constants: amount, category, date, month and year."""

    AMOUNT: str = "amount"
    CATEGORY: str = "category"
    DATE: str = "date"
    MONTH: str = "month"
    YEAR: str = "year"
def load_spc_data(path: str) -> pd.DataFrame:
    """Load the SPC CSV file and add derived date columns.

    The column named by ``spc_DataSchema.timestamp`` is parsed as a
    datetime, and three columns are added to the returned frame:

    * ``"formatted_date"`` -- the timestamp column as datetimes,
    * ``"wd"`` -- ISO week number plus ``0.1 *`` weekday (Monday = 0),
    * ``DataSchema.MONTH`` -- the month number of the timestamp.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded data including the derived columns.

    Raises:
        ValueError: Raised by pandas when the timestamp column is missing
            from the file.
    """
    timestamp_col = spc_DataSchema.timestamp

    # NOTE(review): the previous version passed a ``dtype`` mapping whose
    # values ("batch", "speed", "numeric", "character", "datetime") are not
    # pandas dtypes, so ``read_csv`` raised ``TypeError`` before reading
    # anything, and whose keys collided.  Column types are now inferred by
    # pandas; reinstate explicit dtypes once the CSV header names are known.
    data = pd.read_csv(path, parse_dates=[timestamp_col])

    # ``parse_dates`` can leave the column as ``object`` when values fail to
    # parse, so convert explicitly before using the ``.dt`` accessor.
    timestamps = pd.to_datetime(data[timestamp_col])

    data["formatted_date"] = timestamps
    # ``weekday`` is a property of the ``.dt`` accessor, not a method.
    # NOTE(review): the output column name "wd" is taken from the attribute
    # the old code referenced (``DataSchema.wd``), which is not defined --
    # confirm the name expected by downstream consumers.
    data["wd"] = timestamps.dt.isocalendar().week + 0.1 * timestamps.dt.weekday
    data[DataSchema.MONTH] = timestamps.dt.month
    return data