Код IT
← Каталог

Машинное обучение — Предобработка данных

Фрагмент из раздела «Машинное обучение»: предобработка данных.

python aiencyclopedia6-02-mashinnoe-obuchenie-1 embed URL статья в энциклопедии
Python main.py

import pandas as pd

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Build the preprocessing pipeline.
# Columns are split into two groups, each handled by its own sub-pipeline.
numeric_features = ["age", "income", "score"]
categorical_features = ["gender", "Образование", "city"]

# Numeric branch: fill missing values with the column median, then scale.
numeric_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: replace missing values with the literal "missing",
# then one-hot encode. handle_unknown="ignore" keeps transform from raising
# on categories that were not present when the encoder was fitted.
categorical_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(fill_value="missing", strategy="constant")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Route each column group to its branch.
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# Apply the pipeline: load the CSV, then fit and transform in one step.
data = pd.read_csv("dataset.csv")
X_processed = preprocessor.fit_transform(data)

import pandas as pd

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Build the preprocessing pipeline.
# Columns are split into two groups, each handled by its own sub-pipeline.
numeric_features = ["age", "income", "score"]
categorical_features = ["gender", "Образование", "city"]

# Numeric branch: fill missing values with the column median, then scale.
numeric_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: replace missing values with the literal "missing",
# then one-hot encode. handle_unknown="ignore" keeps transform from raising
# on categories that were not present when the encoder was fitted.
categorical_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(fill_value="missing", strategy="constant")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Route each column group to its branch.
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# Apply the pipeline: load the CSV, then fit and transform in one step.
data = pd.read_csv("dataset.csv")
X_processed = preprocessor.fit_transform(data)