Алгоритмы ИИ — Пример реализации на Python

Код IT Загрузка примера кода…

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

import numpy as np

class ActiveLearningSystem:
    def __init__(self, model, strategy='uncertainty'):
        self.model = model
        self.strategy = strategy
        self.X_labeled = None
        self.y_labeled = None
        self.X_pool = None
        self.y_pool_true = None
    
    def initialize(self, X_labeled, y_labeled, X_pool, y_pool_true):
        self.X_labeled = X_labeled.copy()
        self.y_labeled = y_labeled.copy()
        self.X_pool = X_pool.copy()
        self.y_pool_true = y_pool_true.copy()
    
    def query_uncertainty(self, n_queries):
        probabilities = self.model.predict_proba(self.X_pool)
        uncertainties = 1 - np.max(probabilities, axis=1)
        query_indices = np.argsort(uncertainties)[-n_queries:][::-1]
        return query_indices
    
    def query_margin(self, n_queries):
        probabilities = self.model.predict_proba(self.X_pool)
        sorted_probs = np.sort(probabilities, axis=1)
        margins = sorted_probs[:, -1] - sorted_probs[:, -2]
        query_indices = np.argsort(margins)[:n_queries]
        return query_indices
    
    def query_entropy(self, n_queries):
        probabilities = self.model.predict_proba(self.X_pool)
        probabilities = np.clip(probabilities, 1e-10, 1.0)
        entropy = -np.sum(probabilities * np.log2(probabilities), axis=1)
        query_indices = np.argsort(entropy)[-n_queries:][::-1]
        return query_indices
    
    def query(self, n_queries):
        if self.strategy == 'uncertainty':
            indices = self.query_uncertainty(n_queries)
        elif self.strategy == 'margin':
            indices = self.query_margin(n_queries)
        elif self.strategy == 'entropy':
            indices = self.query_entropy(n_queries)
        else:
            raise ValueError(f"Неизвестная стратегия: {self.strategy}")
        
        return indices
    
    def update(self, query_indices):
        # Получение истинных меток из пула
        new_X = self.X_pool[query_indices]
        new_y = self.y_pool_true[query_indices]
        
        # Добавление в размеченный набор
        self.X_labeled = np.vstack([self.X_labeled, new_X])
        self.y_labeled = np.hstack([self.y_labeled, new_y])
        
        # Удаление из пула
        mask = np.ones(len(self.X_pool), dtype=bool)
        mask[query_indices] = False
        self.X_pool = self.X_pool[mask]
        self.y_pool_true = self.y_pool_true[mask]
    
    def train(self):
        self.model.fit(self.X_labeled, self.y_labeled)

# Генерация данных для демонстрации
X, y = make_classification(
    n_samples=5000,
    n_features=50,
    n_informative=30,
    n_redundant=20,
    n_classes=3,
    random_state=42
)

# Разделение на начальный размеченный набор и пул для запросов
X_init, X_pool, y_init, y_pool = train_test_split(
    X, y, train_size=50, stratify=y, random_state=42
)

# Инициализация системы активного обучения
model = RandomForestClassifier(n_estimators=100, random_state=42)
al_system = ActiveLearningSystem(model, strategy='uncertainty')
al_system.initialize(X_init, y_init, X_pool, y_pool)

# Цикл активного обучения
n_iterations = 30
queries_per_iteration = 10
accuracy_history = []

for iteration in range(n_iterations):
    # Обучение модели
    al_system.train()
    
    # Оценка качества на всём исходном наборе данных
    accuracy = al_system.model.score(X, y)
    accuracy_history.append(accuracy)
    
    print(f"Итерация {iteration}: размечено {len(al_system.X_labeled)} объектов, "
          f"точность {accuracy:.4f}")
    
    # Запрос новых объектов для разметки
    if len(al_system.X_pool) >= queries_per_iteration:
        query_indices = al_system.query(queries_per_iteration)
        al_system.update(query_indices)
    else:
        break

# Сравнение с пассивным обучением
passive_accuracies = []
X_passive_labeled = X_init.copy()
y_passive_labeled = y_init.copy()
X_passive_pool = X_pool.copy()
y_passive_pool = y_pool.copy()

for iteration in range(n_iterations):
    model_passive = RandomForestClassifier(n_estimators=100, random_state=42)
    model_passive.fit(X_passive_labeled, y_passive_labeled)
    accuracy = model_passive.score(X, y)
    passive_accuracies.append(accuracy)
    
    # Случайный выбор объектов для разметки
    if len(X_passive_pool) >= queries_per_iteration:
        random_indices = np.random.choice(
            len(X_passive_pool), 
            queries_per_iteration, 
            replace=False
        )
        X_passive_labeled = np.vstack([X_passive_labeled, X_passive_pool[random_indices]])
        y_passive_labeled = np.hstack([y_passive_labeled, y_passive_pool[random_indices]])
        
        mask = np.ones(len(X_passive_pool), dtype=bool)
        mask[random_indices] = False
        X_passive_pool = X_passive_pool[mask]
        y_passive_pool = y_passive_pool[mask]

# Визуализация эффективности активного обучения
plt.figure(figsize=(12, 6))
labeled_counts = np.arange(len(accuracy_history)) * queries_per_iteration + len(X_init)
plt.plot(labeled_counts, accuracy_history, 'b-o', label='Активное обучение (неопределённость)', linewidth=2)
plt.plot(labeled_counts, passive_accuracies, 'r--s', label='Пассивное обучение (случайный выбор)', linewidth=2)
plt.xlabel('Количество размеченных объектов')
plt.ylabel('Точность на полном наборе данных')
plt.title('Эффективность активного обучения')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('active_learning_efficiency.png')
plt.close()

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

import numpy as np

class ActiveLearningSystem:
    def __init__(self, model, strategy='uncertainty'):
        self.model = model
        self.strategy = strategy
        self.X_labeled = None
        self.y_labeled = None
        self.X_pool = None
        self.y_pool_true = None
    
    def initialize(self, X_labeled, y_labeled, X_pool, y_pool_true):
        self.X_labeled = X_labeled.copy()
        self.y_labeled = y_labeled.copy()
        self.X_pool = X_pool.copy()
        self.y_pool_true = y_pool_true.copy()
    
    def query_uncertainty(self, n_queries):
        probabilities = self.model.predict_proba(self.X_pool)
        uncertainties = 1 - np.max(probabilities, axis=1)
        query_indices = np.argsort(uncertainties)[-n_queries:][::-1]
        return query_indices
    
    def query_margin(self, n_queries):
        probabilities = self.model.predict_proba(self.X_pool)
        sorted_probs = np.sort(probabilities, axis=1)
        margins = sorted_probs[:, -1] - sorted_probs[:, -2]
        query_indices = np.argsort(margins)[:n_queries]
        return query_indices
    
    def query_entropy(self, n_queries):
        probabilities = self.model.predict_proba(self.X_pool)
        probabilities = np.clip(probabilities, 1e-10, 1.0)
        entropy = -np.sum(probabilities * np.log2(probabilities), axis=1)
        query_indices = np.argsort(entropy)[-n_queries:][::-1]
        return query_indices
    
    def query(self, n_queries):
        if self.strategy == 'uncertainty':
            indices = self.query_uncertainty(n_queries)
        elif self.strategy == 'margin':
            indices = self.query_margin(n_queries)
        elif self.strategy == 'entropy':
            indices = self.query_entropy(n_queries)
        else:
            raise ValueError(f"Неизвестная стратегия: {self.strategy}")
        
        return indices
    
    def update(self, query_indices):
        # Получение истинных меток из пула
        new_X = self.X_pool[query_indices]
        new_y = self.y_pool_true[query_indices]
        
        # Добавление в размеченный набор
        self.X_labeled = np.vstack([self.X_labeled, new_X])
        self.y_labeled = np.hstack([self.y_labeled, new_y])
        
        # Удаление из пула
        mask = np.ones(len(self.X_pool), dtype=bool)
        mask[query_indices] = False
        self.X_pool = self.X_pool[mask]
        self.y_pool_true = self.y_pool_true[mask]
    
    def train(self):
        self.model.fit(self.X_labeled, self.y_labeled)

# Генерация данных для демонстрации
X, y = make_classification(
    n_samples=5000,
    n_features=50,
    n_informative=30,
    n_redundant=20,
    n_classes=3,
    random_state=42
)

# Разделение на начальный размеченный набор и пул для запросов
X_init, X_pool, y_init, y_pool = train_test_split(
    X, y, train_size=50, stratify=y, random_state=42
)

# Инициализация системы активного обучения
model = RandomForestClassifier(n_estimators=100, random_state=42)
al_system = ActiveLearningSystem(model, strategy='uncertainty')
al_system.initialize(X_init, y_init, X_pool, y_pool)

# Цикл активного обучения
n_iterations = 30
queries_per_iteration = 10
accuracy_history = []

for iteration in range(n_iterations):
    # Обучение модели
    al_system.train()
    
    # Оценка качества на всём исходном наборе данных
    accuracy = al_system.model.score(X, y)
    accuracy_history.append(accuracy)
    
    print(f"Итерация {iteration}: размечено {len(al_system.X_labeled)} объектов, "
          f"точность {accuracy:.4f}")
    
    # Запрос новых объектов для разметки
    if len(al_system.X_pool) >= queries_per_iteration:
        query_indices = al_system.query(queries_per_iteration)
        al_system.update(query_indices)
    else:
        break

# Сравнение с пассивным обучением
passive_accuracies = []
X_passive_labeled = X_init.copy()
y_passive_labeled = y_init.copy()
X_passive_pool = X_pool.copy()
y_passive_pool = y_pool.copy()

for iteration in range(n_iterations):
    model_passive = RandomForestClassifier(n_estimators=100, random_state=42)
    model_passive.fit(X_passive_labeled, y_passive_labeled)
    accuracy = model_passive.score(X, y)
    passive_accuracies.append(accuracy)
    
    # Случайный выбор объектов для разметки
    if len(X_passive_pool) >= queries_per_iteration:
        random_indices = np.random.choice(
            len(X_passive_pool), 
            queries_per_iteration, 
            replace=False
        )
        X_passive_labeled = np.vstack([X_passive_labeled, X_passive_pool[random_indices]])
        y_passive_labeled = np.hstack([y_passive_labeled, y_passive_pool[random_indices]])
        
        mask = np.ones(len(X_passive_pool), dtype=bool)
        mask[random_indices] = False
        X_passive_pool = X_passive_pool[mask]
        y_passive_pool = y_passive_pool[mask]

# Визуализация эффективности активного обучения
plt.figure(figsize=(12, 6))
labeled_counts = np.arange(len(accuracy_history)) * queries_per_iteration + len(X_init)
plt.plot(labeled_counts, accuracy_history, 'b-o', label='Активное обучение (неопределённость)', linewidth=2)
plt.plot(labeled_counts, passive_accuracies, 'r--s', label='Пассивное обучение (случайный выбор)', linewidth=2)
plt.xlabel('Количество размеченных объектов')
plt.ylabel('Точность на полном наборе данных')
plt.title('Эффективность активного обучения')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('active_learning_efficiency.png')
plt.close()