init

2025-08-26 23:33:29 +03:00
commit 3b238ae283
110 changed files with 3837 additions and 0 deletions
--- a/python_parser/adapters/pconfig.py
+++ b/python_parser/adapters/pconfig.py
@@ -0,0 +1,181 @@
+import re
+from functools import lru_cache
+import json
+import numpy as np
+import pandas as pd
+
+# Lookup table: Russian-language name (key) -> short Latin identifier (value).
+# Consumed by get_og_by_name() through get_id_by_name(), which compares
+# lower-cased word sets rather than exact strings.
+# Entry order is significant: when several entries reach the same best score,
+# the matcher keeps the one that appears first in this dict.
+# NOTE(review): the keys look like refinery / enterprise names — confirm.
+OG_IDS = {
+    "Комсомольский НПЗ": "KNPZ",
+    "Ангарская НХК": "ANHK",
+    "Ачинский НПЗ": "AchNPZ",
+    "ЕНПЗ": "BASH",
+    "УНПЗ": "UNPZ",
+    "УНХ": "UNH",
+    "Новойл": "NOV",
+    "Новокуйбышевский НПЗ": "NovKuybNPZ",
+    "Куйбышевский НПЗ": "KuybNPZ",
+    "Сызранский НПЗ": "CyzNPZ",
+    "Туапсинский НПЗ": "TuapsNPZ",
+    "Саратовский НПЗ": "SNPZ",
+    "Рязанская НПК": "RNPK",
+    "Нижневартовское НПО": "NVNPO",
+    "Красноленинский НПЗ": "KLNPZ",
+    "Пурнефтепереработка": "PurNP",
+    "ЯНОС": "YANOS",
+}
+
+# Lookup table: Russian-language name (key) -> identifier (value); every
+# identifier here carries the "SNPZ." prefix.
+# Consumed by get_object_by_name() through get_id_by_name(), which compares
+# lower-cased word sets; punctuation such as "-" and "/" in the keys acts as
+# a word separator there, so e.g. "ЛЧ-35-11/300" and "ЛЧ-35-11/600" differ
+# only in their last token.
+# Entry order is significant: on equal best scores the first entry wins.
+# NOTE(review): presumably process units and summary rows of the plant that
+# OG_IDS maps to "SNPZ" — confirm.
+SNPZ_IDS = {
+    "Висбрекинг": "SNPZ.VISB",
+    "Изомеризация": "SNPZ.IZOM",
+    "Л-24/6": "SNPZ.L24-6",
+    "ЛЧ-35-11/300": "SNPZ.L35-300",
+    "ЛЧ-35-11/600": "SNPZ.L35-600",
+    "ОЗФХ т.у.т/сут": "SNPZ.OZPH",
+    "УПНБ": "SNPZ.UPB",
+    "УПЭС": "SNPZ.UPES",
+    "ЭЛОУ АВТ-6": "SNPZ.EAVT6",
+    "Итого": "SNPZ.TOTAL",
+    "Норматив по фактическим загрузкам": "SNPZ.TOTAL.FACT",
+}
+
+
+def replace_id_in_path(file_path, new_id):
+    return file_path.replace('ID', str(new_id))
+
+
+def get_table_name(exel):
+    return re.sub(r'^data/(.+)\.(xlsm|xlsx)$', r'\1', exel)
+
+
+def normalize_and_tokenize(text):
+    if not isinstance(text, str) or not text.strip():
+        return set()
+    cleaned = re.sub(r'[^\w\s]', ' ', text.lower())
+    cleaned = cleaned.replace('ё', 'е')
+    words = [word.strip() for word in cleaned.split()]
+    return set(word for word in words if word)
+
+
+@lru_cache(maxsize=512)
+def get_object_by_name(name):
+    return get_id_by_name(name, SNPZ_IDS)
+
+
+@lru_cache(maxsize=512)
+def get_og_by_name(name):
+    return get_id_by_name(name, OG_IDS)
+
+
+def get_id_by_name(name, dictionary):
+    if not name or not isinstance(name, str):
+        return None
+
+    query_words = normalize_and_tokenize(name)
+    if not query_words:
+        return None
+
+    best_match = None
+    best_score = 0
+
+    for full_name, obj_id in dictionary.items():
+        entry_words = normalize_and_tokenize(full_name)
+        if not entry_words:
+            continue
+
+        intersection = query_words & entry_words
+        if not intersection:
+            continue
+
+        # Полное совпадение
+        if query_words == entry_words:
+            return obj_id
+
+        # Все слова из словаря есть в запросе
+        if entry_words <= query_words:
+            score = len(entry_words)
+        # Хорошее пересечение
+        elif len(intersection) >= min(2, len(entry_words), len(query_words)):
+            score = len(intersection) / max(len(query_words), len(entry_words))
+        # Одно слово (аббревиатура)
+        elif len(entry_words) == 1 and list(entry_words)[0] in query_words:
+            score = 1.0
+        else:
+            continue
+
+        if score > best_score:
+            best_score = score
+            best_match = obj_id
+
+    return best_match
+
+
+def data_to_json(data, indent=2, ensure_ascii=False):
+    """
+    Полностью безопасная сериализация данных в JSON.
+    Корректно обрабатывает:
+      - np.nan, pd.NA, None → null
+      - DataFrame, Series, numpy массивы и скаляры
+      - вложенные структуры
+    """
+    def is_nan_like(obj):
+        """Проверяет, является ли объект NaN-подобным."""
+        if obj is None:
+            return True
+        if pd.isna(obj):  # Ловит np.nan, pd.NA, pd.NaT, None
+            return True
+        return False
+
+    def convert_obj(obj):
+        # --- DataFrame ---
+        if isinstance(obj, pd.DataFrame):
+            return [convert_obj(row) for _, row in obj.iterrows()]  # каждая строка → dict
+
+        # --- Series ---
+        if isinstance(obj, pd.Series):
+            # Преобразуем индекс в значения, если нужно
+            values = [convert_obj(v) for v in obj.values]
+            # Убираем None (были NaN), но сохраняем структуру, если нужно
+            return values
+
+        # --- numpy скаляры ---
+        elif isinstance(obj, (np.integer, np.int64, np.int32)):
+            return int(obj)
+        elif isinstance(obj, (np.floating, np.float64, np.float32)):
+            return None if is_nan_like(obj) else float(obj)
+        elif isinstance(obj, np.ndarray):
+            return [convert_obj(v) for v in obj]
+
+        # --- временные метки ---
+        elif isinstance(obj, (pd.Timestamp, pd.Timedelta)):
+            return obj.isoformat() if not pd.isna(obj) else None
+        elif isinstance(obj, pd._libs.missing.NAType):  # pd.NA
+            return None
+
+        # --- рекурсия по dict и list ---
+        elif isinstance(obj, dict):
+            return {
+                key: convert_obj(value)
+                for key, value in obj.items()
+                if not is_nan_like(key)  # фильтруем NaN в ключах (недопустимы в JSON)
+            }
+
+        elif isinstance(obj, list):
+            return [convert_obj(item) for item in obj]
+
+        # --- None и NaN-подобные ---
+        elif is_nan_like(obj):
+            return None
+
+        # --- всё остальное ---
+        else:
+            try:
+                return float(obj) if isinstance(obj, (int, float)) else str(obj)
+            except Exception:
+                return str(obj)  # финальный fallback
+
+    try:
+        cleaned_data = convert_obj(data)
+        cleaned_data_str = json.dumps(cleaned_data, indent=indent, ensure_ascii=ensure_ascii)
+        return cleaned_data
+    except Exception as e:
+        raise ValueError(f"Не удалось сериализовать данные в JSON: {e}")