ch

2025-09-02 07:15:16 +03:00
parent b8074765e3
commit 84069e4e41
24 changed files with 141 additions and 62 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,20 +1,8 @@
 # Python
 __pycache__
-__pycache__/
+*.pyc
 python_parser/__pycache__/
 python_parser/core/__pycache__/
 python_parser/adapters/__pycache__/
 python_parser/tests/__pycache__/
 python_parser/tests/test_core/__pycache__/
 python_parser/tests/test_adapters/__pycache__/
 python_parser/tests/test_app/__pycache__/
 python_parser/app/__pycache__/
 python_parser/app/schemas/__pycache__/
 python_parser/app/schemas/test_schemas/__pycache__/
 python_parser/app/schemas/test_schemas/test_core/__pycache__/
 python_parser/app/schemas/test_schemas/test_adapters/__pycache__/
 python_parser/app/schemas/test_schemas/test_app/__pycache__/
 nin_python_parser
 *.py[cod]
 *$py.class
@@ -169,5 +157,3 @@ node_modules/
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log* 
 __pycache__/
--- a/python_parser/adapters/pycache/init.cpython-313.pyc
+++ b/python_parser/adapters/pycache/init.cpython-313.pyc
--- a/python_parser/adapters/pycache/pconfig.cpython-313.pyc
+++ b/python_parser/adapters/pycache/pconfig.cpython-313.pyc
--- a/python_parser/adapters/pycache/storage.cpython-313.pyc
+++ b/python_parser/adapters/pycache/storage.cpython-313.pyc
--- a/python_parser/adapters/parsers/pycache/init.cpython-313.pyc
+++ b/python_parser/adapters/parsers/pycache/init.cpython-313.pyc
--- a/python_parser/adapters/parsers/pycache/monitoring_fuel.cpython-313.pyc
+++ b/python_parser/adapters/parsers/pycache/monitoring_fuel.cpython-313.pyc
--- a/python_parser/adapters/parsers/pycache/svodka_ca.cpython-313.pyc
+++ b/python_parser/adapters/parsers/pycache/svodka_ca.cpython-313.pyc
--- a/python_parser/adapters/parsers/pycache/svodka_pm.cpython-313.pyc
+++ b/python_parser/adapters/parsers/pycache/svodka_pm.cpython-313.pyc
--- a/python_parser/adapters/parsers/monitoring_fuel.py
+++ b/python_parser/adapters/parsers/monitoring_fuel.py
@@ -4,8 +4,8 @@ import zipfile
 from typing import Dict, Tuple
 from core.ports import ParserPort
 from core.schema_utils import register_getter_from_schema, validate_params_with_schema
-from app.schemas.monitoring_fuel import MonitoringFuelTotalRequest, MonitoringFuelMonthRequest
+from app.schemas.monitoring_fuel import MonitoringFuelTotalRequest, MonitoringFuelMonthRequest, MonitoringFuelSeriesRequest
-from adapters.pconfig import data_to_json
+from adapters.pconfig import data_to_json, find_header_row
 class MonitoringFuelParser(ParserPort):
@@ -31,6 +31,14 @@ class MonitoringFuelParser(ParserPort):
            schema_class=MonitoringFuelMonthRequest,
            description="Получение данных за конкретный месяц"
        )
        register_getter_from_schema(
            parser_instance=self,
            getter_name="series_by_id_and_columns",
            method=self._get_series_by_id_and_columns,
            schema_class=MonitoringFuelSeriesRequest,
            description="Получение временных рядов по ID и колонкам"
        )
    def _get_total_by_columns(self, params: dict):
        """Агрегация данных по колонкам"""
@@ -92,30 +100,13 @@ class MonitoringFuelParser(ParserPort):
        return df_monitorings
    def find_header_row(self, file_path: str, sheet: str, search_value: str = "Установка", max_rows: int = 50) -> int:
        """Find the header row of an Excel sheet by keyword.

        Reads the first ``max_rows`` rows of ``sheet`` with no header row and
        scans them top to bottom. A row matches when any of its cells, converted
        to a stripped string, matches ``search_value`` as an anchored,
        case-insensitive regular expression.

        Returns the label of the first matching row plus one.
        Raises ValueError when none of the rows read contains the value.
        """
        # Read the first max_rows rows without treating any of them as a header
        df_temp = pd.read_excel(
            file_path,
            sheet_name=sheet,
            header=None,
            nrows=max_rows,
            engine='openpyxl'
        )
        # Look for a row where at least one column contains the search value
        for idx, row in df_temp.iterrows():
            if row.astype(str).str.strip().str.contains(f"^{search_value}$", case=False, regex=True).any():
                print(f"Заголовок найден в строке {idx} (Excel: {idx + 1})")
                return idx + 1  # NOTE(review): the original comment called this a 0-based row index, but the value is idx + 1 (one past the matched row) — confirm the offset is intended
        raise ValueError(f"Не найдена строка с заголовком '{search_value}' в первых {max_rows} строках.")
    def parse_single(self, file, sheet, header_num=None):
        ''' Собственно парсер отчетов одного объекта'''
        # Автоопределение header_num, если не передан
        if header_num is None:
-            header_num = self.find_header_row(file, sheet, search_value="Установка")
+            header_num = find_header_row(file, sheet, search_value="Установка")
        # Читаем весь лист, начиная с найденной строки как заголовок
        df_full = pd.read_excel(
            file,
@@ -237,3 +228,47 @@ class MonitoringFuelParser(ParserPort):
        total.name = 'mean'
        return total, df_combined
    def _get_series_by_id_and_columns(self, params: dict):
        """Collect, per row id, the values of the requested columns across months.

        ``params`` is validated against ``MonitoringFuelSeriesRequest`` and its
        ``"columns"`` entry drives the extraction. ``self.df`` is read as a
        mapping from two-digit month keys ("01".."12") to tabular objects
        (presumably pandas DataFrames — confirm against the parser that fills it).

        Returns a dict ``{str(id): {column: [values...]}}``. Months missing from
        ``self.df``, columns absent in a given month, and NaN cells are skipped,
        so the lists hold only the values actually found, in month order.

        Raises ValueError if a requested column exists in none of the months.
        """
        # Validate the incoming parameters with the Pydantic schema
        request = validate_params_with_schema(params, MonitoringFuelSeriesRequest)
        columns = request["columns"]

        # Every requested column must be present in at least one month
        known_columns = set()
        for frame in self.df.values():
            known_columns.update(frame.columns)
        for name in columns:
            if name not in known_columns:
                raise ValueError(f"Колонка '{name}' не найдена ни в одном месяце")

        # Accumulator: id -> {column: [values in month order]}
        series_by_id = {}

        # Walk the months 01..12 in calendar order
        for month_no in range(1, 13):
            month_key = f"{month_no:02d}"
            if month_key not in self.df:
                print(f"Месяц '{month_key}' не найден в df_monitorings, пропускаем.")
                continue

            monthly = self.df[month_key]
            # Only the requested columns this month actually has
            present = [name for name in columns if name in monthly.columns]
            for name in present:
                for row_id, cell in monthly[name].items():
                    if pd.isna(cell):
                        continue  # NaN cells are not recorded
                    # First value seen for an id creates empty lists for all columns
                    per_column = series_by_id.setdefault(row_id, {c: [] for c in columns})
                    per_column[name].append(cell)

        # Stringify the id keys for JSON compatibility
        return {str(row_id): per_column for row_id, per_column in series_by_id.items()}
--- a/python_parser/adapters/parsers/svodka_pm.py
+++ b/python_parser/adapters/parsers/svodka_pm.py
@@ -3,7 +3,7 @@ import pandas as pd
 from core.ports import ParserPort
 from core.schema_utils import register_getter_from_schema, validate_params_with_schema
 from app.schemas.svodka_pm import SvodkaPMSingleOGRequest, SvodkaPMTotalOGsRequest
-from adapters.pconfig import OG_IDS, replace_id_in_path, data_to_json
+from adapters.pconfig import SINGLE_OGS, replace_id_in_path, data_to_json, find_header_row
 class SvodkaPMParser(ParserPort):
@@ -62,30 +62,13 @@ class SvodkaPMParser(ParserPort):
        self.df = self.parse_svodka_pm_files(file_path, params)
        return self.df
    def find_header_row(self, file: str, sheet: str, search_value: str = "Итого", max_rows: int = 50) -> int:
        """Find the header row of an Excel sheet by keyword.

        Reads the first ``max_rows`` rows of ``sheet`` with no header row and
        scans them top to bottom. A row matches when any of its cells, converted
        to a stripped string, matches ``search_value`` as an anchored,
        case-insensitive regular expression.

        Returns the label of the first matching row.
        Raises ValueError when none of the rows read contains the value.
        """
        # Read the first max_rows rows without treating any of them as a header
        df_temp = pd.read_excel(
            file,
            sheet_name=sheet,
            header=None,
            nrows=max_rows,
            engine='openpyxl'
        )
        # Look for a row where at least one column contains the search value
        for idx, row in df_temp.iterrows():
            if row.astype(str).str.strip().str.contains(f"^{search_value}$", case=False, regex=True).any():
                print(f"Заголовок найден в строке {idx} (Excel: {idx + 1})")
                return idx  # 0-based index — what the header= argument expects
        raise ValueError(f"Не найдена строка с заголовком '{search_value}' в первых {max_rows} строках.")
    def parse_svodka_pm(self, file, sheet, header_num=None):
        ''' Собственно парсер отчетов одного ОГ для БП, ПП и факта '''
        # Автоопределение header_num, если не передан
        if header_num is None:
-            header_num = self.find_header_row(file, sheet, search_value="Итого")
+            header_num = find_header_row(file, sheet, search_value="Итого")
        # Читаем заголовки header_num и 1-2 строки данных, чтобы найти INDICATOR_ID
        df_probe = pd.read_excel(
@@ -183,7 +166,7 @@ class SvodkaPMParser(ParserPort):
        excel_plan_template = 'svodka_plan_pm_ID.xlsx'
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
-            for name, id in OG_IDS.items():
+            for id in SINGLE_OGS:
                if id == 'BASH':
                    continue  # пропускаем BASH
@@ -290,11 +273,11 @@ class SvodkaPMParser(ParserPort):
        ''' Служебная функция агрегации данные по всем ОГ '''
        total_result = {}
-        for name, og_id in OG_IDS.items():
+        for og_id in SINGLE_OGS:
            if og_id == 'BASH':
                continue
-            # print(f"📊 Обработка: {name} ({og_id})")
+            # print(f"📊 Обработка: {og_id}")
            try:
                data = self.get_svodka_og(
                    pm_dict,
@@ -305,7 +288,7 @@ class SvodkaPMParser(ParserPort):
                )
                total_result[og_id] = data
            except Exception as e:
-                print(f"❌ Ошибка при обработке {name} ({og_id}): {e}")
+                print(f"❌ Ошибка при обработке {og_id}: {e}")
                total_result[og_id] = None
        return total_result
--- a/python_parser/adapters/pconfig.py
+++ b/python_parser/adapters/pconfig.py
@@ -3,6 +3,7 @@ from functools import lru_cache
 import json
 import numpy as np
 import pandas as pd
 import os
 OG_IDS = {
    "Комсомольский НПЗ": "KNPZ",
@@ -22,8 +23,37 @@ OG_IDS = {
    "Красноленинский НПЗ": "KLNPZ",
    "Пурнефтепереработка": "PurNP",
    "ЯНОС": "YANOS",
    "Уфанефтехим": "UNH",
    "РНПК": "RNPK",
    "КмсНПЗ": "KNPZ",
    "АНХК": "ANHK",
    "НК НПЗ": "NovKuybNPZ",
    "КНПЗ": "KuybNPZ",
    "СНПЗ": "CyzNPZ",
    "Нижневаторское НПО": "NVNPO",
    "ПурНП": "PurNP",    
 }
 # Flat list of OG identifiers. SvodkaPMParser iterates this list directly
 # and skips the 'BASH' entry in its loops.
 SINGLE_OGS = [
    "KNPZ",
    "ANHK",
    "AchNPZ",
    "BASH",
    "UNPZ",
    "UNH",
    "NOV",
    "NovKuybNPZ",
    "KuybNPZ",
    "CyzNPZ",
    "TuapsNPZ",
    "SNPZ",
    "RNPK",
    "NVNPO",
    "KLNPZ",
    "PurNP",
    "YANOS",
 ]
 SNPZ_IDS = {
    "Висбрекинг": "SNPZ.VISB",
    "Изомеризация": "SNPZ.IZOM",
@@ -40,7 +70,18 @@ SNPZ_IDS = {
 def replace_id_in_path(file_path, new_id):
    """Build the Excel path for ``new_id`` from a template containing 'ID'.

    Every occurrence of the substring 'ID' in ``file_path`` is replaced with
    ``str(new_id)``. The result is returned with a '.xlsx' extension when that
    file exists on disk, otherwise with '.xlsm' (whether or not it exists).

    A template that already ends in '.xlsx' or '.xlsm' (any case) has that
    suffix removed first, so the result never carries a doubled extension such
    as 'name.xlsx.xlsx'. Templates without an Excel suffix behave as before.
    """
    substituted = file_path.replace('ID', str(new_id))

    # Drop an Excel suffix the template already carries; any other dotted
    # tail (e.g. an id like 'SNPZ.VISB') is part of the name and is kept.
    stem, suffix = os.path.splitext(substituted)
    if suffix.lower() not in ('.xlsx', '.xlsm'):
        stem = substituted

    modified_path = stem + '.xlsx'
    # Fall back to the macro-enabled extension when no .xlsx file is present
    if not os.path.exists(modified_path):
        directory, filename = os.path.split(modified_path)
        name, ext = os.path.splitext(filename)
        modified_path = os.path.join(directory, name + '.xlsm')
    return modified_path
 def get_table_name(exel):
@@ -109,6 +150,25 @@ def get_id_by_name(name, dictionary):
    return best_match
 def find_header_row(file, sheet, search_value="Итого", max_rows=50):
    """Find the header row of an Excel sheet by keyword.

    Reads the first ``max_rows`` rows of ``sheet`` with no header row and
    scans them top to bottom. A row matches when any of its cells, converted
    to a stripped string, equals ``search_value`` ignoring case. The value is
    matched literally: regex metacharacters in it (parentheses, dots, plus
    signs, ...) are escaped rather than interpreted.

    Returns the label of the first matching row, which is the 0-based row
    position usable as the ``header=`` argument of ``pd.read_excel``.
    Raises ValueError when none of the rows read contains the value.

    NOTE(review): the ``find_header_row`` method formerly on
    MonitoringFuelParser returned ``idx + 1``; callers moved to this helper
    receive a value one lower — confirm that is intended.
    """
    import re  # local import: the module's import block is not fully visible here

    # Read the first max_rows rows without treating any of them as a header
    preview = pd.read_excel(
        file,
        sheet_name=sheet,
        header=None,
        nrows=max_rows
    )
    # Anchored literal pattern, built once outside the row loop
    pattern = f"^{re.escape(str(search_value))}$"
    # Look for a row where at least one column contains the search value
    for idx, row in preview.iterrows():
        if row.astype(str).str.strip().str.contains(pattern, case=False, regex=True).any():
            print(f"Заголовок найден в строке {idx} (Excel: {idx + 1})")
            return idx  # 0-based index — what the header= argument expects
    raise ValueError(f"Не найдена строка с заголовком '{search_value}' в первых {max_rows} строках.")
 def data_to_json(data, indent=2, ensure_ascii=False):
    """
    Полностью безопасная сериализация данных в JSON.
@@ -175,7 +235,6 @@ def data_to_json(data, indent=2, ensure_ascii=False):
    try:
        cleaned_data = convert_obj(data)
-        cleaned_data_str = json.dumps(cleaned_data, indent=indent, ensure_ascii=ensure_ascii)
+        return json.dumps(cleaned_data, indent=indent, ensure_ascii=ensure_ascii)
        return cleaned_data
    except Exception as e:
        raise ValueError(f"Не удалось сериализовать данные в JSON: {e}")
--- a/python_parser/app/pycache/init.cpython-313.pyc
+++ b/python_parser/app/pycache/init.cpython-313.pyc
--- a/python_parser/app/pycache/main.cpython-313.pyc
+++ b/python_parser/app/pycache/main.cpython-313.pyc
--- a/python_parser/app/schemas/pycache/init.cpython-313.pyc
+++ b/python_parser/app/schemas/pycache/init.cpython-313.pyc
--- a/python_parser/app/schemas/pycache/monitoring_fuel.cpython-313.pyc
+++ b/python_parser/app/schemas/pycache/monitoring_fuel.cpython-313.pyc
--- a/python_parser/app/schemas/pycache/server.cpython-313.pyc
+++ b/python_parser/app/schemas/pycache/server.cpython-313.pyc
--- a/python_parser/app/schemas/pycache/svodka_ca.cpython-313.pyc
+++ b/python_parser/app/schemas/pycache/svodka_ca.cpython-313.pyc
--- a/python_parser/app/schemas/pycache/svodka_pm.cpython-313.pyc
+++ b/python_parser/app/schemas/pycache/svodka_pm.cpython-313.pyc
--- a/python_parser/app/schemas/pycache/upload.cpython-313.pyc
+++ b/python_parser/app/schemas/pycache/upload.cpython-313.pyc
--- a/python_parser/app/schemas/monitoring_fuel.py
+++ b/python_parser/app/schemas/monitoring_fuel.py
@@ -32,3 +32,19 @@ class MonitoringFuelTotalRequest(BaseModel):
                "columns": ["total", "normativ"]
            }
        }
 # Request schema registered for the "series_by_id_and_columns" getter:
 # carries the list of column names whose values should be returned.
 class MonitoringFuelSeriesRequest(BaseModel):
    # Required field (no default); the list must hold at least one name.
    # NOTE(review): `json_schema_extra` below suggests Pydantic v2, where
    # `min_items` and the `example` keyword are deprecated spellings —
    # confirm the installed version before changing them.
    columns: List[str] = Field(
        ...,
        description="Массив названий выбираемых столбцов для получения временных рядов",
        example=["total", "normativ"],
        min_items=1
    )
    # Example request body attached to the generated JSON schema
    class Config:
        json_schema_extra = {
            "example": {
                "columns": ["total", "normativ"]
            }
        }
--- a/python_parser/core/pycache/init.cpython-313.pyc
+++ b/python_parser/core/pycache/init.cpython-313.pyc
--- a/python_parser/core/pycache/models.cpython-313.pyc
+++ b/python_parser/core/pycache/models.cpython-313.pyc
--- a/python_parser/core/pycache/ports.cpython-313.pyc
+++ b/python_parser/core/pycache/ports.cpython-313.pyc
--- a/python_parser/core/pycache/services.cpython-313.pyc
+++ b/python_parser/core/pycache/services.cpython-313.pyc