diff --git a/.gitignore b/.gitignore index 8722ece..4e1e95e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,20 +1,8 @@ # Python __pycache__ -__pycache__/ -python_parser/__pycache__/ -python_parser/core/__pycache__/ -python_parser/adapters/__pycache__/ -python_parser/tests/__pycache__/ -python_parser/tests/test_core/__pycache__/ -python_parser/tests/test_adapters/__pycache__/ -python_parser/tests/test_app/__pycache__/ -python_parser/app/__pycache__/ -python_parser/app/schemas/__pycache__/ -python_parser/app/schemas/test_schemas/__pycache__/ -python_parser/app/schemas/test_schemas/test_core/__pycache__/ -python_parser/app/schemas/test_schemas/test_adapters/__pycache__/ -python_parser/app/schemas/test_schemas/test_app/__pycache__/ +*.pyc +nin_python_parser *.py[cod] *$py.class @@ -169,5 +157,3 @@ node_modules/ npm-debug.log* yarn-debug.log* yarn-error.log* - -__pycache__/ diff --git a/python_parser/adapters/__pycache__/__init__.cpython-313.pyc b/python_parser/adapters/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index af840e8..0000000 Binary files a/python_parser/adapters/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/python_parser/adapters/__pycache__/pconfig.cpython-313.pyc b/python_parser/adapters/__pycache__/pconfig.cpython-313.pyc deleted file mode 100644 index 34fa65d..0000000 Binary files a/python_parser/adapters/__pycache__/pconfig.cpython-313.pyc and /dev/null differ diff --git a/python_parser/adapters/__pycache__/storage.cpython-313.pyc b/python_parser/adapters/__pycache__/storage.cpython-313.pyc deleted file mode 100644 index 1fc1ad5..0000000 Binary files a/python_parser/adapters/__pycache__/storage.cpython-313.pyc and /dev/null differ diff --git a/python_parser/adapters/parsers/__pycache__/__init__.cpython-313.pyc b/python_parser/adapters/parsers/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index a8629ae..0000000 Binary files a/python_parser/adapters/parsers/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/python_parser/adapters/parsers/__pycache__/monitoring_fuel.cpython-313.pyc b/python_parser/adapters/parsers/__pycache__/monitoring_fuel.cpython-313.pyc deleted file mode 100644 index c8ed95c..0000000 Binary files a/python_parser/adapters/parsers/__pycache__/monitoring_fuel.cpython-313.pyc and /dev/null differ diff --git a/python_parser/adapters/parsers/__pycache__/svodka_ca.cpython-313.pyc b/python_parser/adapters/parsers/__pycache__/svodka_ca.cpython-313.pyc deleted file mode 100644 index 9913883..0000000 Binary files a/python_parser/adapters/parsers/__pycache__/svodka_ca.cpython-313.pyc and /dev/null differ diff --git a/python_parser/adapters/parsers/__pycache__/svodka_pm.cpython-313.pyc b/python_parser/adapters/parsers/__pycache__/svodka_pm.cpython-313.pyc deleted file mode 100644 index 1fecb74..0000000 Binary files a/python_parser/adapters/parsers/__pycache__/svodka_pm.cpython-313.pyc and /dev/null differ diff --git a/python_parser/adapters/parsers/monitoring_fuel.py b/python_parser/adapters/parsers/monitoring_fuel.py index 7f41328..8f7875e 100644 --- a/python_parser/adapters/parsers/monitoring_fuel.py +++ b/python_parser/adapters/parsers/monitoring_fuel.py @@ -4,8 +4,8 @@ import zipfile from typing import Dict, Tuple from core.ports import ParserPort from core.schema_utils import register_getter_from_schema, validate_params_with_schema -from app.schemas.monitoring_fuel import MonitoringFuelTotalRequest, MonitoringFuelMonthRequest -from adapters.pconfig import data_to_json +from app.schemas.monitoring_fuel import MonitoringFuelTotalRequest, MonitoringFuelMonthRequest, MonitoringFuelSeriesRequest +from adapters.pconfig import data_to_json, find_header_row class MonitoringFuelParser(ParserPort): @@ -31,6 +31,14 @@ class MonitoringFuelParser(ParserPort): schema_class=MonitoringFuelMonthRequest, description="Получение данных за конкретный месяц" ) + + register_getter_from_schema( + parser_instance=self, + getter_name="series_by_id_and_columns", + method=self._get_series_by_id_and_columns, + schema_class=MonitoringFuelSeriesRequest, + description="Получение временных рядов по ID и колонкам" + ) def _get_total_by_columns(self, params: dict): """Агрегация данных по колонкам""" @@ -92,30 +100,13 @@ class MonitoringFuelParser(ParserPort): return df_monitorings - def find_header_row(self, file_path: str, sheet: str, search_value: str = "Установка", max_rows: int = 50) -> int: - """Определение индекса заголовка в Excel по ключевому слову""" - # Читаем первые max_rows строк без заголовков - df_temp = pd.read_excel( - file_path, - sheet_name=sheet, - header=None, - nrows=max_rows, - engine='openpyxl' - ) - # Ищем строку, где хотя бы в одном столбце встречается искомое значение - for idx, row in df_temp.iterrows(): - if row.astype(str).str.strip().str.contains(f"^{search_value}$", case=False, regex=True).any(): - print(f"Заголовок найден в строке {idx} (Excel: {idx + 1})") - return idx + 1 # возвращаем индекс строки (0-based) - - raise ValueError(f"Не найдена строка с заголовком '{search_value}' в первых {max_rows} строках.") def parse_single(self, file, sheet, header_num=None): ''' Собственно парсер отчетов одного объекта''' # Автоопределение header_num, если не передан if header_num is None: - header_num = self.find_header_row(file, sheet, search_value="Установка") + header_num = find_header_row(file, sheet, search_value="Установка") # Читаем весь лист, начиная с найденной строки как заголовок df_full = pd.read_excel( file, @@ -237,3 +228,47 @@ class MonitoringFuelParser(ParserPort): total.name = 'mean' return total, df_combined + + def _get_series_by_id_and_columns(self, params: dict): + """Получение временных рядов по ID и колонкам""" + # Валидируем параметры с помощью схемы Pydantic + validated_params = validate_params_with_schema(params, MonitoringFuelSeriesRequest) + + columns = validated_params["columns"] + + # Проверяем, что все колонки существуют хотя бы в одном месяце + valid_columns = set() + for month in self.df.values(): + valid_columns.update(month.columns) + + for col in columns: + if col not in valid_columns: + raise ValueError(f"Колонка '{col}' не найдена ни в одном месяце") + + # Подготавливаем результат: словарь id → {col: [значения по месяцам]} + result = {} + + # Обрабатываем месяцы от 01 до 12 + for month_key in [f"{i:02d}" for i in range(1, 13)]: + if month_key not in self.df: + print(f"Месяц '{month_key}' не найден в df_monitorings, пропускаем.") + continue + + df = self.df[month_key] + + for col in columns: + if col not in df.columns: + continue # Пропускаем, если в этом месяце нет колонки + + for idx, value in df[col].items(): + if pd.isna(value): + continue # Можно пропустить NaN, или оставить как null + + if idx not in result: + result[idx] = {c: [] for c in columns} + + result[idx][col].append(value) + + # Преобразуем ключи id в строки (для JSON-совместимости) + result_str_keys = {str(k): v for k, v in result.items()} + return result_str_keys diff --git a/python_parser/adapters/parsers/svodka_pm.py b/python_parser/adapters/parsers/svodka_pm.py index df473ca..c69b026 100644 --- a/python_parser/adapters/parsers/svodka_pm.py +++ b/python_parser/adapters/parsers/svodka_pm.py @@ -3,7 +3,7 @@ import pandas as pd from core.ports import ParserPort from core.schema_utils import register_getter_from_schema, validate_params_with_schema from app.schemas.svodka_pm import SvodkaPMSingleOGRequest, SvodkaPMTotalOGsRequest -from adapters.pconfig import OG_IDS, replace_id_in_path, data_to_json +from adapters.pconfig import SINGLE_OGS, replace_id_in_path, data_to_json, find_header_row class SvodkaPMParser(ParserPort): @@ -62,30 +62,13 @@ class SvodkaPMParser(ParserPort): self.df = self.parse_svodka_pm_files(file_path, params) return self.df - def find_header_row(self, file: str, sheet: str, search_value: str = "Итого", max_rows: int = 50) -> int: - """Определения индекса заголовка в excel по ключевому слову""" - # Читаем первые max_rows строк без заголовков - df_temp = pd.read_excel( - file, - sheet_name=sheet, - header=None, - nrows=max_rows, - engine='openpyxl' - ) - # Ищем строку, где хотя бы в одном столбце встречается искомое значение - for idx, row in df_temp.iterrows(): - if row.astype(str).str.strip().str.contains(f"^{search_value}$", case=False, regex=True).any(): - print(f"Заголовок найден в строке {idx} (Excel: {idx + 1})") - return idx # 0-based index — то, что нужно для header= - - raise ValueError(f"Не найдена строка с заголовком '{search_value}' в первых {max_rows} строках.") def parse_svodka_pm(self, file, sheet, header_num=None): ''' Собственно парсер отчетов одного ОГ для БП, ПП и факта ''' # Автоопределение header_num, если не передан if header_num is None: - header_num = self.find_header_row(file, sheet, search_value="Итого") + header_num = find_header_row(file, sheet, search_value="Итого") # Читаем заголовки header_num и 1-2 строки данных, чтобы найти INDICATOR_ID df_probe = pd.read_excel( @@ -183,7 +166,7 @@ class SvodkaPMParser(ParserPort): excel_plan_template = 'svodka_plan_pm_ID.xlsx' with zipfile.ZipFile(zip_path, 'r') as zip_ref: file_list = zip_ref.namelist() - for name, id in OG_IDS.items(): + for id in SINGLE_OGS: if id == 'BASH': continue # пропускаем BASH @@ -290,11 +273,11 @@ class SvodkaPMParser(ParserPort): ''' Служебная функция агрегации данные по всем ОГ ''' total_result = {} - for name, og_id in OG_IDS.items(): + for og_id in SINGLE_OGS: if og_id == 'BASH': continue - # print(f"📊 Обработка: {name} ({og_id})") + # print(f"📊 Обработка: {og_id}") try: data = self.get_svodka_og( pm_dict, @@ -305,7 +288,7 @@ class SvodkaPMParser(ParserPort): ) total_result[og_id] = data except Exception as e: - print(f"❌ Ошибка при обработке {name} ({og_id}): {e}") + print(f"❌ Ошибка при обработке {og_id}: {e}") total_result[og_id] = None return total_result diff --git a/python_parser/adapters/pconfig.py b/python_parser/adapters/pconfig.py index 12be990..8a5e8ab 100644 --- a/python_parser/adapters/pconfig.py +++ b/python_parser/adapters/pconfig.py @@ -3,6 +3,7 @@ from functools import lru_cache import json import numpy as np import pandas as pd +import os OG_IDS = { "Комсомольский НПЗ": "KNPZ", @@ -22,8 +23,37 @@ OG_IDS = { "Красноленинский НПЗ": "KLNPZ", "Пурнефтепереработка": "PurNP", "ЯНОС": "YANOS", + "Уфанефтехим": "UNH", + "РНПК": "RNPK", + "КмсНПЗ": "KNPZ", + "АНХК": "ANHK", + "НК НПЗ": "NovKuybNPZ", + "КНПЗ": "KuybNPZ", + "СНПЗ": "CyzNPZ", + "Нижневаторское НПО": "NVNPO", + "ПурНП": "PurNP", } +SINGLE_OGS = [ + "KNPZ", + "ANHK", + "AchNPZ", + "BASH", + "UNPZ", + "UNH", + "NOV", + "NovKuybNPZ", + "KuybNPZ", + "CyzNPZ", + "TuapsNPZ", + "SNPZ", + "RNPK", + "NVNPO", + "KLNPZ", + "PurNP", + "YANOS", +] + SNPZ_IDS = { "Висбрекинг": "SNPZ.VISB", "Изомеризация": "SNPZ.IZOM", @@ -40,7 +70,18 @@ SNPZ_IDS = { def replace_id_in_path(file_path, new_id): - return file_path.replace('ID', str(new_id)) + # Заменяем 'ID' на новое значение + modified_path = file_path.replace('ID', str(new_id)) + '.xlsx' + + # Проверяем, существует ли файл + if not os.path.exists(modified_path): + # Меняем расширение на .xlsm + directory, filename = os.path.split(modified_path) + name, ext = os.path.splitext(filename) + new_filename = name + '.xlsm' + modified_path = os.path.join(directory, new_filename) + + return modified_path def get_table_name(exel): @@ -109,6 +150,25 @@ def get_id_by_name(name, dictionary): return best_match +def find_header_row(file, sheet, search_value="Итого", max_rows=50): + ''' Определения индекса заголовка в exel по ключевому слову ''' + # Читаем первые max_rows строк без заголовков + df_temp = pd.read_excel( + file, + sheet_name=sheet, + header=None, + nrows=max_rows + ) + + # Ищем строку, где хотя бы в одном столбце встречается искомое значение + for idx, row in df_temp.iterrows(): + if row.astype(str).str.strip().str.contains(f"^{search_value}$", case=False, regex=True).any(): + print(f"Заголовок найден в строке {idx} (Excel: {idx + 1})") + return idx # 0-based index — то, что нужно для header= + + raise ValueError(f"Не найдена строка с заголовком '{search_value}' в первых {max_rows} строках.") + + def data_to_json(data, indent=2, ensure_ascii=False): """ Полностью безопасная сериализация данных в JSON. @@ -175,7 +235,6 @@ def data_to_json(data, indent=2, ensure_ascii=False): try: cleaned_data = convert_obj(data) - cleaned_data_str = json.dumps(cleaned_data, indent=indent, ensure_ascii=ensure_ascii) - return cleaned_data + return json.dumps(cleaned_data, indent=indent, ensure_ascii=ensure_ascii) except Exception as e: raise ValueError(f"Не удалось сериализовать данные в JSON: {e}") diff --git a/python_parser/app/__pycache__/__init__.cpython-313.pyc b/python_parser/app/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index c812a10..0000000 Binary files a/python_parser/app/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/python_parser/app/__pycache__/main.cpython-313.pyc b/python_parser/app/__pycache__/main.cpython-313.pyc deleted file mode 100644 index 98546da..0000000 Binary files a/python_parser/app/__pycache__/main.cpython-313.pyc and /dev/null differ diff --git a/python_parser/app/schemas/__pycache__/__init__.cpython-313.pyc b/python_parser/app/schemas/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index 1aec30e..0000000 Binary files a/python_parser/app/schemas/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/python_parser/app/schemas/__pycache__/monitoring_fuel.cpython-313.pyc b/python_parser/app/schemas/__pycache__/monitoring_fuel.cpython-313.pyc deleted file mode 100644 index cd2c909..0000000 Binary files a/python_parser/app/schemas/__pycache__/monitoring_fuel.cpython-313.pyc and /dev/null differ diff --git a/python_parser/app/schemas/__pycache__/server.cpython-313.pyc b/python_parser/app/schemas/__pycache__/server.cpython-313.pyc deleted file mode 100644 index 0d41df4..0000000 Binary files a/python_parser/app/schemas/__pycache__/server.cpython-313.pyc and /dev/null differ diff --git a/python_parser/app/schemas/__pycache__/svodka_ca.cpython-313.pyc b/python_parser/app/schemas/__pycache__/svodka_ca.cpython-313.pyc deleted file mode 100644 index 0e04d96..0000000 Binary files a/python_parser/app/schemas/__pycache__/svodka_ca.cpython-313.pyc and /dev/null differ diff --git a/python_parser/app/schemas/__pycache__/svodka_pm.cpython-313.pyc b/python_parser/app/schemas/__pycache__/svodka_pm.cpython-313.pyc deleted file mode 100644 index 88553be..0000000 Binary files a/python_parser/app/schemas/__pycache__/svodka_pm.cpython-313.pyc and /dev/null differ diff --git a/python_parser/app/schemas/__pycache__/upload.cpython-313.pyc b/python_parser/app/schemas/__pycache__/upload.cpython-313.pyc deleted file mode 100644 index c91559a..0000000 Binary files a/python_parser/app/schemas/__pycache__/upload.cpython-313.pyc and /dev/null differ diff --git a/python_parser/app/schemas/monitoring_fuel.py b/python_parser/app/schemas/monitoring_fuel.py index 4239f05..784262a 100644 --- a/python_parser/app/schemas/monitoring_fuel.py +++ b/python_parser/app/schemas/monitoring_fuel.py @@ -32,3 +32,19 @@ class MonitoringFuelTotalRequest(BaseModel): "columns": ["total", "normativ"] } } + + +class MonitoringFuelSeriesRequest(BaseModel): + columns: List[str] = Field( + ..., + description="Массив названий выбираемых столбцов для получения временных рядов", + example=["total", "normativ"], + min_items=1 + ) + + class Config: + json_schema_extra = { + "example": { + "columns": ["total", "normativ"] + } + } diff --git a/python_parser/core/__pycache__/__init__.cpython-313.pyc b/python_parser/core/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index 876b1a0..0000000 Binary files a/python_parser/core/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/python_parser/core/__pycache__/models.cpython-313.pyc b/python_parser/core/__pycache__/models.cpython-313.pyc deleted file mode 100644 index fcb688a..0000000 Binary files a/python_parser/core/__pycache__/models.cpython-313.pyc and /dev/null differ diff --git a/python_parser/core/__pycache__/ports.cpython-313.pyc b/python_parser/core/__pycache__/ports.cpython-313.pyc deleted file mode 100644 index 6bf9520..0000000 Binary files a/python_parser/core/__pycache__/ports.cpython-313.pyc and /dev/null differ diff --git a/python_parser/core/__pycache__/services.cpython-313.pyc b/python_parser/core/__pycache__/services.cpython-313.pyc deleted file mode 100644 index d655af4..0000000 Binary files a/python_parser/core/__pycache__/services.cpython-313.pyc and /dev/null differ