From c8d2fad00fb119f8c15655ebd78d8ed148bdd6cb Mon Sep 17 00:00:00 2001
From: Sukrit Sharma <sukrit.sharma@tuwien.ac.at>
Date: Wed, 2 Apr 2025 10:38:18 +0200
Subject: [PATCH] new test file
---
load_data/tests.py | 201 ++++++++++++++++++++++++++-------------------
1 file changed, 118 insertions(+), 83 deletions(-)
diff --git a/load_data/tests.py b/load_data/tests.py
index c15b698..5242602 100644
--- a/load_data/tests.py
+++ b/load_data/tests.py
@@ -1,89 +1,124 @@
-from django.test import TestCase
-from django.db import models
-from load_data.models import WienerNetztePortalData
+import os
+import logging
+from datetime import datetime
import pandas as pd
-from django_pandas.io import read_frame
-import plotly.graph_objs as go
-import datetime
-# Create your tests here.
+from django.utils.timezone import make_aware
+from load_data.models import BboData
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# Define constants
+FILTER_DATE = make_aware(datetime(2020, 1, 1)) # Fix: Make datetime timezone-aware
+DP_MAPPING = {
+ 606: "Trafo 1 OA",
+ 607: "Trafo 2 OA",
+ 608: "Trafo 3 OA",
+ 613: "Summe Trafos OA",
+ 609: "Trafo 1 OC",
+ 611: "Trafo 2 OC",
+ 612: "Trafo 3 OC",
+ 618: "Summe Trafos OC",
+ 490: "Trafo 2 OX-OZ",
+ 491: "Trafo 3 OX-OZ",
+ 492: "Trafo 5 OX-OZ",
+ 667: "Summe Trafos OX-OZ",
+}
+OUTPUT_FOLDER = 'vsc_data'
+
+# Ensure the output folder exists
+os.makedirs(OUTPUT_FOLDER, exist_ok=True)
+
+
+def query_bbo_data():
+ """Query BboData for specific dp_ids."""
+ try:
+ data = BboData.objects.filter(
+ dp_id__in=DP_MAPPING.keys(), # Filter for specific dp_ids
+ utc__gt=FILTER_DATE
+ ).values('dp_id', 'utc', 'value')
+
+ if not data.exists():
+ logger.warning("No data found for the given filter criteria.")
+ return []
+
+ return list(data)
+ except Exception as e:
+ logger.error(f"Database query failed: {e}")
+ return []
+
+
+def preprocess_data(filtered_data):
+ """Convert the filtered data to a DataFrame and process it."""
+ if not filtered_data:
+ logger.warning("No data received for processing. Exiting...")
+ return pd.DataFrame()
+
+ # Convert QuerySet to DataFrame
+ df = pd.DataFrame.from_records(filtered_data)
+
+ if df.empty:
+ logger.warning("Converted DataFrame is empty. No data to process.")
+ return df
+
+ # Convert 'utc' to datetime if not already
+ df['utc'] = pd.to_datetime(df['utc'])
+
+ # Add a new column for hourly aggregation
+    df['utc_hour'] = df['utc'].dt.floor('h')  # Truncate to the hour ('h' — uppercase 'H' is deprecated in pandas 2.2+)
+
+ # Group by dp_id and utc_hour, and aggregate values
+ hourly_data = df.groupby(['dp_id', 'utc_hour'])['value'].sum().reset_index()
+
+ if hourly_data.empty:
+ logger.warning("Aggregated data is empty after grouping. Exiting...")
+ return hourly_data
+
+ # Convert 'value' from Wh to kWh
+ hourly_data['value_kwh'] = hourly_data['value'] / 1000
+
+ return hourly_data
+
+
+def save_data_for_dp_id(dp_id, dp_name, dp_data):
+ """Save the data for a given dp_id to an Excel file."""
+ if dp_data.empty:
+ logger.warning(f"No data available for {dp_name}. Skipping...")
+ return
+
+ file_name = os.path.join(OUTPUT_FOLDER, f"{dp_name.replace(' ', '_')}.xlsx")
+
+ try:
+ # Save to Excel
+ dp_data[['utc_hour', 'value_kwh']].to_excel(file_name, index=False, engine='openpyxl')
+ logger.info(f"Data for {dp_name} saved in {file_name}")
+ except Exception as e:
+ logger.error(f"Error saving data for {dp_name}: {e}")
+
+
+def main():
+ """Main function to execute the data processing and saving."""
+ try:
+ # Step 1: Query the data for specific dp_ids
+ filtered_data = query_bbo_data()
+ if not filtered_data:
+ return # Exit early if no data
-# Dictionary mapping month numbers to names
-month_names = {1: "Januar", 2: "Februar", 3: "März", 4: "April", 5: "Mai", 6: "Juni", 7: "Juli", 8: "August", 9: "September", 10: "Oktober", 11: "November", 12: "Dezember"}
+ # Step 2: Preprocess the data
+ hourly_data = preprocess_data(filtered_data)
+ if hourly_data.empty:
+ return # Exit early if processed data is empty
+
+ # Step 3: Save data for each dp_id in DP_MAPPING
+ for dp_id, dp_name in DP_MAPPING.items():
+ dp_data = hourly_data[hourly_data['dp_id'] == dp_id]
-dataquery = StromrechnungenDataAll.objects.all()
+ save_data_for_dp_id(dp_id, dp_name, dp_data)
-dataframe = read_frame(dataquery)
+ except Exception as e:
+ logger.error(f"An unexpected error occurred: {e}")
-filtered_df = dataframe[(dataframe['Adresse'] == 'Getreidemarkt 9') & (dataframe['Year'].isin([2021, 2022, 2023]))]
-# Group by 'Year' and 'Adresse', and calculate the sum of 'Verbrauch_kWh'
-sum_verbrauch_kWh = filtered_df.groupby(['Year', 'Adresse'])['Verbrauch_kWh'].sum().reset_index()
-
-print(sum_verbrauch_kWh)
-
-# Sort data by date
-dataframe = dataframe.sort_values('Date')
-
-# Group by Year and Monat, and calculate the sum of Verbrauch_kWh
-df_grouped = dataframe.groupby(['Year', 'Monat']).agg({'Verbrauch_kWh': 'sum'}).reset_index()
-
-# get average from last five years
-last_year = datetime.datetime.now().year - 1
-df_average = df_grouped[(df_grouped['Year']>= last_year-5) & (df_grouped['Year']<last_year)]
-df_average = df_average.groupby(['Monat']).agg({'Verbrauch_kWh': 'mean'}).reset_index()
-df_average['Year'] = "Durchschnitt"
-
-df_grouped = pd.concat([df_grouped,df_average], ignore_index=True)
-
-
-# Define bubble size and color based on sum
-#df_grouped['bubble_size'] = df_grouped['Verbrauch_kWh'] * 0.01 # Adjust the scaling factor as needed
-#df_grouped['bubble_color'] = ['red' if x > df_grouped['Verbrauch_kWh'].mean() else 'green' for x in
-# df_grouped['Verbrauch_kWh']]
-
-# Replace month numbers with names
-df_grouped['Monat'] = df_grouped['Monat'].map(month_names)
-
-# Create Plotly scatter plot
-fig = go.Figure(data=go.Scatter(
- x=df_grouped['Monat'],
- y=df_grouped['Year'],
- mode='markers',
- marker=dict(
- size=df_grouped['bubble_size'],
- color=df_grouped['bubble_color']
- )
-))
-
-fig.update_layout(
- title="Bubble Diagram",
- xaxis_title="Monat",
- yaxis_title="Year",
- showlegend=False
-)
-
-plot_div = fig.to_html(full_html=False, default_height=500, default_width=700)
-
-dataquery = WienerNetztePortalData.objects.all()
-
-# Fetch all data
-dataquery = WienerNetztePortalData.objects.all()
-
-# Convert the query to a dataframe
-dataframe = read_frame(dataquery)
-
-# Filter the dataframe by zaehlpunkt values
-filtered_dataframe = dataframe[dataframe['zaehlpunkt'].isin(['AT0010000000000000001000014249051', 'AT0010000000000000001000015162803'])]
-
-
-
-import pandas as pd
-
-# Filter the DataFrame for years 2023 and 2024
-df_filtered = dataframe[dataframe['zaehlpunkt'].isin([2023, 2024])]
-
-# Group by 'Year' and 'Monat' and sum the 'Verbrauch_kWh' column
-monthly_sums = df_filtered.groupby(['Year', 'Monat'])['Verbrauch_kWh'].sum().reset_index()
-
-# Display the result
-print(monthly_sums)
\ No newline at end of file
+if __name__ == "__main__":
+ main()
--
GitLab