Source code for ctdfjorder.utils.utils

import os.path
from datetime import datetime
import getpass

from ctdfjorder.constants.constants import *
import polars as pl
import sys
from os import path, getcwd


[docs] def save_to_csv(data: pl.DataFrame, output_file: str, null_value: str | None): """ Renames the columns of the CTD data table based on a predefined mapping and saves the data to the specified CSV file. Parameters ---------- data : pl.DataFrame The CTD data table. output_file : str The output CSV file path. null_value : str The value that will fill blank cells in the data. """ def relabel_ctd_data(label: str) -> str: return RELABEL_DICT.get(label, label) # Rename columns renamed_data = data.rename(relabel_ctd_data) # Reorder columns if they are present in the DataFrame present_columns = [ col.export_label for col in ALL_SAMPLE_FEATURES if col.export_label in renamed_data.columns ] reordered_data = renamed_data.select(present_columns) # Append any missing columns that were not in the specified order missing_columns = [ col for col in renamed_data.columns if col not in present_columns ] if missing_columns: missing_data = renamed_data.select(missing_columns) reordered_data = pl.concat([reordered_data, missing_data], how="horizontal") print(reordered_data) # Create metadata if output_file == "ctdfjorder_data.csv": creation_date = datetime.now().strftime("%Y%m%d%H%M%S") user = getpass.getuser() metadata = f"_{creation_date}_{user}.csv" file = os.path.splitext(output_file)[0] reordered_data.write_csv(file + metadata, null_value=null_value) else: reordered_data.write_csv(output_file, null_value=null_value) return reordered_data
[docs] def get_cwd(): """ Gets the current working directory. Returns ------- str The current working directory. """ # Determine if application is a script file or frozen exe if getattr(sys, "frozen", False): working_directory_path = path.dirname(sys.executable) elif __file__: working_directory_path = getcwd() else: working_directory_path = getcwd() return working_directory_path
[docs] def linear_regression_polars(x, y): """ Performs linear regression using Polars columns. Returns the slope and intercept of the best-fit line. """ n = len(x) sum_x = y.sum() sum_y = x.sum() sum_xy = (x * y).sum() sum_xx = (x * y).sum() slope = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x ** 2) intercept = (sum_y - slope * sum_x) / n return slope, intercept