diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml index 907d402..77a0c08 100644 --- a/.idea/dataSources.xml +++ b/.idea/dataSources.xml @@ -22,5 +22,12 @@ jdbc:sqlite:C:\work\repos\db_interactor\local.db $ProjectFileDir$ + + sqlite.xerial + true + org.sqlite.JDBC + jdbc:sqlite:C:\Users\Frede\IdeaProjects\db_interactor\local.db + $ProjectFileDir$ + \ No newline at end of file diff --git a/.idea/database_interacter.iml b/.idea/database_interacter.iml index 387eec3..8b76679 100644 --- a/.idea/database_interacter.iml +++ b/.idea/database_interacter.iml @@ -1,8 +1,10 @@ - - + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 06e2318..fa9a4d0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,5 +3,5 @@ - + \ No newline at end of file diff --git a/db_adapter.py b/db_adapter.py index 83bf793..25bceb9 100644 --- a/db_adapter.py +++ b/db_adapter.py @@ -16,6 +16,7 @@ class DBAdapter: _has_tables_been_initialized: bool = False _logger: logging.Logger _output_folder: str = 'output/' + def __init__(self, keepass: KeePass, database_config: DatabaseConfig, logger: logging.Logger): self._database_config = database_config connection_string: str @@ -35,7 +36,6 @@ class DBAdapter: self._engine: sq.Engine = sq.create_engine(connection_string) logger.info('Database initialized') - def _read_and_sql_file_and_strip_for_comments(self, filename: str) -> str: query: str self._logger.info(f'Reading file {filename}') @@ -93,6 +93,10 @@ class DBAdapter: case ExportType.EXCEL: output_file_name += '.xlsx' result.to_excel(output_file_name, index=False) + case ExportType.XML: + output_file_name += '.xml' + result.to_xml(output_file_name, index=False) + self._logger.info(f'Created file {output_file_name}') def run_query(self, query: str, read_only = True) -> sq.CursorResult: @@ -133,6 +137,7 @@ class DBAdapter: """ Runs a single SQL query and creates a csv file with the given output name and resulting contents. If more than one query, throws an error + :param export_type: the type of file to export :param schema: the schema to use :param input_name: the name of the sql file to use :param output_name: the name of the csv file to create @@ -162,16 +167,20 @@ class DBAdapter: self._set_transaction_readonly(conn) if schema is not None: self._set_schema(conn, schema) - start = time.time() - result = pd.read_sql(query, conn) - end = time.time() - self._logger.info(f'Query took {(end - start):.4f} seconds') - conn.commit() + result = self._extract_dataframe_no_safeguards(conn, query) except Exception as e: conn.rollback() raise e return result + def _extract_dataframe_no_safeguards(self, conn: sq.Connection, query: str) -> pd.DataFrame: + result: pd.DataFrame + start = time.time() + result = pd.read_sql(query, conn) + end = time.time() + self._logger.info(f'Query took {(end - start):.4f} seconds') + return result + def run_sql_file_export_to_file_multiple_schemas(self, municipalities: list[Municipality], base_output_name: str = "", input_name: str = "query.sql", read_only = True, export_type = ExportType.CSV): query = self._read_and_sql_file_and_strip_for_comments(input_name) self._logger.info(f'Running on {len(municipalities)} schemas') @@ -212,3 +221,37 @@ class DBAdapter: conn.rollback() raise e self._logger.info(f'Transaction commited') + + + def run_sql_files_export_to_files_multiple_schemas(self, municipalities: list[Municipality], input_querie_file_names: list[str] = None, read_only: bool = True, export_type = ExportType.XML): + """" + Runs the list of granted sql files against the list of municipalities + :param export_type: the type of files to export + :param municipalities: a list of municipalities + :param input_querie_file_names: a list of sql files to run TODO: make this a pair list with sql file and translation for root_name and row_name to give the XML file the correct namespaces, consider using the stylesheet option from panda to xml https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_xml.html + :param read_only: if the transaction should be set too read-only to avoid changes to the database + :return: Nothing + """ + with self._engine.connect() as conn: + with conn.begin(): + if read_only: + self._set_transaction_readonly(conn) + + for municipality_index, municipality in enumerate(municipalities): + self._logger.info(f'({municipality_index + 1}/{len(municipalities)}) Starting to process municipality {municipality.name} ({municipality.schema})') + self._set_schema(conn, municipality.schema) + file_prefix = f'{municipality.kommunekode}/' + + for query_file_index, query_filename in enumerate(input_querie_file_names): + self._logger.info(f'({query_file_index + 1}/{len(municipalities)}) Starting to process query file: {query_filename}') + raw_query = self._read_and_sql_file_and_strip_for_comments(query_filename) + + if not self._verify_singular_query(raw_query): + self._logger.error(f'Query file {query_filename} failed due to multiple queries') + raise Exception(f'Query file {query_filename} failed due to multiple queries') + + dataframe = self._extract_dataframe_no_safeguards(conn, raw_query) + self._logger.info(f'[{municipality.kommunekode}][{query_filename}][{len(dataframe.index)}]') + filename = self._generate_filename(f'{file_prefix}{query_filename}') + self._export_to_file(export_type, filename, dataframe) + self._logger.info(f'Finished processing {query_filename} generated file {filename}') diff --git a/models.py b/models.py index 389ce58..d270658 100644 --- a/models.py +++ b/models.py @@ -3,6 +3,7 @@ from enum import Enum class ExportType(Enum): + XML = 3 CSV = 1 EXCEL = 2 @@ -25,4 +26,8 @@ class Municipality: @property def schema(self): - return self._schema \ No newline at end of file + return self._schema + + @property + def kommunekode(self): + return self._kommunekode \ No newline at end of file