diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml
index 907d402..77a0c08 100644
--- a/.idea/dataSources.xml
+++ b/.idea/dataSources.xml
@@ -22,5 +22,12 @@
jdbc:sqlite:C:\work\repos\db_interactor\local.db$ProjectFileDir$
+
+ sqlite.xerial
+ true
+ org.sqlite.JDBC
+ jdbc:sqlite:C:\Users\Frede\IdeaProjects\db_interactor\local.db
+ $ProjectFileDir$
+
\ No newline at end of file
diff --git a/.idea/database_interacter.iml b/.idea/database_interacter.iml
index 387eec3..8b76679 100644
--- a/.idea/database_interacter.iml
+++ b/.idea/database_interacter.iml
@@ -1,8 +1,10 @@
-
-
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 06e2318..fa9a4d0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/db_adapter.py b/db_adapter.py
index 83bf793..25bceb9 100644
--- a/db_adapter.py
+++ b/db_adapter.py
@@ -16,6 +16,7 @@ class DBAdapter:
_has_tables_been_initialized: bool = False
_logger: logging.Logger
_output_folder: str = 'output/'
+
def __init__(self, keepass: KeePass, database_config: DatabaseConfig, logger: logging.Logger):
self._database_config = database_config
connection_string: str
@@ -35,7 +36,6 @@ class DBAdapter:
self._engine: sq.Engine = sq.create_engine(connection_string)
logger.info('Database initialized')
-
def _read_and_sql_file_and_strip_for_comments(self, filename: str) -> str:
query: str
self._logger.info(f'Reading file {filename}')
@@ -93,6 +93,10 @@ class DBAdapter:
case ExportType.EXCEL:
output_file_name += '.xlsx'
result.to_excel(output_file_name, index=False)
+ case ExportType.XML:
+ output_file_name += '.xml'
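+ # Note: DataFrame.to_xml uses the lxml parser by default, so lxml must be installed (or pass parser='etree')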
+ result.to_xml(output_file_name, index=False)
+
self._logger.info(f'Created file {output_file_name}')
def run_query(self, query: str, read_only = True) -> sq.CursorResult:
@@ -133,6 +137,7 @@ class DBAdapter:
"""
Runs a single SQL query and creates a csv file with the given output name and resulting contents.
If more than one query, throws an error
+ :param export_type: the type of file to export
:param schema: the schema to use
:param input_name: the name of the sql file to use
:param output_name: the name of the csv file to create
@@ -162,16 +167,20 @@ class DBAdapter:
self._set_transaction_readonly(conn)
if schema is not None:
self._set_schema(conn, schema)
- start = time.time()
- result = pd.read_sql(query, conn)
- end = time.time()
- self._logger.info(f'Query took {(end - start):.4f} seconds')
- conn.commit()
+ result = self._extract_dataframe_no_safeguards(conn, query)
except Exception as e:
conn.rollback()
raise e
return result
+ def _extract_dataframe_no_safeguards(self, conn: sq.Connection, query: str) -> pd.DataFrame:
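+ # "No safeguards": no commit/rollback or error handling here, the calling method owns the transaction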
+ result: pd.DataFrame
+ start = time.time()
+ result = pd.read_sql(query, conn)
+ end = time.time()
+ self._logger.info(f'Query took {(end - start):.4f} seconds')
+ return result
+
def run_sql_file_export_to_file_multiple_schemas(self, municipalities: list[Municipality], base_output_name: str = "", input_name: str = "query.sql", read_only = True, export_type = ExportType.CSV):
query = self._read_and_sql_file_and_strip_for_comments(input_name)
self._logger.info(f'Running on {len(municipalities)} schemas')
@@ -212,3 +221,37 @@ class DBAdapter:
conn.rollback()
raise e
self._logger.info(f'Transaction commited')
+
+
+ def run_sql_files_export_to_files_multiple_schemas(self, municipalities: list[Municipality], input_query_file_names: list[str] | None = None, read_only: bool = True, export_type = ExportType.XML):
+ """
+ Runs the given list of sql files against the list of municipalities
+ :param export_type: the type of files to export
+ :param municipalities: a list of municipalities
+ :param input_query_file_names: a list of sql files to run TODO: make this a list of pairs of sql file and root_name/row_name translations to give the XML files the correct namespaces, consider using the stylesheet option of pandas' DataFrame.to_xml https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_xml.html
+ :param read_only: if the transaction should be set to read-only to avoid changes to the database
+ :return: Nothing
+ """
+ with self._engine.connect() as conn:
+ with conn.begin():
+ if read_only:
+ self._set_transaction_readonly(conn)
+
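+ # A single shared transaction is used for all municipalities; the active schema is switched per municipality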
+ for municipality_index, municipality in enumerate(municipalities):
+ self._logger.info(f'({municipality_index + 1}/{len(municipalities)}) Starting to process municipality {municipality.name} ({municipality.schema})')
+ self._set_schema(conn, municipality.schema)
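+ # Group output files per municipality by prefixing file names with its kommunekode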
+ file_prefix = f'{municipality.kommunekode}/'
+
+ for query_file_index, query_filename in enumerate(input_query_file_names):
+ self._logger.info(f'({query_file_index + 1}/{len(input_query_file_names)}) Starting to process query file: {query_filename}')
+ raw_query = self._read_and_sql_file_and_strip_for_comments(query_filename)
+
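+ # Refuse to run files that contain more than one query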
+ if not self._verify_singular_query(raw_query):
+ self._logger.error(f'Query file {query_filename} contains more than one query')
+ raise Exception(f'Query file {query_filename} contains more than one query')
+
+ dataframe = self._extract_dataframe_no_safeguards(conn, raw_query)
+ self._logger.info(f'[{municipality.kommunekode}][{query_filename}][{len(dataframe.index)}]')
+ filename = self._generate_filename(f'{file_prefix}{query_filename}')
+ self._export_to_file(export_type, filename, dataframe)
+ self._logger.info(f'Finished processing {query_filename} generated file {filename}')
diff --git a/models.py b/models.py
index 389ce58..d270658 100644
--- a/models.py
+++ b/models.py
@@ -3,6 +3,7 @@ from enum import Enum
class ExportType(Enum):
+ XML = 3
CSV = 1
EXCEL = 2
@@ -25,4 +26,8 @@ class Municipality:
@property
def schema(self):
- return self._schema
\ No newline at end of file
+ return self._schema
+
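+ # The Danish municipality code; used by DBAdapter to prefix per-municipality output files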
+ @property
+ def kommunekode(self):
+ return self._kommunekode
\ No newline at end of file