Started working on XML export type

This commit is contained in:
Frederik Jacobsen 2025-03-05 21:10:32 +01:00
parent 6cc3a8aaaa
commit 73c333229b
5 changed files with 67 additions and 10 deletions

7
.idea/dataSources.xml generated
View File

@ -22,5 +22,12 @@
<jdbc-url>jdbc:sqlite:C:\work\repos\db_interactor\local.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
</data-source>
<data-source source="LOCAL" name="local [2]" uuid="7f194b02-693f-4588-bf16-c3d3d6759880">
<driver-ref>sqlite.xerial</driver-ref>
<synchronize>true</synchronize>
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
<jdbc-url>jdbc:sqlite:C:\Users\Frede\IdeaProjects\db_interactor\local.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
</data-source>
</component>
</project>

View File

@ -1,8 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.13 virtualenv at C:\work\repos\db_interactor\.venv" jdkType="Python SDK" />
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="uv (db_interactor)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View File

@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="Python 3.13" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 virtualenv at C:\work\repos\db_interactor\.venv" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="uv (db_interactor)" project-jdk-type="Python SDK" />
</project>

View File

@ -16,6 +16,7 @@ class DBAdapter:
_has_tables_been_initialized: bool = False
_logger: logging.Logger
_output_folder: str = 'output/'
def __init__(self, keepass: KeePass, database_config: DatabaseConfig, logger: logging.Logger):
self._database_config = database_config
connection_string: str
@ -35,7 +36,6 @@ class DBAdapter:
self._engine: sq.Engine = sq.create_engine(connection_string)
logger.info('Database initialized')
def _read_and_sql_file_and_strip_for_comments(self, filename: str) -> str:
query: str
self._logger.info(f'Reading file {filename}')
@ -93,6 +93,10 @@ class DBAdapter:
case ExportType.EXCEL:
output_file_name += '.xlsx'
result.to_excel(output_file_name, index=False)
case ExportType.XML:
output_file_name += '.xml'
result.to_xml(output_file_name, index=False)
self._logger.info(f'Created file {output_file_name}')
def run_query(self, query: str, read_only = True) -> sq.CursorResult:
@ -133,6 +137,7 @@ class DBAdapter:
"""
Runs a single SQL query and creates a csv file with the given output name and resulting contents.
If more than one query, throws an error
:param export_type: the type of file to export
:param schema: the schema to use
:param input_name: the name of the sql file to use
:param output_name: the name of the csv file to create
@ -162,14 +167,18 @@ class DBAdapter:
self._set_transaction_readonly(conn)
if schema is not None:
self._set_schema(conn, schema)
result = self._extract_dataframe_no_safeguards(conn, query)
except Exception as e:
conn.rollback()
raise e
return result
def _extract_dataframe_no_safeguards(self, conn: sq.Connection, query: str) -> pd.DataFrame:
result: pd.DataFrame
start = time.time()
result = pd.read_sql(query, conn)
end = time.time()
self._logger.info(f'Query took {(end - start):.4f} seconds')
conn.commit()
except Exception as e:
conn.rollback()
raise e
return result
def run_sql_file_export_to_file_multiple_schemas(self, municipalities: list[Municipality], base_output_name: str = "", input_name: str = "query.sql", read_only = True, export_type = ExportType.CSV):
@ -212,3 +221,37 @@ class DBAdapter:
conn.rollback()
raise e
self._logger.info(f'Transaction commited')
def run_sql_files_export_to_files_multiple_schemas(self, municipalities: list[Municipality], input_querie_file_names: list[str] | None = None, read_only: bool = True, export_type = ExportType.XML):
    """
    Runs every given sql file against every given municipality and exports each result to its own file.

    All queries run on one connection inside a single transaction. For each municipality the
    schema is switched first, then each sql file is read, verified to contain exactly one query,
    executed and exported to a file whose name is prefixed with '<kommunekode>/'.

    :param municipalities: a list of municipalities
    :param input_querie_file_names: a list of sql files to run; None is treated as an empty list
        TODO: make this a pair list with sql file and translation for root_name and row_name to
        give the XML file the correct namespaces, consider using the stylesheet option from
        pandas to_xml https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_xml.html
    :param read_only: if the transaction should be set to read-only to avoid changes to the database
    :param export_type: the type of files to export
    :return: Nothing
    :raises Exception: if one of the sql files contains more than a single query
    """
    # The default is None (not a mutable list); normalise it so the loops below never iterate None.
    query_filenames: list[str] = list(input_querie_file_names) if input_querie_file_names is not None else []
    if not query_filenames:
        # Nothing to execute, so do not open a connection or a transaction at all.
        self._logger.warning('No query files given, nothing to export')
        return

    municipality_count = len(municipalities)
    query_file_count = len(query_filenames)

    with self._engine.connect() as conn, conn.begin():
        if read_only:
            self._set_transaction_readonly(conn)

        for municipality_index, municipality in enumerate(municipalities):
            self._logger.info(f'({municipality_index + 1}/{municipality_count}) Starting to process municipality {municipality.name} ({municipality.schema})')
            self._set_schema(conn, municipality.schema)
            # NOTE(review): the prefix puts each municipality in its own sub folder; this assumes
            # _generate_filename / _export_to_file can write to a folder that may not exist yet - confirm.
            file_prefix = f'{municipality.kommunekode}/'

            for query_file_index, query_filename in enumerate(query_filenames):
                # Progress is counted against the number of query files, not the number of municipalities.
                self._logger.info(f'({query_file_index + 1}/{query_file_count}) Starting to process query file: {query_filename}')
                raw_query = self._read_and_sql_file_and_strip_for_comments(query_filename)

                if not self._verify_singular_query(raw_query):
                    self._logger.error(f'Query file {query_filename} failed due to multiple queries')
                    raise Exception(f'Query file {query_filename} failed due to multiple queries')

                dataframe = self._extract_dataframe_no_safeguards(conn, raw_query)
                self._logger.info(f'[{municipality.kommunekode}][{query_filename}][{len(dataframe.index)}]')

                filename = self._generate_filename(f'{file_prefix}{query_filename}')
                self._export_to_file(export_type, filename, dataframe)
                self._logger.info(f'Finished processing {query_filename} generated file {filename}')

View File

@ -3,6 +3,7 @@ from enum import Enum
class ExportType(Enum):
    """File formats that a query result can be exported as."""

    # Member order and numeric values are kept exactly as declared; iteration
    # order over the enum follows this definition order.
    XML = 3
    CSV = 1
    EXCEL = 2
@ -26,3 +27,7 @@ class Municipality:
@property
def schema(self):
    """Read-only access to the value stored in the private ``_schema`` attribute."""
    return self._schema
@property
def kommunekode(self):
    """Read-only access to the value stored in the private ``_kommunekode`` attribute."""
    return self._kommunekode