Fixed some issues with Oracle support, and added Excel export

This commit is contained in:
frj 2024-12-30 21:29:49 +01:00
parent 65d9dd809d
commit 23acbb5918
14 changed files with 2117 additions and 22 deletions

3
.gitignore vendored
View File

@ -9,4 +9,7 @@ wheels/
# Virtual environments # Virtual environments
.venv .venv
*.csv *.csv
*.xlsx
output/*
local.db local.db
243_22562_220-pycharm-support-libs

7
.idea/dataSources.xml generated
View File

@ -15,5 +15,12 @@
<jdbc-url>jdbc:sqlite:local.db</jdbc-url> <jdbc-url>jdbc:sqlite:local.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir> <working-dir>$ProjectFileDir$</working-dir>
</data-source> </data-source>
<data-source source="LOCAL" name="local" uuid="c59114c7-62f9-4dfd-a08f-f552076a028f">
<driver-ref>sqlite.xerial</driver-ref>
<synchronize>true</synchronize>
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
<jdbc-url>jdbc:sqlite:C:\work\repos\db_interactor\local.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
</data-source>
</component> </component>
</project> </project>

View File

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.13 virtualenv at ~/Repos/database_interacter/.venv" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="Python 3.13 virtualenv at C:\work\repos\db_interactor\.venv" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
</module> </module>

31
.idea/jupyter-settings.xml generated Normal file
View File

@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JupyterPersistentConnectionParameters">
<option name="knownRemoteServers">
<list>
<JupyterConnectionParameters>
<option name="authType" value="notebook" />
<option name="urlString" value="http://localhost:8888/" />
<authParams2>
<map />
</authParams2>
</JupyterConnectionParameters>
</list>
</option>
<option name="moduleParameters">
<map>
<entry key="$PROJECT_DIR$/.idea/database_interacter.iml">
<value>
<JupyterConnectionParameters>
<option name="authType" value="notebook" />
<option name="urlString" value="http://localhost:8888/" />
<authParams2>
<map />
</authParams2>
</JupyterConnectionParameters>
</value>
</entry>
</map>
</option>
</component>
</project>

2
.idea/misc.xml generated
View File

@ -3,5 +3,5 @@
<component name="Black"> <component name="Black">
<option name="sdkName" value="Python 3.13" /> <option name="sdkName" value="Python 3.13" />
</component> </component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 virtualenv at ~/Repos/database_interacter/.venv" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 virtualenv at C:\work\repos\db_interactor\.venv" project-jdk-type="Python SDK" />
</project> </project>

2
.idea/sqldialects.xml generated
View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="SqlDialectMappings"> <component name="SqlDialectMappings">
<file url="file://$PROJECT_DIR$/query.sql" dialect="PostgreSQL" /> <file url="file://$PROJECT_DIR$/query.sql" dialect="OracleSqlPlus" />
</component> </component>
</project> </project>

View File

@ -1,11 +1,12 @@
import logging import logging
import os
import sqlalchemy as sq import sqlalchemy as sq
import sqlparse import sqlparse
import pandas as pd import pandas as pd
from config import DatabaseConfig, DatabaseType from config import DatabaseConfig, DatabaseType
from keepass import KeePass from keepass import KeePass
from models import Municipality from models import Municipality, ExportType
import time import time
@ -14,6 +15,7 @@ class DBAdapter:
_database_config: DatabaseConfig _database_config: DatabaseConfig
_has_tables_been_initialized: bool = False _has_tables_been_initialized: bool = False
_logger: logging.Logger _logger: logging.Logger
_output_folder: str = 'output/'
def __init__(self, keepass: KeePass, database_config: DatabaseConfig, logger: logging.Logger): def __init__(self, keepass: KeePass, database_config: DatabaseConfig, logger: logging.Logger):
self._database_config = database_config self._database_config = database_config
connection_string: str connection_string: str
@ -24,7 +26,7 @@ class DBAdapter:
case DatabaseType.PSQL: case DatabaseType.PSQL:
connection_string = f'postgresql+pg8000://{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}/{self._database_config.name}' connection_string = f'postgresql+pg8000://{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}/{self._database_config.name}'
case DatabaseType.ORCL: case DatabaseType.ORCL:
connection_string = f'oracle:{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}:{self._database_config.port}:{self._database_config.ssid}' connection_string = f'oracle+cx_oracle://{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}:{self._database_config.port}/{self._database_config.ssid}'
case DatabaseType.SQLITE: case DatabaseType.SQLITE:
connection_string = f'sqlite:///{self._database_config.host}' connection_string = f'sqlite:///{self._database_config.host}'
case _: case _:
@ -64,18 +66,35 @@ class DBAdapter:
match self._database_config.type: match self._database_config.type:
case DatabaseType.ORCL: case DatabaseType.ORCL:
conn.execute(sq.text(f"alter session set current_schema = '{schema}'")) conn.execute(sq.text(f"alter session set current_schema = {schema}"))
case DatabaseType.PSQL: case DatabaseType.PSQL:
conn.execute(sq.text(f"set schema '{schema}'")) conn.execute(sq.text(f"set schema '{schema}'"))
case _: case _:
raise Exception(f'Database type {self.database_config.type} is not supported for readonly transactions') raise Exception(f'Database type {self.database_config.type} is not supported for readonly transactions')
def _verify_singular_query(self, query: str): def _verify_singular_query(self, query: str):
self._logger.info(f'Verifying query: {query}') self._logger.info(f'Verifying query')
if len(self._split_query_to_singular_queries(query)) > 1: if len(self._split_query_to_singular_queries(query)) > 1:
self._logger.critical(f'Multiple queries found for query: {query}') self._logger.critical(f'Multiple queries found for query: {query}')
raise Exception(f'Multiple queries found in {query}') raise Exception(f'Multiple queries found in {query}')
def _generate_filename(self, filename: str) -> str:
try:
os.mkdir(self._output_folder)
except FileExistsError:
pass
return f'{self._output_folder}{filename}'
def _export_to_file(self, export_type, output_file_name, result):
match export_type:
case ExportType.CSV:
output_file_name += '.csv'
result.to_csv(output_file_name, index=False, sep=';', encoding='utf-8')
case ExportType.EXCEL:
output_file_name += '.xlsx'
result.to_excel(output_file_name, index=False)
self._logger.info(f'Created file {output_file_name}')
def run_query(self, query: str, read_only = True) -> sq.CursorResult: def run_query(self, query: str, read_only = True) -> sq.CursorResult:
""" """
Runs a single SQL query and returns the result as a CursorResult. Runs a single SQL query and returns the result as a CursorResult.
@ -110,7 +129,7 @@ class DBAdapter:
query = self._read_and_sql_file_and_strip_for_comments(filename) query = self._read_and_sql_file_and_strip_for_comments(filename)
return self.run_query(query, read_only) return self.run_query(query, read_only)
def run_sql_file_export_to_csv(self, schema: str | None = None, input_name: str = "query.sql", output_name: str = "export.csv", read_only = True): def run_sql_file_export_to_file(self, schema: str | None = None, input_name: str = "query.sql", output_name: str = "export", read_only = True, export_type = ExportType.CSV):
""" """
Runs a single SQL query and creates a csv file with the given output name and resulting contents. Runs a single SQL query and creates a csv file with the given output name and resulting contents.
If more than one query, throws an error If more than one query, throws an error
@ -130,7 +149,7 @@ class DBAdapter:
if schema is not None: if schema is not None:
self._set_schema(conn, schema) self._set_schema(conn, schema)
result = self._extract_dataframe(conn, query, read_only) result = self._extract_dataframe(conn, query, read_only)
result.to_csv(output_name, index=False, sep=';') self._export_to_file(export_type, self._generate_filename(output_name), result)
def _extract_dataframe(self, conn: sq.Connection, query: str, read_only: bool, schema: str | None = None) -> pd.DataFrame: def _extract_dataframe(self, conn: sq.Connection, query: str, read_only: bool, schema: str | None = None) -> pd.DataFrame:
result: pd.DataFrame result: pd.DataFrame
@ -153,17 +172,16 @@ class DBAdapter:
raise e raise e
return result return result
def run_sql_file_export_to_csv_multiple_schemas(self, municipalities: list[Municipality], base_output_name: str = "", input_name: str = "query.sql", read_only = True): def run_sql_file_export_to_file_multiple_schemas(self, municipalities: list[Municipality], base_output_name: str = "", input_name: str = "query.sql", read_only = True, export_type = ExportType.CSV):
query = self._read_and_sql_file_and_strip_for_comments(input_name) query = self._read_and_sql_file_and_strip_for_comments(input_name)
self._logger.info(f'Running on {len(municipalities)}') self._logger.info(f'Running on {len(municipalities)} schemas')
self._logger.info(f'Running query: "{query}"') self._logger.info(f'Running query: "{query}"')
with self._engine.connect() as conn: with self._engine.connect() as conn:
for index, municipality in enumerate(municipalities): for index, municipality in enumerate(municipalities):
self._logger.info(f'({index + 1}/{len(municipalities)}) running for municipality {municipality.name}') self._logger.info(f'({index + 1}/{len(municipalities)}) running for municipality {municipality.name}')
result = self._extract_dataframe(conn, query, read_only, schema = municipality.schema) result = self._extract_dataframe(conn, query, read_only, schema = municipality.schema)
output_file_name = f'{base_output_name}{municipality.name}.csv' output_file_name = self._generate_filename(f'{base_output_name}{municipality.name}')
result.to_csv(output_file_name, index=False, sep=';') self._export_to_file(export_type, output_file_name, result)
self._logger.info(f'Created file {output_file_name}')
def run_sql_file_multiple_statements(self, filename: str = "query.sql", read_only = False): def run_sql_file_multiple_statements(self, filename: str = "query.sql", read_only = False):
""" """

2007
jupyter.ipynb Normal file

File diff suppressed because it is too large Load Diff

View File

@ -32,14 +32,14 @@ class KeePass:
def get_db_credentials(self) -> KeePassEntry: def get_db_credentials(self) -> KeePassEntry:
self._logger.info(f'Searching for database credentials on credential: {self._kee_pass_config.db_credentials_name}') self._logger.info(f'Searching for database credentials on credential: {self._kee_pass_config.db_credentials_name}')
group = self._kee_pass group = None
if self._kee_pass_config.db_credentials_group.strip() is not None and self._kee_pass_config.db_credentials_group.strip() != "": if self._kee_pass_config.db_credentials_group.strip() is not None and self._kee_pass_config.db_credentials_group.strip() != "":
self._logger.info(f'Searching in group {self._kee_pass_config.db_credentials_group}') self._logger.info(f'Searching in group {self._kee_pass_config.db_credentials_group}')
group = self._kee_pass.find_entries(name=self._kee_pass_config.db_credentials_name) group = self._kee_pass.find_groups(name=self._kee_pass_config.db_credentials_group)[0]
else: else:
self._logger.info('Searching in root') self._logger.info('Searching in root')
group = group.find_entries(title=self._kee_pass_config.db_credentials_name) group = self._kee_pass.find_entries(title=self._kee_pass_config.db_credentials_name, group=group)
if group is None: if group is None:
self._logger.critical(f'Group {self._kee_pass_config.db_credentials_group} not found') self._logger.critical(f'Group {self._kee_pass_config.db_credentials_group} not found')

View File

@ -11,6 +11,6 @@ config = Config()
keepass = KeePass(config.kee_pass, logger) keepass = KeePass(config.kee_pass, logger)
db_adapter = DBAdapter(keepass, config.database, logger) db_adapter = DBAdapter(keepass, config.database, logger)
#db_adapter.run_sql_file_export_to_csv_multiple_schemas([Municipality('test', '123456789', 'op_test', '123')]) db_adapter.run_sql_file_export_to_file_multiple_schemas([Municipality('København Kommune', '64942212', 'KY_0101', '101')])
db_adapter.run_sql_file_multiple_statements() #db_adapter.run_sql_file_multiple_statements()

View File

@ -1,4 +1,10 @@
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum
class ExportType(Enum):
CSV = 1
EXCEL = 2
@dataclass @dataclass
class Municipality: class Municipality:

View File

@ -6,6 +6,7 @@ readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
dependencies = [ dependencies = [
"cx-oracle>=8.3.0", "cx-oracle>=8.3.0",
"openpyxl>=3.1.5",
"pandas>=2.2.3", "pandas>=2.2.3",
"pg8000>=1.31.2", "pg8000>=1.31.2",
"pykeepass>=4.1.0.post1", "pykeepass>=4.1.0.post1",

View File

@ -1,2 +1 @@
SELECT * FROM public.simple_healthcheck_counters shc; SELECT 1 FROM DUAL;
SELECT * FROM public.simple_healthcheck_counters shc;

23
uv.lock generated
View File

@ -86,6 +86,7 @@ version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "cx-oracle" }, { name = "cx-oracle" },
{ name = "openpyxl" },
{ name = "pandas" }, { name = "pandas" },
{ name = "pg8000" }, { name = "pg8000" },
{ name = "pykeepass" }, { name = "pykeepass" },
@ -97,6 +98,7 @@ dependencies = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "cx-oracle", specifier = ">=8.3.0" }, { name = "cx-oracle", specifier = ">=8.3.0" },
{ name = "openpyxl", specifier = ">=3.1.5" },
{ name = "pandas", specifier = ">=2.2.3" }, { name = "pandas", specifier = ">=2.2.3" },
{ name = "pg8000", specifier = ">=1.31.2" }, { name = "pg8000", specifier = ">=1.31.2" },
{ name = "pykeepass", specifier = ">=4.1.0.post1" }, { name = "pykeepass", specifier = ">=4.1.0.post1" },
@ -105,6 +107,15 @@ requires-dist = [
{ name = "toml", specifier = ">=0.10.2" }, { name = "toml", specifier = ">=0.10.2" },
] ]
[[package]]
name = "et-xmlfile"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
]
[[package]] [[package]]
name = "lxml" name = "lxml"
version = "5.3.0" version = "5.3.0"
@ -158,6 +169,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ce/e1/e0d06ec34036c92b43aef206efe99a5f5f04e12c776eab82a36e00c40afc/numpy-2.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d35717333b39d1b6bb8433fa758a55f1081543de527171543a2b710551d40881", size = 12692303 }, { url = "https://files.pythonhosted.org/packages/ce/e1/e0d06ec34036c92b43aef206efe99a5f5f04e12c776eab82a36e00c40afc/numpy-2.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d35717333b39d1b6bb8433fa758a55f1081543de527171543a2b710551d40881", size = 12692303 },
] ]
[[package]]
name = "openpyxl"
version = "3.1.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "et-xmlfile" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
]
[[package]] [[package]]
name = "pandas" name = "pandas"
version = "2.2.3" version = "2.2.3"