Fixed some issues with Oracle support and added Excel export

frj 2024-12-30 21:29:49 +01:00
parent 65d9dd809d
commit 23acbb5918
14 changed files with 2117 additions and 22 deletions
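
In short: the Oracle connection string is rebuilt for SQLAlchemy's cx_Oracle dialect, the `alter session set current_schema` statement no longer quotes the schema name, and the CSV export helpers are generalized to take an ExportType (CSV or Excel) from models. A hypothetical usage sketch of the renamed entry point, assuming the db_adapter instance built in main.py:

# Hypothetical usage sketch, assuming the db_adapter instance from main.py:
# reads query.sql and writes the result to output/export.xlsx.
from models import ExportType

db_adapter.run_sql_file_export_to_file(
    input_name='query.sql',
    output_name='export',
    export_type=ExportType.EXCEL,
)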

3 .gitignore vendored

@ -9,4 +9,7 @@ wheels/
# Virtual environments
.venv
*.csv
*.xlsx
output/*
local.db
243_22562_220-pycharm-support-libs

7 .idea/dataSources.xml generated

@ -15,5 +15,12 @@
<jdbc-url>jdbc:sqlite:local.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
</data-source>
<data-source source="LOCAL" name="local" uuid="c59114c7-62f9-4dfd-a08f-f552076a028f">
<driver-ref>sqlite.xerial</driver-ref>
<synchronize>true</synchronize>
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
<jdbc-url>jdbc:sqlite:C:\work\repos\db_interactor\local.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
</data-source>
</component>
</project>

View File

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.13 virtualenv at ~/Repos/database_interacter/.venv" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.13 virtualenv at C:\work\repos\db_interactor\.venv" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

31 .idea/jupyter-settings.xml generated Normal file

@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JupyterPersistentConnectionParameters">
<option name="knownRemoteServers">
<list>
<JupyterConnectionParameters>
<option name="authType" value="notebook" />
<option name="urlString" value="http://localhost:8888/" />
<authParams2>
<map />
</authParams2>
</JupyterConnectionParameters>
</list>
</option>
<option name="moduleParameters">
<map>
<entry key="$PROJECT_DIR$/.idea/database_interacter.iml">
<value>
<JupyterConnectionParameters>
<option name="authType" value="notebook" />
<option name="urlString" value="http://localhost:8888/" />
<authParams2>
<map />
</authParams2>
</JupyterConnectionParameters>
</value>
</entry>
</map>
</option>
</component>
</project>

2 .idea/misc.xml generated

@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="Python 3.13" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 virtualenv at ~/Repos/database_interacter/.venv" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 virtualenv at C:\work\repos\db_interactor\.venv" project-jdk-type="Python SDK" />
</project>

2 .idea/sqldialects.xml generated

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SqlDialectMappings">
<file url="file://$PROJECT_DIR$/query.sql" dialect="PostgreSQL" />
<file url="file://$PROJECT_DIR$/query.sql" dialect="OracleSqlPlus" />
</component>
</project>

View File

@ -1,11 +1,12 @@
import logging
import os
import sqlalchemy as sq
import sqlparse
import pandas as pd
from config import DatabaseConfig, DatabaseType
from keepass import KeePass
from models import Municipality
from models import Municipality, ExportType
import time
@ -14,6 +15,7 @@ class DBAdapter:
_database_config: DatabaseConfig
_has_tables_been_initialized: bool = False
_logger: logging.Logger
_output_folder: str = 'output/'
def __init__(self, keepass: KeePass, database_config: DatabaseConfig, logger: logging.Logger):
self._database_config = database_config
connection_string: str
@ -24,7 +26,7 @@ class DBAdapter:
case DatabaseType.PSQL:
connection_string = f'postgresql+pg8000://{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}/{self._database_config.name}'
case DatabaseType.ORCL:
connection_string = f'oracle:{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}:{self._database_config.port}:{self._database_config.ssid}'
connection_string = f'oracle+cx_oracle://{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}:{self._database_config.port}/{self._database_config.ssid}'
case DatabaseType.SQLITE:
connection_string = f'sqlite:///{self._database_config.host}'
case _:
@ -64,18 +66,35 @@ class DBAdapter:
match self._database_config.type:
case DatabaseType.ORCL:
conn.execute(sq.text(f"alter session set current_schema = '{schema}'"))
conn.execute(sq.text(f"alter session set current_schema = {schema}"))
case DatabaseType.PSQL:
conn.execute(sq.text(f"set schema '{schema}'"))
case _:
raise Exception(f'Database type {self._database_config.type} is not supported for readonly transactions')
def _verify_singular_query(self, query: str):
self._logger.info(f'Verifying query: {query}')
self._logger.info('Verifying query')
if len(self._split_query_to_singular_queries(query)) > 1:
self._logger.critical(f'Multiple queries found for query: {query}')
raise Exception(f'Multiple queries found in {query}')
def _generate_filename(self, filename: str) -> str:
try:
os.mkdir(self._output_folder)
except FileExistsError:
pass
return f'{self._output_folder}{filename}'
def _export_to_file(self, export_type, output_file_name, result):
match export_type:
case ExportType.CSV:
output_file_name += '.csv'
result.to_csv(output_file_name, index=False, sep=';', encoding='utf-8')
case ExportType.EXCEL:
output_file_name += '.xlsx'
result.to_excel(output_file_name, index=False)
self._logger.info(f'Created file {output_file_name}')
def run_query(self, query: str, read_only = True) -> sq.CursorResult:
"""
Runs a single SQL query and returns the result as a CursorResult.
@ -110,7 +129,7 @@ class DBAdapter:
query = self._read_and_sql_file_and_strip_for_comments(filename)
return self.run_query(query, read_only)
def run_sql_file_export_to_csv(self, schema: str | None = None, input_name: str = "query.sql", output_name: str = "export.csv", read_only = True):
def run_sql_file_export_to_file(self, schema: str | None = None, input_name: str = "query.sql", output_name: str = "export", read_only = True, export_type = ExportType.CSV):
"""
Runs a single SQL query and creates a csv file with the given output name and resulting contents.
If more than one query, throws an error
@ -130,7 +149,7 @@ class DBAdapter:
if schema is not None:
self._set_schema(conn, schema)
result = self._extract_dataframe(conn, query, read_only)
result.to_csv(output_name, index=False, sep=';')
self._export_to_file(export_type, self._generate_filename(output_name), result)
def _extract_dataframe(self, conn: sq.Connection, query: str, read_only: bool, schema: str | None = None) -> pd.DataFrame:
result: pd.DataFrame
@ -153,17 +172,16 @@ class DBAdapter:
raise e
return result
def run_sql_file_export_to_csv_multiple_schemas(self, municipalities: list[Municipality], base_output_name: str = "", input_name: str = "query.sql", read_only = True):
def run_sql_file_export_to_file_multiple_schemas(self, municipalities: list[Municipality], base_output_name: str = "", input_name: str = "query.sql", read_only = True, export_type = ExportType.CSV):
query = self._read_and_sql_file_and_strip_for_comments(input_name)
self._logger.info(f'Running on {len(municipalities)}')
self._logger.info(f'Running on {len(municipalities)} schemas')
self._logger.info(f'Running query: "{query}"')
with self._engine.connect() as conn:
for index, municipality in enumerate(municipalities):
self._logger.info(f'({index + 1}/{len(municipalities)}) running for municipality {municipality.name}')
result = self._extract_dataframe(conn, query, read_only, schema = municipality.schema)
output_file_name = f'{base_output_name}{municipality.name}.csv'
result.to_csv(output_file_name, index=False, sep=';')
self._logger.info(f'Created file {output_file_name}')
output_file_name = self._generate_filename(f'{base_output_name}{municipality.name}')
self._export_to_file(export_type, output_file_name, result)
def run_sql_file_multiple_statements(self, filename: str = "query.sql", read_only = False):
"""

2007 jupyter.ipynb Normal file

File diff suppressed because it is too large

View File

@ -32,14 +32,14 @@ class KeePass:
def get_db_credentials(self) -> KeePassEntry:
self._logger.info(f'Searching for database credentials on credential: {self._kee_pass_config.db_credentials_name}')
group = self._kee_pass
group = None
if self._kee_pass_config.db_credentials_group.strip() is not None and self._kee_pass_config.db_credentials_group.strip() != "":
self._logger.info(f'Searching in group {self._kee_pass_config.db_credentials_group}')
group = self._kee_pass.find_entries(name=self._kee_pass_config.db_credentials_name)
group = self._kee_pass.find_groups(name=self._kee_pass_config.db_credentials_group)[0]
else:
self._logger.info('Searching in root')
group = group.find_entries(title=self._kee_pass_config.db_credentials_name)
group = self._kee_pass.find_entries(title=self._kee_pass_config.db_credentials_name, group=group)
if group is None:
self._logger.critical(f'Group {self._kee_pass_config.db_credentials_group} not found')
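
The KeePass fix above resolves the configured group first (when one is set) and then looks the entry up by title inside it. A small sketch of that flow with pykeepass, using placeholder file, group, and entry names:

# Sketch of the corrected lookup order (placeholder names throughout):
# resolve the group if one is configured, then find the entry by title within it.
from pykeepass import PyKeePass

kp = PyKeePass('credentials.kdbx', password='master-password')

group_name = 'databases'  # an empty string means "search from the root"
group = None
if group_name and group_name.strip():
    group = kp.find_groups(name=group_name, first=True)

entry = kp.find_entries(title='oracle-db', group=group, first=True)
if entry is None:
    raise RuntimeError('credential entry not found')
print(entry.username)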

View File

@ -11,6 +11,6 @@ config = Config()
keepass = KeePass(config.kee_pass, logger)
db_adapter = DBAdapter(keepass, config.database, logger)
#db_adapter.run_sql_file_export_to_csv_multiple_schemas([Municipality('test', '123456789', 'op_test', '123')])
db_adapter.run_sql_file_export_to_file_multiple_schemas([Municipality('København Kommune', '64942212', 'KY_0101', '101')])
db_adapter.run_sql_file_multiple_statements()
#db_adapter.run_sql_file_multiple_statements()

View File

@ -1,4 +1,10 @@
from dataclasses import dataclass
from enum import Enum
class ExportType(Enum):
CSV = 1
EXCEL = 2
@dataclass
class Municipality:

View File

@ -6,6 +6,7 @@ readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"cx-oracle>=8.3.0",
"openpyxl>=3.1.5",
"pandas>=2.2.3",
"pg8000>=1.31.2",
"pykeepass>=4.1.0.post1",

View File

@ -1,2 +1 @@
SELECT * FROM public.simple_healthcheck_counters shc;
SELECT * FROM public.simple_healthcheck_counters shc;
SELECT 1 FROM DUAL;

23 uv.lock generated

@ -86,6 +86,7 @@ version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "cx-oracle" },
{ name = "openpyxl" },
{ name = "pandas" },
{ name = "pg8000" },
{ name = "pykeepass" },
@ -97,6 +98,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "cx-oracle", specifier = ">=8.3.0" },
{ name = "openpyxl", specifier = ">=3.1.5" },
{ name = "pandas", specifier = ">=2.2.3" },
{ name = "pg8000", specifier = ">=1.31.2" },
{ name = "pykeepass", specifier = ">=4.1.0.post1" },
@ -105,6 +107,15 @@ requires-dist = [
{ name = "toml", specifier = ">=0.10.2" },
]
[[package]]
name = "et-xmlfile"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
]
[[package]]
name = "lxml"
version = "5.3.0"
@ -158,6 +169,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ce/e1/e0d06ec34036c92b43aef206efe99a5f5f04e12c776eab82a36e00c40afc/numpy-2.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d35717333b39d1b6bb8433fa758a55f1081543de527171543a2b710551d40881", size = 12692303 },
]
[[package]]
name = "openpyxl"
version = "3.1.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "et-xmlfile" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
]
[[package]]
name = "pandas"
version = "2.2.3"