diff --git a/.gitignore b/.gitignore
index fd3178c..0c288c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,5 @@ wheels/
# Virtual environments
.venv
-*.csv
\ No newline at end of file
+*.csv
+local.db
\ No newline at end of file
diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml
index 8817b7c..c88b885 100644
--- a/.idea/dataSources.xml
+++ b/.idea/dataSources.xml
@@ -8,5 +8,12 @@
jdbc:postgresql://192.168.1.44:5432/postgres
$ProjectFileDir$
+
+ sqlite.xerial
+ true
+ org.sqlite.JDBC
+ jdbc:sqlite:local.db
+ $ProjectFileDir$
+
\ No newline at end of file
diff --git a/config.py b/config.py
index 4e8ca9f..7c82af6 100644
--- a/config.py
+++ b/config.py
@@ -1,3 +1,4 @@
+import logging
from enum import Enum
import toml
@@ -7,6 +8,7 @@ from dataclasses import dataclass
class DatabaseType(Enum):
PSQL = 1
ORCL = 2
+ SQLITE = 3
@dataclass
class DatabaseConfig:
@@ -25,6 +27,8 @@ class DatabaseConfig:
self._database_type = DatabaseType.PSQL
case 'ORCL':
self._database_type = DatabaseType.ORCL
+ case 'SQLITE':
+ self._database_type = DatabaseType.SQLITE
case _:
self._database_type = None
@@ -32,6 +36,19 @@ class DatabaseConfig:
self._database_ssid = config["DATABASE_SSID"]
self._database_port = config["DATABASE_PORT"]
+ @classmethod
+ def create(cls, database_name: str, database_type: DatabaseType, database_ssid: str, database_port: str, host: str):
+ config = {
+ "HOST": host,
+ "DATABASE_TYPE": database_type,
+ "DATABASE_NAME": database_name,
+ "DATABASE_SSID": database_ssid,
+ "DATABASE_PORT": database_port,
+ }
+ obj = cls.__new__(cls)
+ cls.__init__(obj, config)
+ return obj
+
@property
def host(self) -> str:
return self._host
diff --git a/db_adapter.py b/db_adapter.py
index 91314c2..b2b40b8 100644
--- a/db_adapter.py
+++ b/db_adapter.py
@@ -1,78 +1,135 @@
+import logging
+
import sqlalchemy as sq
import sqlparse
import pandas as pd
-
from config import DatabaseConfig, DatabaseType
from keepass import KeePass
-
-
-def _read_and_sql_file_and_strip_for_comments(filename: str):
- query: str
- with open(filename, 'r') as f:
- query = f.read()
- query = sqlparse.format(query, strip_comments=True)
- return query
+from models import Municipality
+import time
class DBAdapter:
_engine: sq.Engine
_database_config: DatabaseConfig
- def __init__(self, keepass: KeePass, database_config: DatabaseConfig):
+ _has_tables_been_initialized: bool = False
+ _logger: logging.Logger
+ def __init__(self, keepass: KeePass, database_config: DatabaseConfig, logger: logging.Logger):
self._database_config = database_config
connection_string: str
keepass_entry = keepass.get_db_credentials()
+ self._logger = logger
match self._database_config.type:
case DatabaseType.PSQL:
connection_string = f'postgresql+pg8000://{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}/{self._database_config.name}'
case DatabaseType.ORCL:
connection_string = f'oracle:{keepass_entry.name}:{keepass_entry.password}@{self._database_config.host}:{self._database_config.port}:{self._database_config.ssid}'
+ case DatabaseType.SQLITE:
+ connection_string = f'sqlite:///{self._database_config.host}'
case _:
raise Exception(f'Database type {database_config.type} is not supported')
-
+ logger.info(f'Initializing database {database_config.host}:{database_config.name}')
self._engine: sq.Engine = sq.create_engine(connection_string)
+ logger.info('Database initialized')
+
+ def _read_and_sql_file_and_strip_for_comments(self, filename: str):
+ query: str
+ self._logger.info(f'Reading file {filename}')
+ with open(filename, 'r') as f:
+ query = f.read()
+ self._logger.info(f'Stripping comments from file {filename}')
+ query = sqlparse.format(query, strip_comments=True)
+ return query
def _set_transaction_readonly(self, conn: sq.Connection):
+ self._logger.info('Setting transaction to readonly.')
if not conn.in_transaction():
raise Exception('Connection is not in a transaction')
match self._database_config.type:
- case DatabaseType.PSQL:
- conn.execute(sq.text('SET TRANSACTION READ ONLY'))
- case DatabaseType.ORCL:
+ case DatabaseType.PSQL | DatabaseType.ORCL:
conn.execute(sq.text('SET TRANSACTION READ ONLY'))
case _:
raise Exception(f'Database type {self.database_config.type} is not supported for readonly transactions')
+
+ def _set_schema(self, conn: sq.Connection, schema: str):
+ self._logger.info(f'Setting schema to "{schema}"')
+ if not conn.in_transaction():
+ raise Exception('Connection is not in a transaction')
+
+ match self._database_config.type:
+ case DatabaseType.ORCL:
+ conn.execute(sq.text(f"alter session set current_schema = '{schema}'"))
+ case DatabaseType.PSQL:
+ conn.execute(sq.text(f"set schema '{schema}'"))
+ case _:
+ raise Exception(f'Database type {self.database_config.type} is not supported for readonly transactions')
+
def run_query(self, query: str, read_only = True) -> sq.CursorResult:
result: sq.CursorResult
+ self._logger.info(f'Running query: "{query}"')
with self._engine.connect() as conn:
with conn.begin():
+ self._logger.info("Starting transaction")
try:
if read_only:
self._set_transaction_readonly(conn)
+ start = time.time()
result = conn.execute(sq.text(query))
+ end = time.time()
+ self._logger.info(f'Query took {end - start} seconds')
conn.commit()
except Exception as e:
conn.rollback()
raise e
+ self._logger.info(f'Transaction commited')
return result
def run_sql_file_one_statement(self, filename: str = "query.sql", read_only = True) -> sq.CursorResult:
- query = _read_and_sql_file_and_strip_for_comments(filename)
+ query = self._read_and_sql_file_and_strip_for_comments(filename)
return self.run_query(query, read_only)
- def run_sql_file_export_to_csv(self, input_name: str = "query.sql", output_name: str = "export.csv", read_only = True):
+ def run_sql_file_export_to_csv(self, schema: str | None = None, input_name: str = "query.sql", output_name: str = "export.csv", read_only = True):
result: pd.DataFrame
- query = _read_and_sql_file_and_strip_for_comments(input_name)
+
+ query = self._read_and_sql_file_and_strip_for_comments(input_name)
+
+ self._logger.info(f'Running query: "{query}"')
with self._engine.connect() as conn:
- with conn.begin():
- try:
- if read_only:
- self._set_transaction_readonly(conn)
- result = pd.read_sql(query, conn)
- conn.commit()
- except Exception as e:
- conn.rollback()
- raise e
+ if schema is not None:
+ self._set_schema(conn, schema)
+ result = self._extract_dataframe(conn, query, read_only)
result.to_csv(output_name, index=False, sep=';')
+
+ def _extract_dataframe(self, conn: sq.Connection, query: str, read_only: bool, schema: str | None = None) -> pd.DataFrame:
+ result: pd.DataFrame
+ with conn.begin():
+ self._logger.info("Starting transaction")
+ try:
+ if read_only:
+ self._set_transaction_readonly(conn)
+ if schema is not None:
+ self._set_schema(conn, schema)
+ start = time.time()
+ result = pd.read_sql(query, conn)
+ end = time.time()
+ self._logger.info(f'Query took {(end - start):.4f} seconds')
+ conn.commit()
+ except Exception as e:
+ conn.rollback()
+ raise e
+ return result
+
+ def run_sql_file_export_to_csv_multiple_schemas(self, municipalities: list[Municipality], base_output_name: str = "", input_name: str = "query.sql", read_only = True):
+ query = self._read_and_sql_file_and_strip_for_comments(input_name)
+ self._logger.info(f'Running on {len(municipalities)}')
+ self._logger.info(f'Running query: "{query}"')
+ with self._engine.connect() as conn:
+ for index, municipality in enumerate(municipalities):
+ self._logger.info(f'({index}/{len(municipalities)} running for municipality {municipality.name})')
+ result = self._extract_dataframe(conn, query, read_only, schema = municipality.schema)
+ output_file_name = f'{base_output_name}{municipality.name}.csv'
+ result.to_csv(output_file_name, index=False, sep=';')
+ self._logger.info(f'Created file {output_file_name}')
diff --git a/keepass.py b/keepass.py
index d82f2e5..2f83e30 100644
--- a/keepass.py
+++ b/keepass.py
@@ -1,3 +1,5 @@
+import logging
+
from pykeepass import PyKeePass
from config import KeePassConfig
import getpass
@@ -20,22 +22,32 @@ class KeePassEntry:
class KeePass:
- def __init__(self, config: KeePassConfig):
+ def __init__(self, config: KeePassConfig, logger: logging.Logger):
+ self._logger: logging.Logger = logger
self._kee_pass_config: KeePassConfig = config
self._password: str = getpass.getpass(f'KeePass password for {config.path}: ')
+ self._logger.info(f'Initializing connection to keepass file {config.path}')
self._kee_pass: PyKeePass = PyKeePass(config.path, password=self._password)
+ self._logger.info(f'Successfully connected to keepass')
def get_db_credentials(self) -> KeePassEntry:
+ self._logger.info(f'Searching for database credentials on credential: {self._kee_pass_config.db_credentials_name}')
group = self._kee_pass
- if self._kee_pass_config.db_credentials_group.strip() != "" and self._kee_pass_config.db_credentials_group.strip() is not None:
+ if self._kee_pass_config.db_credentials_group.strip() is not None and self._kee_pass_config.db_credentials_group.strip() != "":
+ self._logger.info(f'Searching in group {self._kee_pass_config.db_credentials_group}')
group = self._kee_pass.find_entries(name=self._kee_pass_config.db_credentials_name)
+ else:
+ self._logger.info('Searching in root')
group = group.find_entries(title=self._kee_pass_config.db_credentials_name)
if group is None:
+ self._logger.critical(f'Group {self._kee_pass_config.db_credentials_group} not found')
raise Exception(f'Group {self._kee_pass_config.db_credentials_group} not found')
if len(group) != 1:
+ self._logger.critical(f'Could not find password, found {len(group)} entries')
raise Exception(f'Could not find password, found {len(group)} entries')
+ self._logger.info(f'Found credentials for database for username {group[0].username}')
return KeePassEntry(group[0].username, group[0].password)
\ No newline at end of file
diff --git a/logger.py b/logger.py
new file mode 100644
index 0000000..ca290a2
--- /dev/null
+++ b/logger.py
@@ -0,0 +1,92 @@
+import logging
+import os
+import sqlite3
+
+DEFAULT_SEPARATOR = '|'
+DEFAULT_DATA_TYPE = 'TEXT'
+
+
+#WARNING: attributes must be choosen from https://docs.python.org/3/library/logging.html#formatter-objects
+DEFAULT_ATTRIBUTES_LIST = ['asctime', 'levelname', 'name', 'message']
+
+
+class SQLiteHandler(logging.Handler):
+ """
+ Logging handler for SQLite
+ Based on Yarin Kessler's sqlite_handler.py https://gist.github.com/ykessler/2662203#file_sqlite_handler.py
+ """
+
+ def __init__(self, database:str = "local.db", table = "log", attributes_list=None):
+ """
+ SQLiteHandler class constructor
+ Parameters:
+ self: instance of the class
+ database: database
+ table: log table name
+ attributes_list: log table columns
+ Returns:
+ None
+ """
+ #super(SQLiteHandler, self).__init__() # for python 2.X
+ super().__init__() # for python 3.X
+ if attributes_list is None:
+ attributes_list = DEFAULT_ATTRIBUTES_LIST
+ self.database = database
+ self.table = table
+ self.attributes = attributes_list
+
+ # Create table if needed
+ create_table_sql = 'CREATE TABLE IF NOT EXISTS ' + self.table + ' (' + ((' ' + DEFAULT_DATA_TYPE + ', ').join(self.attributes)) + ' ' + DEFAULT_DATA_TYPE + ');'
+ #print(create_table_sql)
+ conn = sqlite3.connect(self.database)
+ conn.execute(create_table_sql)
+ conn.commit()
+ conn.close()
+
+
+ def emit(self, record):
+ """
+ Save the log record
+ Parameters:
+ self: instance of the class
+ record: log record to be saved
+ Returns:
+ None
+ """
+ # Use default formatting if no formatter is set
+ self.format(record)
+
+ #print(record.__dict__)
+ record_values = [record.__dict__[k] for k in self.attributes]
+ str_record_values = ', '.join("'{0}'".format(v.replace("'", '').replace('"', '').replace('\n', ' ')) for v in record_values)
+ #print(str_record_values)
+
+ insert_sql = 'INSERT INTO ' + self.table + ' (' + (', '.join(self.attributes)) + ') VALUES (' + str_record_values + ');'
+ #print(insert_sql)
+ conn = sqlite3.connect(self.database)
+ conn.execute(insert_sql)
+ conn.commit()
+ conn.close()
+
+
+def init_logger() -> logging.Logger:
+ # changing level we can change frome what level we want to log the events
+
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.DEBUG)
+
+ attributes_list = ['asctime', 'levelname', 'message']
+ formatter = logging.Formatter('%(' + ((')s' + DEFAULT_SEPARATOR + '%(').join(attributes_list)) + ')s')
+
+ console_handler = logging.StreamHandler()
+ console_handler.setLevel(logging.DEBUG)
+ console_handler.setFormatter(formatter)
+
+ sql_handler = SQLiteHandler()
+ sql_handler.setLevel(logging.INFO)
+ sql_handler.setFormatter(formatter)
+
+ logger.addHandler(console_handler)
+ logger.addHandler(sql_handler)
+
+ return logger
\ No newline at end of file
diff --git a/main.py b/main.py
index c4c750c..d29900e 100644
--- a/main.py
+++ b/main.py
@@ -1,13 +1,14 @@
-from config import Config, DatabaseConfig, KeePassConfig
+from config import Config
from db_adapter import DBAdapter
from keepass import KeePass
+from logger import init_logger
+from models import Municipality
+
+logger = init_logger()
config = Config()
-print(config.kee_pass)
-print(config.database)
-keepass = KeePass(config.kee_pass)
-print(keepass.get_db_credentials())
+keepass = KeePass(config.kee_pass, logger)
-db_adapter = DBAdapter(keepass, config.database)
-db_adapter.run_sql_file_export_to_csv()
\ No newline at end of file
+db_adapter = DBAdapter(keepass, config.database, logger)
+db_adapter.run_sql_file_export_to_csv_multiple_schemas([Municipality('test', '123456789', 'op_test', '123')])
\ No newline at end of file
diff --git a/query.sql b/query.sql
index d6b9735..64b92e3 100644
--- a/query.sql
+++ b/query.sql
@@ -1 +1 @@
-SELECT * FROM simple_healthcheck_counters shc
\ No newline at end of file
+SELECT * FROM public.simple_healthcheck_counters shc
\ No newline at end of file