import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from sqlalchemy import create_engine
Introduction to EMAP star
Working with EMAP star
A template JupyterNotebook for working with EMAP. The following features of this notebook, and associated files are documented here to minimise the risk of data leaks or other incidents.
- Usernames and passwords are stored in a .env file that is excluded from version control. The example
env
file at./config/env
should be edited and saved as./config/.env
. A utility functionload_env_vars()
is provided that will confirm this file exists and load the configuration into the working environment. - .gitattributes are set to strip JupyterNotebook cells when pushing to GitHub
Basic set-up
Load libraries
from utils.setup import load_env_vars
Load environment variables
Load environment variables and set-up SQLAlchemy connection engine for the EMAP Star
# Load environment variables
load_env_vars()
# Construct the PostgreSQL connection
= os.getenv('EMAP_DB_HOST')
uds_host = os.getenv('EMAP_DB_NAME')
uds_name = os.getenv('EMAP_DB_PORT')
uds_port = os.getenv('EMAP_DB_USER')
uds_user = os.getenv('EMAP_DB_PASSWORD')
uds_passwd
= create_engine(f'postgresql://{uds_user}:{uds_passwd}@{uds_host}:{uds_port}/{uds_name}') emapdb_engine
The above code is also abstracted into a function (below) but shown in long form above to make clear what we are doing.
from utils.setup import make_emap_engine
= make_emap_engine emapdb_engine
A first example script
Now use the connection to work with EMAP.
For example, let’s inspect patients currently in ED or Resus.
Here’s the SQL:
-- Example script
-- to pick out patients currently in A&E resus or majors
SELECT
vd.location_visit_id
,vd.hospital_visit_id
,vd.location_id-- ugly HL7 location string
,lo.location_string-- time admitted to that bed/theatre/scan etc.
,vd.admission_time-- time discharged from that bed
,vd.discharge_time
FROM star.location_visit vd
-- location label
INNER JOIN star.location lo ON vd.location_id = lo.location_id
WHERE
-- last few hours
> NOW() - '12 HOURS'::INTERVAL
vd.admission_time -- just CURRENT patients
AND
IS NULL
vd.discharge_time -- filter out just ED and Resus or Majors
AND
-- unpacking the HL7 string formatted as
-- Department^Ward^Bed string
'^',1) = 'ED'
SPLIT_PART(lo.location_string,AND
'^',2) ~ '(RESUS|MAJORS)'
SPLIT_PART(lo.location_string,-- sort
ORDER BY lo.location_string
;
The SQL script is stored at ../snippets/sql-vignettes/current_bed.sql
.
We can load the script, and read the results into a Pandas dataframe.
# Read the sql file into a query 'q' and the query into a dataframe
= Path('../snippets/sql-vignettes/current_bed.sql').read_text()
q = pd.read_sql_query(q, emapdb_engine) df
df.head()
Working with hospital visits
A series of three scripts
- Simply pull hospital visits
- Add in hospital numbers (MRN) and handle patient merges
- Add in patient demographics
Simply pull hospital visits
SELECT
vo.hospital_visit_id
,vo.encounter-- admission to hospital
,vo.admission_time
,vo.arrival_method
,vo.presentation_time-- discharge from hospital
-- NB: Outpatients have admission events but not discharge events
,vo.discharge_time
,vo.discharge_disposition
-- start from hospital visits
FROM star.hospital_visit vo
WHERE
-- hospital visits within the last 12 hours
> NOW() - '12 HOURS'::INTERVAL
vo.presentation_time -- emergencies
AND vo.patient_class = 'EMERGENCY'
-- attending via ambulance
AND vo.arrival_method = 'Ambulance'
-- sort descending
ORDER BY vo.presentation_time DESC
;
# Read the sql file into a query 'q' and the query into a dataframe
= Path('../snippets/sql-vignettes/hospital_visit_1.sql').read_text()
q = pd.read_sql_query(q, emapdb_engine)
df
df.head()
Add in hospital numbers (MRN) and handle patient merges
See the series of joins in the middle of the script that retrieve the live MRN. That is we recognise that patients may have had an episode of care with one MRN, and then that episode was merged with another historical MRN. One of those two MRNs will then become the ‘live’ MRN and can be used to trace the patient across what otherwise would be different identities.
SELECT
vo.hospital_visit_id
,vo.encounter
,vo.admission_time
,vo.arrival_method
,vo.presentation_time
,vo.discharge_time
,vo.discharge_disposition-- original MRN
AS original_mrn
,original_mrn.mrn -- live MRN
AS live_mrn
,live_mrn.mrn
-- start from hospital visits
FROM star.hospital_visit vo
-- get original mrn
INNER JOIN star.mrn original_mrn ON vo.mrn_id = original_mrn.mrn_id
-- get mrn to live mapping
INNER JOIN star.mrn_to_live mtl ON vo.mrn_id = mtl.mrn_id
-- get live mrn
INNER JOIN star.mrn live_mrn ON mtl.live_mrn_id = live_mrn.mrn_id
WHERE
-- hospital visits within the last 12 hours
> NOW() - '12 HOURS'::INTERVAL
vo.presentation_time -- emergencies
AND vo.patient_class = 'EMERGENCY'
-- attending via ambulance
AND vo.arrival_method = 'Ambulance'
-- sort descending
ORDER BY vo.presentation_time DESC
;
# Read the sql file into a query 'q' and the query into a dataframe
= Path('../snippets/sql-vignettes/hospital_visit_2.sql').read_text()
q = pd.read_sql_query(q, emapdb_engine)
df
df.head()
Add in patient demographics
SELECT
vo.hospital_visit_id
,vo.encounter
,vo.admission_time
,vo.arrival_method
,vo.presentation_time
,vo.discharge_time
,vo.discharge_disposition-- original MRN
AS original_mrn
,original_mrn.mrn -- live MRN
AS live_mrn
,live_mrn.mrn
-- core demographics
,cd.date_of_birth-- convert dob to age in years
'year', AGE(cd.date_of_birth)) AS age
,date_part(
,cd.sex
,cd.home_postcode-- grab initials from first and last name
CONCAT(LEFT(cd.firstname, 1), LEFT(cd.lastname, 1)) AS initials
,
-- start from hospital visits
FROM star.hospital_visit vo
INNER JOIN star.core_demographic cd ON vo.mrn_id = cd.mrn_id
-- get original mrn
INNER JOIN star.mrn original_mrn ON vo.mrn_id = original_mrn.mrn_id
-- get mrn to live mapping
INNER JOIN star.mrn_to_live mtl ON vo.mrn_id = mtl.mrn_id
-- get live mrn
INNER JOIN star.mrn live_mrn ON mtl.live_mrn_id = live_mrn.mrn_id
WHERE
-- hospital visits within the last 12 hours
> NOW() - '12 HOURS'::INTERVAL
vo.presentation_time -- emergencies
AND vo.patient_class = 'EMERGENCY'
-- attending via ambulance
AND vo.arrival_method = 'Ambulance'
-- sort descending
ORDER BY vo.presentation_time DESC
;
# Read the sql file into a query 'q' and the query into a dataframe
= Path('../snippets/sql-vignettes/hospital_visit_3.sql').read_text()
q = pd.read_sql_query(q, emapdb_engine)
df
df.head()
Working with observations
WIP: example script to work with vitals
-- Example script showing how to work with observations
-- V simple view that finds recent observations
-- for current inpatients in the last few minutes
SELECT
-- observation details
ob.visit_observation_id
,ob.hospital_visit_id
,ob.observation_datetime
--,ob.visit_observation_type_id
--,ot.id_in_application
-- label nicely
CASE
,WHEN ot.id_in_application = '10' THEN 'SpO2'
WHEN ot.id_in_application = '5' THEN 'BP'
WHEN ot.id_in_application = '3040109304' THEN 'Oxygen'
WHEN ot.id_in_application = '6' THEN 'Temp'
WHEN ot.id_in_application = '8' THEN 'Pulse'
WHEN ot.id_in_application = '9' THEN 'Resp'
WHEN ot.id_in_application = '6466' THEN 'AVPU'
END AS vital
,ob.value_as_real
,ob.value_as_text
,ob.unit
FROM
star.visit_observation ob-- observation look-up
LEFT JOIN
star.visit_observation_type oton ob.visit_observation_type_id = ot.visit_observation_type_id
WHERE
> NOW() - '5 MINS'::INTERVAL
ob.observation_datetime AND
in
ot.id_in_application
('10' --'SpO2' -- 602063230
'5' --'BP' -- 602063234
,'3040109304' --'Room Air or Oxygen' -- 602063268
,'6' --'Temp' -- 602063248
,'8' --'Pulse' -- 602063237
,'9' --'Resp' -- 602063257
,'6466' -- Level of consciousness
,
)ORDER BY ob.observation_datetime DESC
;
# Read the sql file into a query 'q' and the query into a dataframe
= Path('../snippets/sql-vignettes/vital_signs.sql').read_text()
q = pd.read_sql_query(q, emapdb_engine)
df
df.head()
Now let’s drill down on just heart rate
# Read the sql file into a query 'q' and the query into a dataframe
= Path('../snippets/sql-vignettes/heart_rate.sql').read_text()
q = pd.read_sql_query(q, emapdb_engine)
df
df.head()
import plotly.express as px
= px.histogram(df,
figx ='value_as_real',
x='Heart rate distribution at UCLH in the last 24 hours',
title={'value_as_real': 'Heart Rate'})
labels figx.show()