Friday, June 25, 2021

Loading VAERS data into Postgres

For those who want to load VAERS data into Postgres, here's how I did it.

In case you have technical questions -> twitter: @waukema. 

/*

DROP TABLE vaers.symptoms;


CREATE TABLE vaers.symptoms

(

  id serial NOT NULL,

  vaers_id char(7),

  symptom1 character varying,

  symptomversion1 numeric,

  symptom2 character varying,

  symptomversion2 numeric,

  symptom3 character varying,

  symptomversion3 numeric,

  symptom4 character varying,

  symptomversion4 numeric,

  symptom5 character varying,

  symptomversion5 numeric

)

WITH (

  OIDS=FALSE

);



--msdos windows server 2008 postgres 9.3 using gnuwin32 utils:

--cat 2021VAERSSYMPTOMS.csv|psql -h localhost -d postgres -a -c "set client_encoding=latin1; copy vaers.symptoms (VAERS_ID, SYMPTOM1, SYMPTOMVERSION1, SYMPTOM2, SYMPTOMVERSION2, SYMPTOM3, SYMPTOMVERSION3, SYMPTOM4, SYMPTOMVERSION4, SYMPTOM5, SYMPTOMVERSION5) from stdin delimiter ',' CSV HEADER"


create table vaers.allsymptoms as

(

select 

vaers_id,

symptom1 as symptom,

symptomversion1 as symptomversion

from 

vaers.symptoms

where

symptom1 is not null

union all

select 

vaers_id,

symptom2 as symptom,

symptomversion2 as symptomversion

from 

vaers.symptoms

where

symptom2 is not null

union all

select 

vaers_id,

symptom3 as symptom,

symptomversion3 as symptomversion

from 

vaers.symptoms

where

symptom3 is not null

union all

select 

vaers_id,

symptom4 as symptom,

symptomversion4 as symptomversion

from 

vaers.symptoms

where

symptom4 is not null

union all

select 

vaers_id,

symptom5 as symptom,

symptomversion5 as symptomversion

from 

vaers.symptoms

where

symptom5 is not null

);


create index ix_allsymptoms_vaers_id on vaers.allsymptoms using btree (vaers_id);

create index ix_allsymptoms_symptom on vaers.allsymptoms using btree (symptom);


drop table vaers.vax ;

create table vaers.vax 

(

id serial,

VAERS_ID varchar,

VAX_TYPE varchar,

VAX_MANU varchar,

VAX_LOT varchar,

VAX_DOSE_SERIES varchar,

VAX_ROUTE varchar,

VAX_SITE varchar,

VAX_NAME varchar

)

WITH (

  OIDS=FALSE

);


--msdos postgres 9.3

--cat 2021VAERSSYMPTOMS.csv|psql -h localhost -d postgres -a -c "set client_encoding=latin1; copy vaers.symptoms (VAERS_ID, SYMPTOM1, SYMPTOMVERSION1, SYMPTOM2, SYMPTOMVERSION2, SYMPTOM3, SYMPTOMVERSION3, SYMPTOM4, SYMPTOMVERSION4, SYMPTOM5, SYMPTOMVERSION5) from stdin delimiter ',' CSV HEADER"

--cat 2021VAERSVAX.csv|psql -h localhost -d postgres -a -c "copy vaers.vax (VAERS_ID, VAX_TYPE, VAX_MANU, VAX_LOT, VAX_DOSE_SERIES, VAX_ROUTE, VAX_SITE, VAX_NAME) from stdin delimiter ',' CSV HEADER"


create index ix_vax_vaers_id on vaers.vax using btree (vaers_id);


drop table if exists vaers.data;

create table vaers.data

(

id serial,

VAERS_ID varchar,

RECVDATE varchar,

STATE varchar,

AGE_YRS varchar,

CAGE_YR varchar,

CAGE_MO varchar,

SEX varchar,

RPT_DATE varchar,

SYMPTOM_TEXT varchar,

DIED varchar,

DATEDIED varchar,

L_THREAT varchar,

ER_VISIT varchar,

HOSPITAL varchar,

HOSPDAYS varchar,

X_STAY varchar,

DISABLE varchar,

RECOVD varchar,

VAX_DATE varchar,

ONSET_DATE varchar,

NUMDAYS varchar,

LAB_DATA varchar,

V_ADMINBY varchar,

V_FUNDBY varchar,

OTHER_MEDS varchar,

CUR_ILL varchar,

HISTORY varchar,

PRIOR_VAX varchar,

SPLTTYPE varchar,

FORM_VERS varchar,

TODAYS_DATE varchar,

BIRTH_DEFECT varchar,

OFC_VISIT varchar,

ER_ED_VISIT varchar,

ALLERGIES varchar

)

WITH (

  OIDS=FALSE

);


--msdos postgres 9.3

--cat 2021VAERSDATA.csv|psql -h localhost -d postgres -a -c "set client_encoding=latin1; copy vaers.data (VAERS_ID,RECVDATE,STATE,AGE_YRS,CAGE_YR,CAGE_MO,SEX,RPT_DATE,SYMPTOM_TEXT,DIED,DATEDIED,L_THREAT,ER_VISIT,HOSPITAL,HOSPDAYS,X_STAY,DISABLE,RECOVD,VAX_DATE,ONSET_DATE,NUMDAYS,LAB_DATA,V_ADMINBY,V_FUNDBY,OTHER_MEDS,CUR_ILL,HISTORY,PRIOR_VAX,SPLTTYPE,FORM_VERS,TODAYS_DATE,BIRTH_DEFECT,OFC_VISIT,ER_ED_VISIT,ALLERGIES) from stdin delimiter ',' CSV HEADER"

create index ix_data_vaers_id on vaers.data using btree (vaers_id);

*/



select 

vv.VAX_MANU as manufaturer,

vv.VAX_NAME as vax_name,

VAX_TYPE,

RECVDATE,

count(distinct vd.vaers_id) as num_records

from 

vaers.data vd

left join vaers.allsymptoms vs on vs.vaers_id = vd.vaers_id

left join vaers.vax vv on vv.vaers_id = vd.vaers_id

where 

vs.symptom = 'Myocard%' 

--and not vd.died = 'Y'

group by

vv.VAX_MANU,

vv.VAX_NAME,

VAX_TYPE,

RECVDATE

;


-- example:


WITH D as

(

select 

vd.recvdate,

vd.state,

vd.age_yrs,

vd.sex,

vd.recovd,

vd.died,

vd.l_threat,

vd.hospital,

vd.disable,

vd.ofc_visit,

vs.symptom,

vv.vax_manu,

vv.vax_type,

vd.vaers_id

from 

vaers.data vd

left join vaers.allsymptoms vs on vs.vaers_id = vd.vaers_id

left join vaers.vax vv on vv.vaers_id = vd.vaers_id

where

vs.symptom_text ilike '%myocardi%'

)


select 

*

from D

limit 10