Defining & printing sequence objects

Chapter 2.1 Basic Concepts and Terminology

Click here to get instructions…
# assuming you are working within .Rproj environment
library(here)

# install (if necessary) and load other required packages
source(here("source", "LoadInstallPackages.R"))

In chapter 2.1, we introduce different notations of sequence data using example data on family biographies from age 18 to 40. The data come from a sub-sample of the German Family Panel - pairfam. For further information on the study and on how to access the full scientific use file see here.

Defining a state sequence object

We generated the example dataset in Stata. In addition to the sequence variables, it comprises a few additional variables which will be used to analyze the sequences in later chapters.

We import the data to R using the read_dta function from the {haven} package and inspect the names of the imported variables.

# read the Stata file (path resolved relative to the project root)
family <- here("data", "Stata", "PartnerBirthbio.dta") %>%
  read_dta()
# list the column names of the imported data
colnames(family)
  [1] "id"             "weight40"       "sex"           
  [4] "doby_gen"       "dob"            "ethni"         
  [7] "migstatus"      "yeduc"          "sat1i4"        
 [10] "sat5"           "sat6"           "highschool"    
 [13] "church"         "biosib"         "stepsib"       
 [16] "east"           "famstructure18" "state1"        
 [19] "state2"         "state3"         "state4"        
 [22] "state5"         "state6"         "state7"        
 [25] "state8"         "state9"         "state10"       
 [28] "state11"        "state12"        "state13"       
 [31] "state14"        "state15"        "state16"       
 [34] "state17"        "state18"        "state19"       
 [37] "state20"        "state21"        "state22"       
 [40] "state23"        "state24"        "state25"       
 [43] "state26"        "state27"        "state28"       
 [46] "state29"        "state30"        "state31"       
 [49] "state32"        "state33"        "state34"       
 [52] "state35"        "state36"        "state37"       
 [55] "state38"        "state39"        "state40"       
 [58] "state41"        "state42"        "state43"       
 [61] "state44"        "state45"        "state46"       
 [64] "state47"        "state48"        "state49"       
 [67] "state50"        "state51"        "state52"       
 [70] "state53"        "state54"        "state55"       
 [73] "state56"        "state57"        "state58"       
 [76] "state59"        "state60"        "state61"       
 [79] "state62"        "state63"        "state64"       
 [82] "state65"        "state66"        "state67"       
 [85] "state68"        "state69"        "state70"       
 [88] "state71"        "state72"        "state73"       
 [91] "state74"        "state75"        "state76"       
 [94] "state77"        "state78"        "state79"       
 [97] "state80"        "state81"        "state82"       
[100] "state83"        "state84"        "state85"       
[103] "state86"        "state87"        "state88"       
[106] "state89"        "state90"        "state91"       
[109] "state92"        "state93"        "state94"       
[112] "state95"        "state96"        "state97"       
[115] "state98"        "state99"        "state100"      
[118] "state101"       "state102"       "state103"      
[121] "state104"       "state105"       "state106"      
[124] "state107"       "state108"       "state109"      
[127] "state110"       "state111"       "state112"      
[130] "state113"       "state114"       "state115"      
[133] "state116"       "state117"       "state118"      
[136] "state119"       "state120"       "state121"      
[139] "state122"       "state123"       "state124"      
[142] "state125"       "state126"       "state127"      
[145] "state128"       "state129"       "state130"      
[148] "state131"       "state132"       "state133"      
[151] "state134"       "state135"       "state136"      
[154] "state137"       "state138"       "state139"      
[157] "state140"       "state141"       "state142"      
[160] "state143"       "state144"       "state145"      
[163] "state146"       "state147"       "state148"      
[166] "state149"       "state150"       "state151"      
[169] "state152"       "state153"       "state154"      
[172] "state155"       "state156"       "state157"      
[175] "state158"       "state159"       "state160"      
[178] "state161"       "state162"       "state163"      
[181] "state164"       "state165"       "state166"      
[184] "state167"       "state168"       "state169"      
[187] "state170"       "state171"       "state172"      
[190] "state173"       "state174"       "state175"      
[193] "state176"       "state177"       "state178"      
[196] "state179"       "state180"       "state181"      
[199] "state182"       "state183"       "state184"      
[202] "state185"       "state186"       "state187"      
[205] "state188"       "state189"       "state190"      
[208] "state191"       "state192"       "state193"      
[211] "state194"       "state195"       "state196"      
[214] "state197"       "state198"       "state199"      
[217] "state200"       "state201"       "state202"      
[220] "state203"       "state204"       "state205"      
[223] "state206"       "state207"       "state208"      
[226] "state209"       "state210"       "state211"      
[229] "state212"       "state213"       "state214"      
[232] "state215"       "state216"       "state217"      
[235] "state218"       "state219"       "state220"      
[238] "state221"       "state222"       "state223"      
[241] "state224"       "state225"       "state226"      
[244] "state227"       "state228"       "state229"      
[247] "state230"       "state231"       "state232"      
[250] "state233"       "state234"       "state235"      
[253] "state236"       "state237"       "state238"      
[256] "state239"       "state240"       "state241"      
[259] "state242"       "state243"       "state244"      
[262] "state245"       "state246"       "state247"      
[265] "state248"       "state249"       "state250"      
[268] "state251"       "state252"       "state253"      
[271] "state254"       "state255"       "state256"      
[274] "state257"       "state258"       "state259"      
[277] "state260"       "state261"       "state262"      
[280] "state263"       "state264"      

 

The sequence variables begin with the prefix state. The data comprise 264 sequence variables per person (wide data format). These variables include monthly information on family biographies covering a period of 22 years. Haven imports them as numeric variables with labels attached to them (class = "haven_labelled"). With the following commands we can take a look at the labels.

# compact structure of the first sequence variable, including its value labels
str(family[["state1"]])
 dbl+lbl [1:1866] 5, 1, 5, 1, 3, 3, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1,...
 @ format.stata: chr "%22.0g"
 @ labels      : Named num [1:9] 1 2 3 4 5 6 7 8 9
  ..- attr(*, "names")= chr [1:9] "Single, no child" "Single, child(ren)" "LAT, no child" "LAT, child(ren)" ...
# value labels (label text -> numeric code) attached to the first sequence variable
attr(family$state1, "labels", exact = TRUE)
      Single, no child     Single, child(ren)          LAT, no child 
                     1                      2                      3 
       LAT, child(ren)   Cohabiting, no child Cohabiting, child(ren) 
                     4                      5                      6 
     Married, no child       Married, 1 child   Married, 2+ children 
                     7                      8                      9 

The first examples in the book are based on sequences with a reduced alphabet only distinguishing partnership states. The following code generates a data set (seqvars.partner) containing the recoded sequence variables using {dplyr}.

# Extract the sequence variables (which all start with "state") and recode
# them to a reduced state space capturing partnership status only:
#   1, 2    (Single, with or without children)     -> 1
#   3, 4    (LAT, with or without children)        -> 2
#   5, 6    (Cohabiting, with or without children) -> 3
#   7, 8, 9 (Married, any number of children)      -> 4
# mutate(across()) is used instead of the superseded scoped verb mutate_all().
# Values matched by none of the conditions (e.g. NA) are returned as NA.
seqvars.partner <- family %>%
  select(starts_with("state")) %>%
  mutate(across(everything(), ~ case_when(
    .x < 3 ~ 1,            # Single
    .x %in% c(3, 4) ~ 2,   # LAT
    .x %in% c(5, 6) ~ 3,   # Cohabiting
    .x > 6 ~ 4             # Married
  )))

Then we define two vectors storing the long and short labels for the states in the newly defined alphabet. Once that is done, we can define the data as a state sequence object. Most {TraMineR} functions for analyzing sequences require the data to have this format.

# short and long state labels, listed in the order of the recoded values 1 to 4
shortlab.partner <- c("S", "LAT", "COH", "MAR")
longlab.partner <- c("Single", "LAT", "Cohabiting", "Married")

# define the weighted state sequence object from the recoded monthly variables
partner.month.seq <- seqdef(
  data = seqvars.partner,
  states = shortlab.partner,
  labels = longlab.partner,
  weights = family$weight40
)

Note that the seqdef function can include many more optional arguments. Some of these arguments - most importantly cpal - affect the appearance of state sequence plots rendered with seqplot or seqplot.rf. We cover the definition of color palettes on two separate pages (definition of color palettes; definition of grayscale palettes).

 

Sequence data notation

In chapter 2.1, we introduce different notations for printing sequences. The following commands print the sequences in STS, DSS, and SPS format.

# STS format: the 8th sequence printed with one state per monthly position
print(partner.month.seq[8, ], format = "STS")
  Sequence                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
8 LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-S-S-S-S-S-S-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-S-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-LAT-COH-COH-COH-COH-COH-COH-COH-COH-COH-COH-COH-COH-COH-COH-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR-MAR
# DSS format: the same sequence reduced to its distinct successive states
seqdss(partner.month.seq[8, ])
  Sequence                     
8 LAT-S-LAT-S-LAT-S-LAT-COH-MAR
# SPS format: the same sequence as (state, duration in months) spells
print(partner.month.seq[8, ], format = "SPS")
  Sequence                                                                 
8 (LAT,13)-(S,6)-(LAT,33)-(S,24)-(LAT,41)-(S,35)-(LAT,10)-(COH,14)-(MAR,88)

Corrections

If you see mistakes or want to suggest changes, please create an issue on the source repository.

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. Source code is available at https://github.com/sa-book/sa-book.github.io, unless otherwise noted. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".