# ARTFL: nothing is listed under `public`; metadata and freqs are listed
# under `private`. No url_* download keys are defined for this entry.
[ARTFL]
id = artfl
name = ARTFL
desc = American and French Research on the Treasury of the French Language
link = https://artfl-project.uchicago.edu
public = 
private = metadata,freqs
license_type = Academic

# BPO: commercially licensed; nothing is listed under `public`, raw and
# metadata are listed under `private`. No url_* download keys are defined.
[BPO]
id = bpo
name = BPO
desc = British Periodicals Online
link = https://proquest.libguides.com/britishperiodicals
public = 
private = raw,metadata
license_type = Commercial

# CLMET: metadata and freqs are listed under `public` and each has a matching
# url_* key; txt and xml are listed under `private`.
[CLMET]
id = clmet
name = CLMET
desc = Corpus of Late Modern English Texts
link = https://perswww.kuleuven.be/~u0044428/clmet3_0.htm
# NOTE(review): the xml extension is set to `.txt` — presumably the XML-marked
# source files ship with a .txt suffix; confirm this is intentional.
ext_xml = .txt
public = metadata,freqs
private = txt,xml
license = https://ota.bodleian.ox.ac.uk/repository/xmlui/page/licence-ota
license_type = Academic
url_freqs = https://www.dropbox.com/s/lnvwnbzskvqsu9p/clmet_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/m1jxj45al7b17cv/clmet_metadata.zip?dl=1

# COCA: commercially licensed; nothing is listed under `public`, and metadata,
# raw, freqs and txt are all listed under `private`.
[COCA]
id = coca
name = COCA
desc = Corpus of Contemporary American English
link = https://www.english-corpora.org/coca/
public = 
private = metadata,raw,freqs,txt
license_type = Commercial

# COHA: commercially licensed; nothing is listed under `public`, and metadata,
# raw, freqs and txt are all listed under `private`.
[COHA]
id = coha
name = COHA
desc = Corpus of Historical American English
link = https://www.english-corpora.org/coha/
public = 
private = metadata,raw,freqs,txt
license_type = Commercial
# NOTE(review): presumably the number of most-frequent words kept for this
# corpus — confirm against the code that reads `mfw_n`.
mfw_n = 10000

# CanonFiction: freqs and metadata are listed under `public` and each has a
# matching url_* key; txt is listed under `private`. `link` is left empty.
[CanonFiction]
id = canon_fiction
name = CanonFiction
desc = Long history of fiction (Chadwyck, MarkMark, misc.)
link = 
public = freqs,metadata
private = txt
# Metadata file name is set explicitly to an .xls spreadsheet.
path_metadata = metadata.xls
license_type = Mixed
url_freqs = https://www.dropbox.com/s/i5byh4cxmo5x3pr/canon_fiction_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/1bmvz17pcjpqnab/canon_fiction_metadata.zip?dl=1

# Chadwyck: metadata and freqs are listed under `public` and each has a
# matching url_* key; raw, txt and xml are listed under `private`.
[Chadwyck]
id = chadwyck
name = Chadwyck
desc = Chadwyck-Healey Fiction Collections
public = metadata,freqs
link = http://collections.chadwyck.co.uk/marketing/list_of_all.jsp
private = raw,txt,xml
license_type = Mixed
url_freqs = https://www.dropbox.com/s/syluxyz1mcx5495/chadwyck_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/byqbi8sik255469/chadwyck_metadata.zip?dl=1


# ChadwyckDrama: every part (raw, txt, xml, metadata, freqs) is listed under
# `private`. This entry has no `public` key at all, unlike most other entries
# that spell out an empty `public =`.
[ChadwyckDrama]
id = chadwyck_drama
name = ChadwyckDrama
desc = Chadwyck-Healey Drama Collections
link = http://collections.chadwyck.co.uk/marketing/list_of_all.jsp
license_type = Mixed
private = raw,txt,xml,metadata,freqs

# ChadwyckPoetry: every part (raw, txt, xml, metadata, freqs) is listed under
# `private`. This entry has no `public` key at all, unlike most other entries
# that spell out an empty `public =`.
[ChadwyckPoetry]
id = chadwyck_poetry
name = ChadwyckPoetry
desc = Chadwyck-Healey Poetry Collections
link = http://collections.chadwyck.co.uk/marketing/list_of_all.jsp
private = raw,txt,xml,metadata,freqs
license_type = Mixed

# Chicago: metadata and freqs are listed under `public` and each has a
# matching url_* key; txt is listed under `private`.
[Chicago]
id = chicago
name = Chicago
desc = U of Chicago Corpus of C20 Novels
link = https://textual-optics-lab.uchicago.edu/us_novel_corpus
public = metadata,freqs
private = txt
license_type = Academic
url_freqs = https://www.dropbox.com/s/w29o1urthijbxgn/chicago_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/oba29ymlg7arhdu/chicago_metadata.zip?dl=1

# DTA: freely licensed (CC BY-SA 4.0 per `license`). All six parts listed
# under `public` (metadata, freqs, raw, raw_metadata, txt, xml) have a
# matching url_* key; there is no `private` key.
[DTA]
id = dta
name = DTA
desc = Deutsches Textarchiv
link = http://www.deutschestextarchiv.de
license_type = Free
license = https://creativecommons.org/licenses/by-sa/4.0/
path_xml_meta = xml_meta
public = metadata,freqs,raw,raw_metadata,txt,xml
url_freqs = https://www.dropbox.com/s/nb1u0e77ng2d5mh/dta_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/294h2suvtu6sing/dta_metadata.zip?dl=1
# The raw archives are fetched from the DWDS media server, not Dropbox.
url_raw = http://media.dwds.de/dta/download/dta_komplett_2019-06-05.zip
url_raw_metadata = http://media.dwds.de/dta/download/dta_metadaten_oai_dc_2018-09-12.zip
url_txt = https://www.dropbox.com/s/8ez1tpa7awfb100/dta_txt.zip?dl=1
url_xml = https://www.dropbox.com/s/jy0o1cy37wioqqv/dta_xml.zip?dl=1

# DialNarr: metadata and freqs are listed under `public` and each has a
# matching url_* key; txt is listed under `private`.
[DialNarr]
id = dialnarr
name = DialNarr
desc = Dialogue and Narration separated in Chadwyck-Healey Novels
link = https://doi.org/10.1093/llc/fqx031
public = metadata,freqs
private = txt
license_type = Academic
url_metadata = https://www.dropbox.com/s/jw53k1mba6eumna/dialnarr_metadata.zip?dl=1
url_freqs = https://www.dropbox.com/s/rgduzqatl4j0x5s/dialnarr_freqs.zip?dl=1

# ECCO: commercially licensed; nothing is listed under `public`, and raw, txt,
# xml, metadata and freqs are all listed under `private`.
[ECCO]
id = ecco
name = ECCO
desc = Eighteenth Century Collections Online
link = https://www.gale.com/intl/primary-sources/eighteenth-century-collections-online
public = 
private = raw,txt,xml,metadata,freqs
license_type = Commercial

# ECCO_TCP: freely licensed; all five parts listed under `public` (raw,
# metadata, freqs, txt, xml) have a matching url_* key.
[ECCO_TCP]
id = ecco_tcp
name = ECCO_TCP
desc = ECCO (Text Creation Partnership)
link = https://textcreationpartnership.org/tcp-texts/ecco-tcp-eighteenth-century-collections-online/
url_freqs = https://www.dropbox.com/s/sdf5pdyifnrulyk/ecco_tcp_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/xh991n4sohulczb/ecco_tcp_metadata.zip?dl=1
url_txt = https://www.dropbox.com/s/8sa4f6yqpz6ku3d/ecco_tcp_txt.zip?dl=1
url_xml = https://www.dropbox.com/s/vtv2iw7ujtivqss/ecco_tcp_xml.zip?dl=1
url_raw = https://www.dropbox.com/s/aubdaixvc59d8o9/ecco_tcp_raw.zip?dl=1
public = raw,metadata,freqs,txt,xml
license_type = Free

# EEBO_TCP: freely licensed; url_* keys are defined for freqs, metadata, txt
# and xml.
[EEBO_TCP]
id = eebo_tcp
name = EEBO_TCP
desc = Early English Books Online (curated by the Text Creation Partnership)
link = https://textcreationpartnership.org/tcp-texts/eebo-tcp-early-english-books-online/
url_freqs = https://www.dropbox.com/s/n2oocs233wh5edo/eebo_tcp_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/th2i7jvuxksb0ma/eebo_tcp_metadata.zip?dl=1
url_txt = https://www.dropbox.com/s/otgqbs0vdli3gvb/eebo_tcp_txt.zip?dl=1
url_xml = https://www.dropbox.com/s/1wui9qjhkzy8fnm/eebo_tcp_xml.zip?dl=1
# NOTE(review): `raw` is listed as public but this section defines no
# `url_raw` — confirm whether a URL is missing or `raw` should not be listed.
public = raw,metadata,freqs,txt,xml
license_type = Free

# ESTC: a catalogue entry; nothing is listed under `public`, and only metadata
# is listed under `private`.
[ESTC]
id = estc
name = ESTC
desc = English Short Title Catalogue
link = http://estc.ucr.edu/
# NOTE(review): the txt path is set to `json` — presumably the records are
# stored as JSON files in that directory; confirm against the corpus loader.
path_txt = json
public = 
private = metadata
license_type = Academic

# EnglishDialogues: note that `id` is `dialogues`, not a lower-cased form of
# the section name. url_* keys are defined for freqs, metadata and xml.
[EnglishDialogues]
id = dialogues
name = EnglishDialogues
desc = A Corpus of English Dialogues, 1560-1760
link = https://ota.bodleian.ox.ac.uk/repository/xmlui/handle/20.500.12024/2507
url_freqs = https://www.dropbox.com/s/tji67pv89e61wd6/dialogues_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/lcudgwmxdpspsc9/dialogues_metadata.zip?dl=1
url_xml = https://www.dropbox.com/s/u07u3mrrom3i9f5/dialogues_xml.zip?dl=1
# NOTE(review): `raw` is listed as public but this section defines no
# `url_raw` — confirm whether a URL is missing or `raw` should not be listed.
public = metadata,freqs,xml,raw
license_type = Academic
license = https://ota.bodleian.ox.ac.uk/repository/xmlui/page/licence-ota

# EvansTCP: freely licensed; all five parts listed under `public` (raw,
# metadata, freqs, xml, txt) have a matching url_* key.
[EvansTCP]
id = evans_tcp
name = EvansTCP
# The linked TCP collection is Evans Early American Imprints (not a
# fiction-only corpus), so the description names the collection itself.
desc = Evans Early American Imprints (Text Creation Partnership)
link = https://textcreationpartnership.org/tcp-texts/evans-tcp-evans-early-american-imprints/
path_root = evans_tcp
public = raw,metadata,freqs,xml,txt
url_freqs = https://www.dropbox.com/s/4r426a5f6jk3tq8/evans_tcp_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/jr1j9i7wbz5uh0f/evans_tcp_metadata.zip?dl=1
url_raw = https://www.dropbox.com/s/05qtu8r2xejqpkh/evans_tcp_raw.zip?dl=1
url_txt = https://www.dropbox.com/s/ezen3zxyt9hzxxp/evans_tcp_txt.zip?dl=1
url_xml = https://www.dropbox.com/s/yg7hjf536klg04c/evans_tcp_xml.zip?dl=1
license_type = Free
# NOTE(review): presumably the range of publication years covered — confirm
# how the loader uses year_start / year_end.
year_start = 1640
year_end = 1810


# FanFic: `link`, `public` and `private` are all empty.
# NOTE(review): unlike most entries, no `license_type` is given — confirm
# whether that is intentional.
[FanFic]
id = fanfic
name = FanFic
desc = Corpus of Harry Potter Fan Fiction
link = 
public =
private =


# GaleAmericanFiction: metadata and freqs are listed under `public` and each
# has a matching url_* key; txt and raw are listed under `private`.
[GaleAmericanFiction]
id = gale_amfic
name = GaleAmericanFiction
desc = Gale American Fiction, 1774-1920
link = https://www.gale.com/c/american-fiction-1774-1920
url_freqs = https://www.dropbox.com/s/7tbwfcgbcincdi1/gale_amfic_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/9ysabqrrx05832u/gale_amfic_metadata.zip?dl=1
public = metadata,freqs
private = txt,raw
license_type = Academic



# GildedAge: url_* keys are defined for metadata and freqs; there is no
# `private` key.
[GildedAge]
id = gildedage
name = GildedAge
desc = U.S. Fiction of the Gilded Age
link = https://doi.org/10.1093/llc/fqv066
url_metadata = https://www.dropbox.com/s/fg605k0cnebf70i/gildedage_metadata.zip?dl=1
url_freqs = https://www.dropbox.com/s/i5zjhil743rm907/gildedage_freqs.zip?dl=1
# NOTE(review): `txt` is listed as public but this section defines no
# `url_txt` — confirm whether a URL is missing or `txt` should not be listed.
public = metadata,txt,freqs
license_type = Academic


# Hathi: the only download key is `url_full_metadata`, which points at a file
# on hathitrust.org.
[Hathi]
id = hathi
name = Hathi
desc = Hathi Trust Research Center
link = https://www.hathitrust.org/
url_full_metadata = https://www.hathitrust.org/filebrowser/download/297178
# NOTE(review): `public` is commented out below, so this entry currently
# declares neither `public` nor `private` parts — confirm this is intentional
# or remove the dead line.
#public = metadata,freqs
license_type = Academic


# HathiBio: only `url_metadata` is defined as a download key.
[HathiBio]
id = hathi_bio
name = HathiBio
desc = Biographies from Hathi Trust
link = https://www.ideals.illinois.edu/handle/2142/99554
url_metadata = https://www.dropbox.com/s/wth2i53gg0tq18a/hathi_bio_metadata.zip?dl=1
# NOTE(review): `freqs` is listed as public but this section defines no
# `url_freqs` — confirm whether a URL is missing.
public = metadata,freqs
license_type = Academic


# HathiEngLit: metadata and freqs are listed under `public` and each has a
# matching url_* key; there is no `private` key.
[HathiEngLit]
id = hathi_englit
name = HathiEngLit
desc = Fiction, drama, verse word frequencies from Hathi Trust
link = https://wiki.htrc.illinois.edu/display/COM/Word+Frequencies+in+English-Language+Literature
url_freqs = https://www.dropbox.com/s/jm858ej78x7h0vk/hathi_englit_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/gnwuwkpy4jybr5r/hathi_englit_metadata.zip?dl=1
public = metadata,freqs
license_type = Academic


# HathiEssays: a title-filtered Hathi Trust subset (see `desc`).
[HathiEssays]
id = hathi_essays
name = HathiEssays
desc = Hathi Trust volumes with "essay(s)" in title
link = https://catalog.hathitrust.org/Search/Home
# Both paths point into the `hathi` directory rather than this entry's own;
# presumably freqs and loader code are shared with the Hathi corpus — confirm.
path_freqs = ../hathi/freqs
path_python = hathi/hathi.py
# NOTE(review): metadata and freqs are listed as public but no url_* keys are
# defined in this section.
public = metadata,freqs
license_type = Academic


# HathiLetters: a title-filtered Hathi Trust subset (see `desc`).
[HathiLetters]
id = hathi_letters
name = HathiLetters
desc = Hathi Trust volumes with "letter(s)" in title
link = https://catalog.hathitrust.org/Search/Home
# Both paths point into the `hathi` directory rather than this entry's own;
# presumably freqs and loader code are shared with the Hathi corpus — confirm.
path_freqs = ../hathi/freqs
path_python = hathi/hathi.py
# NOTE(review): metadata and freqs are listed as public but no url_* keys are
# defined in this section.
public = metadata,freqs
license_type = Academic


# HathiNovels: a title-filtered Hathi Trust subset (see `desc`).
[HathiNovels]
id = hathi_novels
name = HathiNovels
desc = Hathi Trust volumes with "novel(s)" in title
link = https://catalog.hathitrust.org/Search/Home
# Both paths point into the `hathi` directory rather than this entry's own;
# presumably freqs and loader code are shared with the Hathi corpus — confirm.
path_freqs = ../hathi/freqs
path_python = hathi/hathi.py
# NOTE(review): metadata and freqs are listed as public but no url_* keys are
# defined in this section.
public = metadata,freqs
license_type = Academic


# HathiProclamations: a title-filtered Hathi Trust subset (see `desc`).
[HathiProclamations]
id = hathi_proclamations
name = HathiProclamations
desc = Hathi Trust volumes with "proclamation(s)" in title
link = https://catalog.hathitrust.org/Search/Home
# Both paths point into the `hathi` directory rather than this entry's own;
# presumably freqs and loader code are shared with the Hathi corpus — confirm.
path_freqs = ../hathi/freqs
path_python = hathi/hathi.py
# NOTE(review): metadata and freqs are listed as public but no url_* keys are
# defined in this section.
public = metadata,freqs
license_type = Academic


# HathiSermons: a title-filtered Hathi Trust subset (see `desc`).
[HathiSermons]
id = hathi_sermons
name = HathiSermons
desc = Hathi Trust volumes with "sermon(s)" in title
link = https://catalog.hathitrust.org/Search/Home
# Both paths point into the `hathi` directory rather than this entry's own;
# presumably freqs and loader code are shared with the Hathi corpus — confirm.
path_freqs = ../hathi/freqs
path_python = hathi/hathi.py
# NOTE(review): metadata and freqs are listed as public but no url_* keys are
# defined in this section.
public = metadata,freqs
license_type = Academic


# HathiStories: a title-filtered Hathi Trust subset (see `desc`).
[HathiStories]
id = hathi_stories
name = HathiStories
desc = Hathi Trust volumes with "story/stories" in title
link = https://catalog.hathitrust.org/Search/Home
# Both paths point into the `hathi` directory rather than this entry's own;
# presumably freqs and loader code are shared with the Hathi corpus — confirm.
path_freqs = ../hathi/freqs
path_python = hathi/hathi.py
# NOTE(review): metadata and freqs are listed as public but no url_* keys are
# defined in this section.
public = metadata,freqs
license_type = Academic


# HathiTales: a title-filtered Hathi Trust subset (see `desc`).
[HathiTales]
id = hathi_tales
name = HathiTales
desc = Hathi Trust volumes with "tale(s)" in title
link = https://catalog.hathitrust.org/Search/Home
# Both paths point into the `hathi` directory rather than this entry's own;
# presumably freqs and loader code are shared with the Hathi corpus — confirm.
path_freqs = ../hathi/freqs
path_python = hathi/hathi.py
# NOTE(review): freqs is listed as public but only `url_metadata` is defined
# in this section.
public = metadata,freqs
url_metadata = https://www.dropbox.com/s/hig9r9igcxp95sy/hathi_tales_metadata.zip?dl=1
license_type = Academic


# HathiTreatises: a title-filtered Hathi Trust subset (see `desc`).
[HathiTreatises]
id = hathi_treatises
name = HathiTreatises
desc = Hathi Trust volumes with "treatise(s)" in title
link = https://catalog.hathitrust.org/Search/Home
# Both paths point into the `hathi` directory rather than this entry's own;
# presumably freqs and loader code are shared with the Hathi corpus — confirm.
path_freqs = ../hathi/freqs
path_python = hathi/hathi.py
# NOTE(review): metadata and freqs are listed as public but no url_* keys are
# defined in this section.
public = metadata,freqs
license_type = Academic


# InternetArchive: freely licensed; all four parts listed under `public`
# (metadata, freqs, txt, data) have a matching url_* key.
[InternetArchive]
id = internet_archive
name = InternetArchive
desc = 19th Century Novels, curated by the U of Illinois and hosted on the Internet Archive
link = https://archive.org/details/19thcennov?tab=about
# The xml path is set to `meta_xml`; xml itself is not among the public parts.
path_xml = meta_xml
url_txt = https://www.dropbox.com/s/bs1ec7k9kk2jkrt/internet_archive_txt.zip?dl=1
url_metadata = https://www.dropbox.com/s/yymc8t060eik7bt/internet_archive_metadata.zip?dl=1
url_freqs = https://www.dropbox.com/s/eofh9npy5x7qn5o/internet_archive_freqs.zip?dl=1
url_data = https://www.dropbox.com/s/3yi0zcj6w5eehko/internet_archive_data.zip?dl=1
public = metadata,freqs,txt,data
license_type = Free


# LitLab: metadata and freqs are listed under `public` and each has a
# matching url_* key; txt is listed under `private`.
[LitLab]
id = litlab
name = LitLab
desc = Literary Lab Corpus of 18th and 19th Century Novels
link = https://litlab.stanford.edu/LiteraryLabPamphlet11.pdf
url_metadata = https://www.dropbox.com/s/ruur7jrckhm8nqz/litlab_metadata.zip?dl=1
url_freqs = https://www.dropbox.com/s/itoj9a8n4vrjot9/litlab_freqs.zip?dl=1
public = metadata,freqs
private = txt
license_type = Academic


# MarkMark: metadata and freqs are listed under `public` and each has a
# matching url_* key; txt is listed under `private`.
[MarkMark]
id = markmark
name = MarkMark
desc = Mark Algee-Hewitt's and Mark McGurl's 20th Century Corpus
link = https://litlab.stanford.edu/LiteraryLabPamphlet8.pdf
url_freqs = https://www.dropbox.com/s/xbjugeqndquph55/markmark_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/y5r316u8fzorx3g/markmark_metadata.zip?dl=1
public = metadata,freqs
private = txt
license_type = Academic


# NewYorker: `link`, `public` and `private` are all empty, so no parts or
# download URLs are declared for this entry.
[NewYorker]
id = new_yorker
name = NewYorker
desc = New Yorker archives, 1925-2017
link = 
public = 
private =
license_type = Academic


# OldBailey: freely licensed (CC BY-NC 4.0 per `license`); all four parts
# listed under `public` (metadata, freqs, txt, xml) have a matching url_* key.
[OldBailey]
id = oldbailey
name = OldBailey
desc = Old Bailey Online
link = https://www.oldbaileyonline.org/
url_freqs = https://www.dropbox.com/s/rwgt7q1f6pl65jh/oldbailey_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/zc6osrvsgp0n1m4/oldbailey_metadata.zip?dl=1
url_txt = https://www.dropbox.com/s/yjsjnk4eyprifem/oldbailey_txt.zip?dl=1
url_xml = https://www.dropbox.com/s/90bsbu7re5tnbtp/oldbailey_xml.zip?dl=1
public = metadata,freqs,txt,xml
license_type = Free
license = https://creativecommons.org/licenses/by-nc/4.0/

# PMLA: `desc`, `link`, `public` and `private` are all empty.
# NOTE(review): this entry has no description — consider filling in `desc`.
[PMLA]
id = pmla
name = PMLA
desc = 
link = 
# The Python module path points at jstor_dfr/jstor_dfr.py rather than a
# pmla-named module; presumably the loader is shared — confirm.
path_python = jstor_dfr/jstor_dfr.py
public = 
private =
license_type = Academic

# RavenGarside: a bibliography entry; nothing is listed under `public`, and
# only metadata is listed under `private`.
[RavenGarside]
id = ravengarside
name = RavenGarside
desc = Raven & Garside's Bibliography of English Novels, 1770-1830
link = https://catalog.hathitrust.org/Record/004098100
public = 
private = metadata
# Metadata file name is set explicitly to an .xlsx spreadsheet.
path_metadata = metadata.xlsx
license_type = Academic

# SOTU: freely licensed; all three parts listed under `public` (metadata,
# freqs, txt) have a matching url_* key.
[SOTU]
id = sotu
name = SOTU
desc = State of the Union Addresses
link = https://www.kaggle.com/rtatman/state-of-the-union-corpus-1989-2017
url_freqs = https://www.dropbox.com/s/34gz1aifsot65fw/sotu_freqs.zip?dl=1
url_txt = https://www.dropbox.com/s/w73qio0thhfzdpx/sotu_txt.zip?dl=1
url_metadata = https://www.dropbox.com/s/6gyueael6smbxyg/sotu_metadata.zip?dl=1
public = metadata,freqs,txt
license_type = Free


# Sellers: freely licensed; all three parts listed under `public` (metadata,
# freqs, txt) have a matching url_* key.
[Sellers]
id = sellers
name = Sellers
desc = 19th Century Texts compiled by Jordan Sellers
link = http://journalofdigitalhumanities.org/1-2/the-emergence-of-literary-diction-by-ted-underwood-and-jordan-sellers/
url_freqs = https://www.dropbox.com/s/k293ip4wrswhl8j/sellers_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/7mos2k5qx8bdc1l/sellers_metadata.zip?dl=1
url_txt = https://www.dropbox.com/s/j7e5my3s20n3xq4/sellers_txt.zip?dl=1
public = metadata,freqs,txt
license_type = Free


# SemanticCohort: freely licensed; both parts listed under `public`
# (metadata, data) have a matching url_* key.
[SemanticCohort]
id = semantic_cohort
name = SemanticCohort
desc = Corpus used in "Semantic Cohort Method" (2012)
# The Semantic Cohort Method paper is Literary Lab Pamphlet 4; Pamphlet 8 is
# the Algee-Hewitt/McGurl pamphlet linked from the MarkMark entry.
link = https://litlab.stanford.edu/LiteraryLabPamphlet4.pdf
url_data = https://www.dropbox.com/s/tnqna3bdsiovtnt/semantic_cohort_data.zip?dl=1
url_metadata = https://www.dropbox.com/s/f6imhtfzgpf7tvz/semantic_cohort_metadata.zip?dl=1
public = metadata,data
license_type = Free


# Spectator: freely licensed; all three parts listed under `public`
# (metadata, freqs, txt) have a matching url_* key.
[Spectator]
id = spectator
name = Spectator
desc = The Spectator (1711-1714)
link = http://www.gutenberg.org/ebooks/12030
url_freqs = https://www.dropbox.com/s/sil5q31833rz4n0/spectator_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/3cw2lcza68djzj1/spectator_metadata.zip?dl=1
url_txt = https://www.dropbox.com/s/goj6xbom3qnv5u5/spectator_txt.zip?dl=1
public = metadata,freqs,txt
license_type = Free


# TedJDH: freely licensed; all three parts listed under `public` (metadata,
# freqs, txt) have a matching url_* key. `link` is the same article URL used
# by the Sellers entry.
[TedJDH]
id = tedjdh
name = TedJDH
desc = Corpus used in "Emergence of Literary Diction" (2012)
link = http://journalofdigitalhumanities.org/1-2/the-emergence-of-literary-diction-by-ted-underwood-and-jordan-sellers/
url_metadata = https://www.dropbox.com/s/ibjl7x0eyyz5zm6/tedjdh_metadata.zip?dl=1
url_txt = https://www.dropbox.com/s/8ug3h24h5bggnx7/tedjdh_txt.zip?dl=1
url_freqs = https://www.dropbox.com/s/igoxb4y7buctm5o/tedjdh_freqs.zip?dl=1
public = metadata,freqs,txt
license_type = Free

# TxtLab: freely licensed; all four parts listed under `public` (metadata,
# freqs, txt, raw) have a matching url_* key.
[TxtLab]
id = txtlab
name = TxtLab
desc = A multilingual dataset of 450 novels
link = https://txtlab.org/2016/01/txtlab450-a-data-set-of-multilingual-novels-for-teaching-and-research
# The raw archive is fetched from a GitHub repository's master branch, so its
# contents can change over time.
url_raw = https://github.com/christofs/txtlab450/archive/master.zip
url_txt = https://www.dropbox.com/s/q4bm4yf76zgumi6/txtlab_txt.zip?dl=1
url_freqs = https://www.dropbox.com/s/56azeswx0omjum2/txtlab_freqs.zip?dl=1
url_metadata = https://www.dropbox.com/s/eh33qy6bcm7rvcp/txtlab_metadata.zip?dl=1
public = metadata,freqs,txt,raw
license_type = Free


# MM: this entry has no `public`, `private` or `license_type` keys, and uses
# `--` as a placeholder for `desc` and `link`.
# NOTE(review): looks like a local/personal corpus definition — confirm it
# belongs in this manifest.
[MM]
name = MM
id = mm
desc = --
link = --
path_root = mm
# NOTE(review): absolute path under one user's home directory; it will not
# resolve on other machines.
path_txt = /home/ryan/markmark/txt
path_xml = xml
# NOTE(review): absolute, machine-specific path (see path_txt).
path_metadata = /home/ryan/markmark/metadata.csv
path_python = mm.py
class_name = MM


