To cite 'textometry' in publications use:
TXM Team (2013). TXM Manual. ICAR Laboratory, Lyon University & CNRS, Lyon, France. https://txm.gitpages.huma-num.fr/textometrie?lang=en.
Heiden, Serge (2010). “The TXM Platform: Building Open-Source Textual Analysis Software Compatible with the TEI Encoding Scheme.” In Otoguro R, Ishikawa K (eds.), Proceedings of the 24th Pacific Asia Conference on Language, Information and Computation (PACLIC 24). 4-7 November 2010, Sendai, 389–398. https://halshs.archives-ouvertes.fr/halshs-00549764/en.
Lafon, Pierre (1980). “Sur la variabilité de la fréquence des formes dans un corpus.” Mots, 1(1), 127–165. ISSN 0243-6450, doi:10.3406/mots.1980.1008, https://www.persee.fr/doc/mots_0243-6450_1980_num_1_1_1008.
Corresponding BibTeX entries:
% TXM Manual (2013). The corporate author is double-braced on purpose so that
% BibTeX keeps "TXM Team" as a single, indivisible name.
@Manual{txmteam2013manual,
  title        = {{TXM} Manual},
  author       = {{TXM Team}},
  organization = {ICAR Laboratory, Lyon University \& CNRS},
  address      = {Lyon, France},
  year         = {2013},
  url          = {https://txm.gitpages.huma-num.fr/textometrie?lang=en},
}
% Heiden (2010), PACLIC 24 paper on the TXM platform. Personal names are given
% in "Last, First" form with single braces so styles can abbreviate and sort
% them; the month uses the predefined three-letter macro, unbraced.
@InProceedings{heiden2010txm,
  author    = {Heiden, Serge},
  title     = {The {TXM} Platform: Building Open-Source Textual Analysis
    Software Compatible with the {TEI} Encoding Scheme},
  booktitle = {Proceedings of the 24th Pacific Asia Conference on
    Language, Information and Computation (PACLIC 24). 4-7 November
    2010, Sendai},
  editor    = {Otoguro, Ryo and Ishikawa, Kiyoshi},
  publisher = {Institute for Digital Enhancement of Cognitive
    Development, Waseda University},
  month     = nov,
  year      = {2010},
  pages     = {389--398},
  url       = {https://halshs.archives-ouvertes.fr/halshs-00549764/en},
  keywords  = {xml-tei corpora ; search engine ; statistical analysis
    ; textometry ; open-source},
  abstract  = {This paper describes the rationale and design of an
    XML-TEI encoded corpora compatible analysis platform for text
    mining called TXM. The design of this platform is based on a
    synthesis of the best available algorithms in existing textometry
    software. It also relies on identifying the most relevant
    open-source technologies for processing textual resources encoded
    in XML and Unicode, for efficient full-text search on annotated
    corpora and for statistical data analysis. The architecture is
    based on a Java toolbox articulating a full-text search engine
    component with a statistical computing environment and with an
    original import environment able to process a large variety of
    data sources, including XML-TEI, and to apply embedded NLP tools
    to them. The platform is distributed as an open-source Eclipse
    project for developers and in the form of two demonstrator
    applications for end users: a standard application to install on
    a workstation and an online web application framework.},
}
% Lafon (1980), article in the journal Mots. Accented letters in the title are
% written as BibTeX special characters ({\'e}) so they typeset and sort
% correctly under classic BibTeX as well as Biber.
@Article{lafon1980variabilite,
  author    = {Lafon, Pierre},
  title     = {Sur la variabilit{\'e} de la fr{\'e}quence des formes dans un
    corpus},
  journal   = {Mots},
  volume    = {1},
  number    = {1},
  pages     = {127--165},
  year      = {1980},
  issn      = {0243-6450},
  doi       = {10.3406/mots.1980.1008},
  url       = {https://www.persee.fr/doc/mots_0243-6450_1980_num_1_1_1008},
  language  = {fre},
  publisher = {Presses de la Fondation nationale des sciences
    politiques, 27 Rue Saint Guillaume, 75341, Paris},
  abstract  = {FORM FREQUENCY VARIABILITY IN A CORPUS P. L. is
    studying the distribution of the frequency of a word in a corpus
    divided into several fragments. Contrary to current work in this
    field, he proposes to use the formulae of the hypergeometric
    distribution, choosing the whole corpus as the norm of the
    fragments. These choices lead to the calculation of a
    probabilistic index valid for the whole range of frequencies. The
    calculation of this index for every form in the vocabulary
    enables us to define two complementary subsets of forms : that of
    specific forms and that of basic forms, and to attribute to each
    fragment its own lexical specifications.},
}