{"journal": {"journal_issue": {"issue": "Long Papers", "journal_volume": {"volume": "Connecting the Knowledge..."}, "publication_date": {"day": "20", "year": "2018", "month": "06", "@media_type": "online"}}, "journal_article": {"titles": {"title": "Automatic Subject Indexing and Classification Using Text Recognition and Computer-Based Analysis of Tables of Contents"}, "program": [{"@name": "AccessIndicators", "license_ref": [{"value": "https://creativecommons.org/licenses/by/4.0", "@applies_to": "am", "@start_date": "2018-06-20"}, {"value": "https://creativecommons.org/licenses/by/4.0", "@applies_to": "vor", "@start_date": "2018-06-20"}, {"value": "https://creativecommons.org/licenses/by/4.0", "@applies_to": "tdm", "@start_date": "2018-06-20"}], "free_to_read": {"value": "", "@start_date": "2018-06-20"}}, {"related_item": {"intra_work_relation": {"value": "https://hal.science/hal-01816705v1", "@identifier-type": "uri", "@relationship-type": "isSameAs"}}}], "abstract": {"value": {"value": "This paper will describe a method for machine-based creation of high quality subject indexing and classification for both electronic and print documents using tables of contents (ToCs). The technology described here is primarily focused on electronic and print documents for which, because of technical or licensing reasons, it is not possible to index full text. However, the technology would also be useful for full text documents, because it could significantly enhance the accuracy and relevance of subject description by analyzing the structure of ToCs.", "@xml:lang": "en"}}, "doi_data": {"doi": "10.4000/proceedings.elpub.2018.19", "resource": "http://elpub.episciences.org/4607", "collection": [{"item": {"@crawler": "iParadigms", "resource": "https://hal.science/hal-01816705v1/document"}, "@property": "crawler-based"}, {"item": {"resource": {"value": "https://hal.science/hal-01816705v1/document", "@mime_type": "application/pdf"}}, "@property": "text-mining"}]}, "keywords": {"0": "[SHS.INFO]Humanities and Social Sciences/Library and information sciences", "en": ["machine learning system", "computer-generated keywords", "library automatization", "text mining", "computer-generated subject headings"]}, "@language": "en", "contributors": {"person_name": {"surname": "Pokorny", "@sequence": "first", "given_name": "Jan", "affiliations": {"institution": {"institution_name": "ENKI, o.p.s."}}, "@contributor_role": "author"}}, "publisher_item": {"item_number": {"value": "4607", "@item_number_type": "article_number"}}, "acceptance_date": {"day": "20", "year": "2018", "month": "06", "@media_type": "online"}, "publication_date": {"day": "20", "year": "2018", "month": "06", "@media_type": "online"}, "@publication_type": "full_text"}, "journal_metadata": {"@language": "en", "full_title": "ElPub - ELectronic PUBlishing"}}, "database": {"current": {"url": "http://elpub.episciences.org/4607", "flag": "imported", "type": {"title": "conferenceobject"}, "dates": {"posted_date": "2018-06-20 21:22:57", "publication_date": "2018-06-20 21:57:28", "modification_date": "2025-03-31 23:02:51", "first_submission_date": "2018-06-20 21:22:57"}, "files": {"link": "http://elpub.episciences.org/4607/pdf"}, "isTmp": false, "status": {"id": 16, "label": {"en": "published", "fr": "publié"}}, "volume": {"id": 339, "year": "2018", "number": 1, "titles": {"en": "Connecting the Knowledge Commons: From Projects to Sustainable Infrastructure"}, "position": "1", "settings": {"is_open": false, "is_current_issue": false, "is_special_issue": false}, "descriptions": {"en": "ELPUB 2018 marks the 22nd edition of the International Conference in ELectronic PUBlishing and the 10th anniversary of the meeting being held in Toronto."}, "has_proceedings": false, "bibliographical_references": ""}, "journal": {"id": 16, "url": "http://elpub.episciences.org", "code": "elpub", "name": "ElPub - ELectronic PUBlishing"}, "metrics": {"file_count": "1196", "page_count": "385"}, "section": {"id": 88, "titles": {"en": "Long Papers"}, "position": 1, "settings": {"is_open": true}, "descriptions": null}, "version": 1, "cited_by": {"464": {"id": "464", "docid": "4607", "citation": "{\"0\":{\"type\":null,\"author\":\"Worrawan Wandee; Pokpong Songmuang, 0000-0002-4831-686X\",\"year\":2022,\"title\":\"Hierarchical Multi-Label Classification of Library Subject Headings\",\"event_place\":\"\",\"source_title\":\"2022 International Conference on Cybernetics and Innovations (ICCI)\",\"volume\":\"8\",\"issue\":\"\",\"page\":\"1-5\",\"doi\":\"10.1109/icci54995.2022.9744189\",\"oa_link\":\"\"},\"1\":{\"type\":null,\"author\":\"Michalis Sfakakis, 0000-0003-2973-7455; Leonidas Papachristopoulos, 0000-0002-4148-2689; Kyriaki Zoutsou, 0000-0001-9600-5612; Giannis Tsakonas, 0000-0002-8786-9440; Christos Papatheodorou, 0000-0002-9025-6469\",\"year\":2019,\"title\":\"Automated Subject Indexing of Domain Specific Collections Using Word Embeddings and General Purpose Thesauri\",\"event_place\":\"\",\"source_title\":\"Communications in computer and information science\",\"volume\":\"\",\"issue\":\"\",\"page\":\"103-114\",\"doi\":\"10.1007/978-3-030-36599-8_9\",\"oa_link\":\"\"}}", "source_id": "13", "updated_at": "2025-11-22 03:09:21", "source_id_name": "OpenCitations"}}, "mainPdfUrl": "https://hal.science/hal-01816705v1/document", "repository": {"id": "1", "name": "HAL", "type": "repository", "status": "1", "api_url": "https://api.archives-ouvertes.fr", "doc_url": "https://hal.science/hal-01816705v1", "base_url": "https://api.archives-ouvertes.fr/oai/hal/", "paper_url": "https://hal.science/hal-01816705v1/document", "doi_prefix": "", "identifier": "oai:HAL:hal-01816705v1"}, "identifiers": {"concept_identifier": null, "document_item_number": 4607, "permanent_item_number": 4607, "repository_identifier": "hal-01816705"}, "classifications": [], "original_language": "en", "position_in_volume": 3, "graphical_abstract_file": ""}, "previous_versions": null, "first_version_item_number": 4607, "latest_version_item_number": 4607}}