
    Of	                         S r SSK7  SSK7  SSKJr  SSKJr  \R                  S5      r	\R                  S5      r
\R                  S5      r\R                  S5      r " S	 S
\5      rg)a  
Sinica Treebank Corpus Sample

http://rocling.iis.sinica.edu.tw/CKIP/engversion/treebank.htm

10,000 parsed sentences, drawn from the Academia Sinica Balanced
Corpus of Modern Chinese.  Parse tree notation is based on
Information-based Case Grammar.  Tagset documentation is available
at https://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html

Language and Knowledge Processing Group, Institute of Information
Science, Academia Sinica

The data is distributed with the Natural Language Toolkit under the terms of
the Creative Commons Attribution-NonCommercial-ShareAlike License
[https://creativecommons.org/licenses/by-nc-sa/2.5/].

References:

Feng-Yi Chen, Pi-Fang Tsai, Keh-Jiann Chen, and Chu-Ren Huang (1999)
The Construction of Sinica Treebank. Computational Linguistics and
Chinese Language Processing, 4, pp 87-104.

Huang Chu-Ren, Keh-Jiann Chen, Feng-Yi Chen, Keh-Jiann Chen, Zhao-Ming
Gao, and Kuang-Yu Chen. 2000. Sinica Treebank: Design Criteria,
Annotation Guidelines, and On-line Interface. Proceedings of 2nd
Chinese Language Processing Workshop, Association for Computational
Linguistics.

Chen Keh-Jiann and Yu-Ming Hsieh (2004) Chinese Treebanks and Grammar
Extraction, Proceedings of IJCNLP-04, pp560-565.
    )*)map_tagsinica_parsez^#\S+\sz(?<=\))#.*$z:([^:()|]+):([^:()|]+)z:[^:()|]+:([^:()|]+)c                   4    \ rS rSrSrS rS rS	S jrS rSr	g)
SinicaTreebankCorpusReader4   z!
Reader for the sinica treebank.
c                     UR                  5       n[        R                  SU5      n[        R                  SU5      nU/$ )N )readline
IDENTIFIERsubAPPENDIX)selfstreamsents      D/usr/lib/python3/dist-packages/nltk/corpus/reader/sinica_treebank.py_read_block&SinicaTreebankCorpusReader._read_block9   s5     ~~b$'||B%v    c                     [        U5      $ Nr   r   r   s     r   _parse!SinicaTreebankCorpusReader._parse?   s    D!!r   Nc           
          [         R                  U5       VVs/ s H  u  p4XC4PM
     nnnU(       a;  X R                  :w  a,  U VVs/ s H  u  pCU[        U R                  X#5      4PM     nnnU$ s  snnf s  snnf r   )TAGWORDfindall_tagsetr   )r   r   tagsettwtagged_sents         r   _tagSinicaTreebankCorpusReader._tagB   sr    ,3OOD,AB,A&1v,ABf,DODO&1GDLL&45K    Cs   A/$A5c                 ,    [         R                  U5      $ r   )WORDr   r   s     r   _word SinicaTreebankCorpusReader._wordJ   s    ||D!!r    r   )
__name__
__module____qualname____firstlineno____doc__r   r   r$   r(   __static_attributes__r*   r   r   r   r   4   s    ""r   r   N)r/   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.tagr   	nltk.treer   recompiler   r   r   r'   SyntaxCorpusReaderr   r*   r   r   <module>r8      s`   B % %  "ZZ
#
::n%
**.
/	zz)*"!3 "r   