C Library Reference for Ver.2.1

山下達雄(tatuo-y@cl.aist-nara.ac.jp)
last update 981115

SUFARY(http://cl.aist-nara.ac.jp/lab/nlt/ss/)


 ɽ
 $SUFARY : SUFARYパッケージを展開したディレクトリ


 ѥ

ǽ sufary.h 򥤥󥯥롼ɤޤ礦
------ samp.c
#include <stdio.h>
#include <stdlib.h>
#include "sufary.h"
...
------

ơʲΤ褦˥ѥ뤷ޤ(ѥå Makefile ʤɤ򻲹
ˤƲ)

gcc -I$SUFARY/lib -o samp samp.c $SUFARY/lib/libsufary.a -lm

$SUFARY/tools/ ͡ʥץץबޤΤǸ滲Ȳ

 ¤

SUFARY
 arrayե롢ƥȥե롢̤ʤɤξäƤޤ

eresult
 ɤȤΤ CONT,  FAIL,  ERROR 3ĤǤ

DID
 Ф˴ؤ(DocIDե󡢵ϰ֡
ʤ)äƤޤ

 եγ

SUFARY *sa_openfiles(char *t, char *a)

ƥȥեarrayե򳫤ޤ*t ǥƥȥե̾*a 
arrayե̾ꤷޤ*a  NULL ˤȡƥȥե̾ 
+ ".ary"  arrayե̾ˤʤޤ

------ 
SUFARY *ary; ary = sa_openfiles("data.txt", NULL);
------

void sa_closefiles(SUFARY *ary)

ƥȥեarrayեĤޤ

 

eresult sa_sel(SUFARY *ary, char *s)

ɸԤʤޤ*s ǥɤꤷޤ
(suffix array ϰϤɽޤ)SUFARYѿ˳Ǽ졢
sa_left(), sa_right() ǼФޤɤ˥ޥåʸ󤬸
Ĥ CONTĤʤ FAIL ֤ޤ

------ 
if (sa_sel(ary, "Hello") == CONT) {...
------

void sa_reset(SUFARY *ary)

νԤʤޤԤʤȼθϼưŪɲø
ˤʤޤưκǽθɬפޤ(sa_openfiles()ǽ
ԤʤΤ)

eresult sa_search(SUFARY *ary, char *s, int keylen, int base_offset)

sa_sel() ٤ǡɸǤޤ*s ǥɤ
keylen ǸĹbase_offset ǥɤβʸܤ鸡
뤫(եå)ꤷޤ̤Ӥˤ sa_sel() ǽʬǤ
ɤ˥ޥåʸ󤬸Ĥ CONTĤʤ FAIL 
֤ޤ

long *sa_common_prefix_search(SUFARY *ary, char *kw, char we)

Common Prefix Search Ԥʤޤ㤨С a, abc, anny, any,
anybody, anymore, b, body, boy ʤɤñ줫ʤ뼭եФơ
 anybody Ǹ(Common Prefix Search)ȡ ̤Ȥ a,
any, anybody ֤äƤޤǲϤμ񸡺ʤɤˤ褯Ȥ
ޤ*kw ǥʸ we ñζڤʸꤷޤ֤
(longݥ)ؤκǽǤ˸̿ǼƤޤ
ʹߡñؤƥȥǥåθ̿³ޤ

------ 
SUFARY *ary = sa_openfiles("dict",NULL);
long *rslt;
rslt = sa_common_prefix_search(ary, "hello", ':');
for(i = 1; i <= (int)rslt[0]; i++) {
  char *s = sa_getline(ary, rslt[i]);
  printf("%ld:%s\n",rslt[i],s);
  free(s);
}
free(rslt);
------
------ ե dict
he:
hell:Ϲ
hello:䤢
------

 ̤Ф

long sa_left(SUFARY *ary), long sa_right(SUFARY *ary)

検索結果の左端、右端を返します。
返り値はarrayファイルをlongの配列とみなしたときの添字で、
sa_aryidx2txtidx(), sa_aryidx2txtptr() を介して使用します。

------ 㡧 suffix Υƥΰ֤
if (sa_sel(ary, "Hello") == CONT)
  for(i = sa_left(ary); i <= sa_right(ary); i++)
    printf("index: %ld\n", sa_aryidx2txtidx(ary, i));
------

long sa_aryidx2txtidx(SUFARY *ary, long i)

̤(sa_left(), sa_right() ֤)ƥǤΰ֤Ѵ
ޤ

char *sa_txtidx2txtptr(SUFARY *ary, long i)

ƥǤΰ֤ƥȤΥǥåʸݥ󥿡(char*)
Ѵޤ

char *sa_aryidx2txtptr(SUFARY *ary, long i)

̤(sa_left(), sa_right() ֤)ʸݥ󥿡(char*)
Ѵޤ

 ̤ʸȤƤμФ

char *sa_getline(SUFARY *ary, long i)

ƥȤΥǥåǻꤵ줿դΰԤФޤ 
malloc() 򤷤ƤΤǡfree() 򤹤ɬפޤ

------ 㡧 suffix ޤԤ
for(i = sa_left(ary); i <= sa_right(ary); i++){
  char *s = sa_getline(ary, sa_aryidx2txtidx(ary, i));
  printf("%s\n", s);
  free(s);
}
------

char *sa_getlines(SUFARY *ary, long i, int B, int A)

ƥȤΥǥåǻꤵ줿դΰԤȡBԡAԤ
Фޤ malloc() 򤷤ƤΤǡfree() 򤹤ɬפ
ޤ

char *sa_getblock(SUFARY *ary, long i, char *d1, char *d2)

ФԤޤƥȤΥǥåǻꤵ줿դΡ
ꤵ줿ʸǰϤޤ줿ǡФޤ malloc() 򤷤
Τǡfree() 򤹤ɬפޤDocIDեɬͭޤ󤬡
DocIDեѤˡ٤ʤޤ

------ 㡧 suffix ޤ൭(<ART></ART>ǰϤޤƤ)
for (i = sa_left(ary); i <= sa_right(ary) ; i++){
  char *s = sa_getblock(ary, sa_aryidx2txtidx(ary, i), "<ART>", "</ART>");
  printf("%s\n", s);
  free(s);
}
------

char *sa_getstr(SUFARY *ary, long from, long size) [Ver.2.1]

ary Υƥȥե from ʸܤ size ʸФޤ 
malloc() 򤷤ƤΤǡfree() 򤹤ɬפޤ

 DocIDեˤ뵭Ф [Ver.2.1] 

DID *sa_opendid(char *fn)

DocIDե򳫤ޤ*fn ǥե̾ꤷޤ

------ 
DID *did; did = sa_opendid("data.did");
------

void sa_closedid(DID *d)

DocIDեĤޤ


long sa_did_size(DID *did)

DocIDե˴ޤޤ뵭֤ޤ


void sa_didsearch(DID *did, long target)

DocIDե뤫顢target ΰ֤ޤޤ뵭γϰ֤ȵ
(ʸ)ľäơDIDѿ˳Ǽޤ뵭
ʤСϰ֤ -1 ˤʤޤDIDѿ˳Ǽ줿
ϡsa_doc_start()sa_doc_size() ǼФޤ

------ 㡧ɤδޤޤƤ뵭Ф
SUFARY *ary = sa_openfiles("data",NULL);
DID *d = sa_opendid("data.did");
if (sa_sel(ary, "Hello") == CONT)
  for (i = sa_left(ary); i <= sa_right(ary) ; i++) {
    char *s;
    sa_didsearch(d, sa_aryidx2txtidx(ary, i));
    if(sa_doc_start(d) == -1) exit(0); /* キーワードを含む記事無し */
    s = sa_getstr(ary, sa_doc_start(d), sa_doc_size(d)); /* 記事の取り出し */
    printf("%s\n",s);
    free(s);
  }
------

long sa_doc_start(DID *did)

γϰ֤֤ޤ sa_didsearch() ԤäƤɬפ


long sa_doc_size(DID *did)

記事のサイズを返します。事前に sa_didsearch() を行っている必要があり
ます。

long sa_doc_no(DID *did)

ƬƲܤε֤ޤ sa_didsearch() ԤäƤ
ɬפޤ뵭Ĥʤä硢-1 ˤʤޤ

