| | --- |
| | license: apache-2.0 |
| | widget: |
| | - text: "<|endoftext|>\nfunction getDateAfterNDay(n){\n return moment().add(n, 'day')\n}\n// docstring\n/**" |
| | --- |
| | |
| | ## Basic info |
| |
|
| | This model is based on [Salesforce/codegen-350M-mono](https://huggingface.co/Salesforce/codegen-350M-mono). |
| |
|
| | The model was fine-tuned on the [codeparrot/github-code-clean](https://huggingface.co/datasets/codeparrot/github-code-clean) dataset. |
| |
|
| | The training data was filtered to JavaScript and TypeScript code. |
| |
|
| | ## Usage |
| |
|
| | ```python |
| | from transformers import AutoTokenizer, AutoModelForCausalLM |
| | |
| | model_type = 'kdf/javascript-docstring-generation' |
| | tokenizer = AutoTokenizer.from_pretrained(model_type) |
| | model = AutoModelForCausalLM.from_pretrained(model_type) |
| | |
| | inputs = tokenizer('''<|endoftext|> |
| | function getDateAfterNDay(n){ |
| | return moment().add(n, 'day') |
| | } |
| | |
| | // docstring |
| | /**''', return_tensors='pt') |
| | |
| | doc_max_length = 128 |
| | |
| | generated_ids = model.generate( |
| | **inputs, |
| | max_length=inputs.input_ids.shape[1] + doc_max_length, |
| | do_sample=False, |
| | return_dict_in_generate=True, |
| | num_return_sequences=1, |
| | output_scores=True, |
| | pad_token_id=50256, |
| | eos_token_id=50256 # <|endoftext|> |
| | ) |
| | |
| | ret = tokenizer.decode(generated_ids.sequences[0], skip_special_tokens=False) |
| | print(ret) |
| | |
| | ``` |
| |
|
| | ## Prompt |
| |
|
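| | You can steer the docstring style or its natural language by prepending an example function together with a docstring written in the desired style or language, for example: |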
| |
|
| | ```python |
| | inputs = tokenizer('''<|endoftext|> |
| | function add(a, b){ |
| | return a + b; |
| | } |
| | // docstring |
| | /** |
| | * Calculate number add. |
| | * @param a {number} the first number to add |
| | * @param b {number} the second number to add |
| | * @return the result of a + b |
| | */ |
| | <|endoftext|> |
| | function getDateAfterNDay(n){ |
| | return moment().add(n, 'day') |
| | } |
| | // docstring |
| | /**''', return_tensors='pt') |
| | |
| | doc_max_length = 128 |
| | |
| | generated_ids = model.generate( |
| | **inputs, |
| | max_length=inputs.input_ids.shape[1] + doc_max_length, |
| | do_sample=False, |
| | return_dict_in_generate=True, |
| | num_return_sequences=1, |
| | output_scores=True, |
| | pad_token_id=50256, |
| | eos_token_id=50256 # <|endoftext|> |
| | ) |
| | |
| | ret = tokenizer.decode(generated_ids.sequences[0], skip_special_tokens=False) |
| | print(ret) |
| | |
| | inputs = tokenizer('''<|endoftext|> |
| | function add(a, b){ |
| | return a + b; |
| | } |
| | // docstring |
| | /** |
| | * 计算数字相加 |
| | * @param a {number} 第一个加数 |
| | * @param b {number} 第二个加数 |
| | * @return 返回 a + b 的结果 |
| | */ |
| | <|endoftext|> |
| | function getDateAfterNDay(n){ |
| | return moment().add(n, 'day') |
| | } |
| | // docstring |
| | /**''', return_tensors='pt') |
| | |
| | doc_max_length = 128 |
| | |
| | generated_ids = model.generate( |
| | **inputs, |
| | max_length=inputs.input_ids.shape[1] + doc_max_length, |
| | do_sample=False, |
| | return_dict_in_generate=True, |
| | num_return_sequences=1, |
| | output_scores=True, |
| | pad_token_id=50256, |
| | eos_token_id=50256 # <|endoftext|> |
| | ) |
| | |
| | ret = tokenizer.decode(generated_ids.sequences[0], skip_special_tokens=False) |
| | print(ret) |
| | |
| | ``` |