author      karbon <yangbo6@chinatelecom.cn>    2021-03-15 10:43:25 +0800
committer   karbon <yangbo6@chinatelecom.cn>    2021-03-15 10:43:40 +0800
commit      06bf4cdcedf782af4f8ff2485f0ad3bc97b08c73 (patch)
tree        bf297cefbb4110a5daaef35f4c7c8086c05f43af /scripts
parent      f15dc37e62eee9d5d02bf58af1053750a20fad23 (diff)
feat: NLP for IBN in UUI
NLP for IBN in UUI

Issue-ID: USECASEUI-525
Signed-off-by: karbon <yangbo6@chinatelecom.cn>
Change-Id: I9f26312748ebaedb3115035c4af8b0abf19d59bc
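For orientation, this patch wires the SQuAD-style extractor behind two small Flask services. Below is a minimal sketch of calling the /api/online/predict route touched in api_squad_online.py; the host and port are assumptions (the service reads its port from sys.argv[1]), and the payload fields follow the handler shown in the diff.

    # Sketch only: host/port are assumed, adjust to your deployment.
    import requests

    UUI_NLP_URL = "http://localhost:33011/api/online/predict"  # assumed host/port

    payload = {
        "title": "5G slicing order",                       # optional; handler falls back to 'Not available'
        "text": "Create a communication service with ...", # free-text intent description (truncated to 800 chars server-side)
    }

    resp = requests.post(UUI_NLP_URL, json=payload, timeout=60)
    resp.raise_for_status()

    # The handler returns a JSON object keyed by the questions configured in
    # global_setting.py (e.g. 'Communication Service Name', 'Latency', ...).
    for question, answer in resp.json().items():
        print(question, ":", answer)
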
Diffstat (limited to 'scripts')
-rw-r--r--   scripts/api_squad.py               45
-rw-r--r--   scripts/api_squad_offline.py       48
-rw-r--r--   scripts/api_squad_online.py        39
-rw-r--r--   scripts/create_squad_features.py  157
-rw-r--r--   scripts/global_setting.py          31
5 files changed, 126 insertions, 194 deletions
diff --git a/scripts/api_squad.py b/scripts/api_squad.py
index 239bbd6..f29a74b 100644
--- a/scripts/api_squad.py
+++ b/scripts/api_squad.py
@@ -1,6 +1,6 @@
# coding=utf-8
# squad interface
-# Required parameters:
+# Required parameters
# FLAGS_output_dir :the output path of the model training during training process, the output of the trained model, etc.; the output path of the model prediction during predicting process
# FLAGS_init_checkpoint_squad : model initialization path, use bert pre-trained model for training; use the output path during training for prediction
# FLAGS_predict_file : the file to be predicted, csv file
@@ -22,18 +22,13 @@ from __future__ import print_function
import collections
import json
import math
-import os
-import random
import modeling
import optimization
import tokenization
import six
import tensorflow as tf
import pandas as pd
-from global_setting import FLAGS_bert_config_file, FLAGS_vocab_file, FLAGS_init_checkpoint_squad
-
-
-
+from global_setting import FLAGS_init_checkpoint_squad
FLAGS_max_seq_length = 512
FLAGS_do_lower_case = True
@@ -53,11 +48,12 @@ FLAGS_warmup_proportion = 0.1
FLAGS_gcp_project = None
FLAGS_null_score_diff_threshold = 0.0
-def make_json(input_file,questions):
+
+def make_json(input_file, questions):
print(input_file)
data_train = pd.read_excel(input_file)
print(444)
- data_train.fillna(0,inplace=True)
+ data_train.fillna(0, inplace=True)
data_train.index = [i for i in range(len(data_train))]
question = questions
res = {}
@@ -67,17 +63,16 @@ def make_json(input_file,questions):
data_inside['title'] = 'Not available'
data_inside['paragraphs'] = []
paragraphs_inside = {}
- paragraphs_inside['context'] = data_train.loc[i,'text']
+ paragraphs_inside['context'] = data_train.loc[i, 'text']
paragraphs_inside['qas'] = []
- for ques in question:
+ for ques in question:
qas_inside = {}
qas_inside['answers'] = []
- if data_train.loc[i,ques]:
+ if data_train.loc[i, ques]:
answer_inside = {}
- answer_inside['text'] = str(data_train.loc[i,ques])
+ answer_inside['text'] = str(data_train.loc[i, ques])
answer_inside['answer_start'] = paragraphs_inside['context'].find(answer_inside['text'])
qas_inside['is_impossible'] = 0
-
else:
qas_inside['is_impossible'] = 1
answer_inside = {}
@@ -92,8 +87,6 @@ def make_json(input_file,questions):
return json.dumps(res)
-
-
class SquadExample(object):
"""A single training/test example for simple sequence classification.
@@ -164,9 +157,9 @@ class InputFeatures(object):
self.is_impossible = is_impossible
-def read_squad_examples(input_file, is_training,questions,FLAGS_version_2_with_negative):
+def read_squad_examples(input_file, is_training, questions, FLAGS_version_2_with_negative):
"""Read a SQuAD json file into a list of SquadExample."""
- data = make_json(input_file,questions)
+ data = make_json(input_file, questions)
input_data = json.loads(data)["data"]
def is_whitespace(c):
@@ -212,8 +205,7 @@ def read_squad_examples(input_file, is_training,questions,FLAGS_version_2_with_n
answer_offset = answer["answer_start"]
answer_length = len(orig_answer_text)
start_position = char_to_word_offset[answer_offset]
- end_position = char_to_word_offset[answer_offset + answer_length -
- 1]
+ end_position = char_to_word_offset[answer_offset + answer_length - 1]
# Only add answers where the text can be exactly recovered from the
# document. If this CAN'T happen it's likely due to weird Unicode
# stuff so we will just skip the example.
@@ -353,8 +345,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
doc_start = doc_span.start
doc_end = doc_span.start + doc_span.length - 1
out_of_span = False
- if not (tok_start_position >= doc_start and
- tok_end_position <= doc_end):
+ if not (tok_start_position >= doc_start and tok_end_position <= doc_end):
out_of_span = True
if out_of_span:
start_position = 0
@@ -544,7 +535,6 @@ def model_fn_builder(bert_config, init_checkpoint, learning_rate,
tf.logging.info(" name = %s, shape = %s" %
(name, features[name].shape))
- unique_ids = features["unique_ids"]
input_ids = features["input_ids"]
input_mask = features["input_mask"]
segment_ids = features["segment_ids"]
@@ -686,7 +676,7 @@ RawResult = collections.namedtuple("RawResult",
def write_predictions(all_examples, all_features, all_results, n_best_size,
max_answer_length, do_lower_case, output_prediction_file,
- output_nbest_file, output_null_log_odds_file):
+ output_nbest_file, output_null_log_odds_file, FLAGS_version_2_with_negative):
"""Write final predictions to the json file and log-odds of null if needed."""
tf.logging.info("Writing predictions to: %s" % (output_prediction_file))
tf.logging.info("Writing nbest to: %s" % (output_nbest_file))
@@ -705,7 +695,6 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
all_predictions = collections.OrderedDict()
all_nbest_json = collections.OrderedDict()
- scores_diff_json = collections.OrderedDict()
for (example_index, example) in enumerate(all_examples):
features = example_index_to_features[example_index]
@@ -713,9 +702,6 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
prelim_predictions = []
# keep track of the minimum score of null start+end of position 0
score_null = 1000000 # large and positive
- min_null_feature_index = 0 # the paragraph slice with min mull score
- null_start_logit = 0 # the start logit at the slice with min null score
- null_end_logit = 0 # the end logit at the slice with min null score
for (feature_index, feature) in enumerate(features):
result = unique_id_to_result[feature.unique_id]
start_indexes = _get_best_indexes(result.start_logits, n_best_size)
@@ -726,9 +712,6 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
result.end_logits[0]
if feature_null_score < score_null:
score_null = feature_null_score
- min_null_feature_index = feature_index
- null_start_logit = result.start_logits[0]
- null_end_logit = result.end_logits[0]
for start_index in start_indexes:
for end_index in end_indexes:
# We could hypothetically create invalid predictions, e.g., predict
diff --git a/scripts/api_squad_offline.py b/scripts/api_squad_offline.py
index 1c98a10..8a05141 100644
--- a/scripts/api_squad_offline.py
+++ b/scripts/api_squad_offline.py
@@ -1,4 +1,3 @@
-
#!/usr/bin/env python
# coding: utf-8
@@ -9,25 +8,36 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import json
-import datetime
-import threading
-import time
from flask import Flask, abort, request, jsonify
from concurrent.futures import ThreadPoolExecutor
-import collections
-import math
import os
import random
import modeling
-import optimization
import tokenization
-import six
import tensorflow as tf
import sys
-from api_squad import *
-from global_setting import *
+
+from api_squad import FLAGS_max_seq_length
+from api_squad import FLAGS_do_lower_case
+from api_squad import FLAGS_use_tpu
+from api_squad import FLAGS_tpu_name
+from api_squad import FLAGS_tpu_zone
+from api_squad import FLAGS_gcp_project
+from api_squad import FLAGS_master
+from api_squad import FLAGS_save_checkpoints_steps
+from api_squad import FLAGS_iterations_per_loop
+from api_squad import FLAGS_num_tpu_cores
+from api_squad import FLAGS_warmup_proportion
+from api_squad import FLAGS_doc_stride
+from api_squad import model_fn_builder
+from api_squad import FeatureWriter
+from api_squad import convert_examples_to_features
+from api_squad import input_fn_builder
+
+from global_setting import CUDA_VISIBLE_DEVICES
+from global_setting import validate_flags_or_throw
+from global_setting import read_squad_examples
from global_setting import FLAGS_bert_config_file, FLAGS_vocab_file, FLAGS_init_checkpoint_squad, questions
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
@@ -35,9 +45,10 @@ os.environ["CUDA_VISIBLE_DEVICES"] = str(CUDA_VISIBLE_DEVICES)
app = Flask(__name__)
+
def serving_input_fn():
input_ids = tf.placeholder(tf.int32, [None, FLAGS_max_seq_length], name='input_ids')
- unique_id = tf.placeholder(tf.int32,[None])
+ unique_id = tf.placeholder(tf.int32, [None])
input_mask = tf.placeholder(tf.int32, [None, FLAGS_max_seq_length], name='input_mask')
segment_ids = tf.placeholder(tf.int32, [None, FLAGS_max_seq_length], name='segment_ids')
input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
@@ -45,13 +56,13 @@ def serving_input_fn():
'input_mask': input_mask,
'segment_ids': segment_ids,
'unique_ids': unique_id,
- })()
+ })()
return input_fn
+
def main(FLAGS_output_dir, FLAGS_init_checkpoint_squad, FLAGS_export_dir, FLAGS_predict_file=None, FLAGS_train_file=None, FLAGS_do_predict=False,
FLAGS_do_train=False, FLAGS_train_batch_size=16, FLAGS_predict_batch_size=8, FLAGS_learning_rate=5e-5, FLAGS_num_train_epochs=3.0,
FLAGS_max_answer_length=100, FLAGS_max_query_length=64, FLAGS_version_2_with_negative=False):
-
tf.logging.set_verbosity(tf.logging.INFO)
bert_config = modeling.BertConfig.from_json_file(FLAGS_bert_config_file)
@@ -60,7 +71,6 @@ def main(FLAGS_output_dir, FLAGS_init_checkpoint_squad, FLAGS_export_dir, FLAGS_
tf.gfile.MakeDirs(FLAGS_output_dir)
-
tokenizer = tokenization.FullTokenizer(
vocab_file=FLAGS_vocab_file, do_lower_case=FLAGS_do_lower_case)
@@ -68,7 +78,6 @@ def main(FLAGS_output_dir, FLAGS_init_checkpoint_squad, FLAGS_export_dir, FLAGS_
if FLAGS_use_tpu and FLAGS_tpu_name:
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
FLAGS_tpu_name, zone=FLAGS_tpu_zone, project=FLAGS_gcp_project)
-
is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
run_config = tf.contrib.tpu.RunConfig(
cluster=tpu_cluster_resolver,
@@ -86,8 +95,7 @@ def main(FLAGS_output_dir, FLAGS_init_checkpoint_squad, FLAGS_export_dir, FLAGS_
if FLAGS_do_train:
train_examples = read_squad_examples(
- input_file=FLAGS_train_file, is_training=True,questions = questions,FLAGS_version_2_with_negative = FLAGS_version_2_with_negative)
-
+ input_file=FLAGS_train_file, is_training=True, questions=questions, FLAGS_version_2_with_negative=FLAGS_version_2_with_negative)
num_train_steps = int(
len(train_examples) / FLAGS_train_batch_size * FLAGS_num_train_epochs)
num_warmup_steps = int(num_train_steps * FLAGS_warmup_proportion)
@@ -174,7 +182,7 @@ class AI2Flask:
@app.route('/api/offline/train', methods=['POST'])
def text_analyse():
- if not request.json or not 'task_id' in request.json:
+ if not request.json or 'task_id' not in request.json:
abort(400)
if check_threads():
return jsonify({"Des": "Task list is full. Can not submit new task! ", "Result": "Failed to submit the training task ", "Status": "ERROR"})
@@ -227,8 +235,6 @@ class AI2Flask:
except Exception as e:
return jsonify({"Des": str(e), "Result": 'None', "Status": "Error"})
-
-
@app.route('/api/offline/status', methods=['POST'])
def todo_status():
task_id = request.json['task_id']
diff --git a/scripts/api_squad_online.py b/scripts/api_squad_online.py
index 9cc6b08..abe3d5f 100644
--- a/scripts/api_squad_online.py
+++ b/scripts/api_squad_online.py
@@ -1,4 +1,3 @@
-
#!/usr/bin/env python
# coding: utf-8
@@ -6,30 +5,15 @@
# date = 20201204
-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
-import datetime
-import threading
import sys
from flask import Flask, abort, request, jsonify
-from concurrent.futures import ThreadPoolExecutor
-import collections
-import math
import os
-import random
-import modeling
-import optimization
-import tokenization
-import six
-import tensorflow as tf
-import pandas as pd
-import numpy as np
-import requests
-from global_setting import *
+from global_setting import questions, tokenizer_ch, CUDA_VISIBLE_DEVICES
from create_squad_features import get_squad_feature_result
@@ -38,17 +22,15 @@ app = Flask(__name__)
class AI2Flask:
- def __init__(self, port=5000,workers=4):
+ def __init__(self, port=5000, workers=4):
self.app = app
self.port = port
-
-
@app.route('/api/online/predict', methods=['POST'])
def text_analyse():
if not request.json:
abort(400)
-
+
else:
try:
try:
@@ -56,7 +38,6 @@ class AI2Flask:
except:
title = 'Not available'
text_origin = request.json['text']
-
if len(text_origin) > 800:
text = text_origin[:800]
@@ -65,23 +46,16 @@ class AI2Flask:
result = {}
for ques in questions:
- tmp = get_squad_feature_result(title=title,text=text,tokenizer=tokenizer_ch,question=[ques],url='http://localhost:8502/v1/models/predict:predict')
+ tmp = get_squad_feature_result(title=title, text=text, tokenizer=tokenizer_ch, question=[ques], url='http://localhost:8502/v1/models/predict:predict')
result[ques] = dict(tmp)[ques]
-
-
+
print('finished!!')
return json.dumps(result)
-
-
+
except KeyError as e:
return jsonify({"Des": 'KeyError: {}'.format(str(e)), "Result": 'None', "Status": "Error"})
except Exception as e:
return jsonify({"Des": str(e), "Result": 'None', "Status": "Error"})
-
-
-
-
-
@app.route('/api/online/load', methods=['POST'])
def load_model():
@@ -105,4 +79,3 @@ class AI2Flask:
if __name__ == '__main__':
port = sys.argv[1]
AI2Flask(port=port).start()
-
diff --git a/scripts/create_squad_features.py b/scripts/create_squad_features.py
index e779b9e..ce274e0 100644
--- a/scripts/create_squad_features.py
+++ b/scripts/create_squad_features.py
@@ -1,38 +1,28 @@
+#!/usr/bin/env python
+# coding: utf-8
- #!/usr/bin/env python
- # coding: utf-8
-
- # auther = 'liuzhiyong'
- # date = 20201204
+# auther = 'liuzhiyong'
+# date = 20201204
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
-import datetime
-import threading
-import time
-from flask import Flask, abort, request, jsonify
-from concurrent.futures import ThreadPoolExecutor
import collections
import math
-import os
-import random
-import modeling
-import optimization
import tokenization
import six
import tensorflow as tf
-import sys
import requests
-from global_setting import *
+from global_setting import _improve_answer_span
version_2_with_negative = True
-def get_squad_feature_result(title,text,tokenizer,question, url):
+
+def get_squad_feature_result(title, text, tokenizer, question, url):
def make_json(title, text, question):
res = {}
@@ -59,7 +49,6 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
res['data'].append(data_inside.copy())
return json.dumps(res)
-
def _compute_softmax(scores):
"""Compute softmax probability over raw logits."""
if not scores:
@@ -83,7 +72,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
return probs
def get_final_text(pred_text, orig_text, do_lower_case):
-
+
def _strip_spaces(text):
ns_chars = []
ns_to_s_map = collections.OrderedDict()
@@ -152,7 +141,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
return output_text
def _get_best_indexes(logits, n_best_size):
-
+
index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
best_indexes = []
@@ -164,8 +153,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
- def write_predictions(all_examples, all_features, all_results, n_best_size,
- max_answer_length, do_lower_case):
+ def write_predictions(all_examples, all_features, all_results, n_best_size, max_answer_length, do_lower_case):
"""Write final predictions to the json file and log-odds of null if needed."""
example_index_to_features = collections.defaultdict(list)
@@ -236,19 +224,19 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
if version_2_with_negative:
prelim_predictions.append(
- _PrelimPrediction(
- feature_index=min_null_feature_index,
- start_index=0,
- end_index=0,
- start_logit=null_start_logit,
- end_logit=null_end_logit))
+ _PrelimPrediction(
+ feature_index=min_null_feature_index,
+ start_index=0,
+ end_index=0,
+ start_logit=null_start_logit,
+ end_logit=null_end_logit))
prelim_predictions = sorted(
- prelim_predictions,
- key=lambda x: (x.start_logit + x.end_logit),
- reverse=True)
+ prelim_predictions,
+ key=lambda x: (x.start_logit + x.end_logit),
+ reverse=True)
_NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
- "NbestPrediction", ["text", "start_logit", "end_logit"])
+ "NbestPrediction", ["text", "start_logit", "end_logit"])
seen_predictions = {}
nbest = []
@@ -282,10 +270,10 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
seen_predictions[final_text] = True
nbest.append(
- _NbestPrediction(
- text=final_text,
- start_logit=pred.start_logit,
- end_logit=pred.end_logit))
+ _NbestPrediction(
+ text=final_text,
+ start_logit=pred.start_logit,
+ end_logit=pred.end_logit))
# if we didn't inlude the empty option in the n-best, inlcude it
if version_2_with_negative:
@@ -299,7 +287,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
# just create a nonce prediction in this case to avoid failure.
if not nbest:
nbest.append(
- _NbestPrediction(text="", start_logit=0.0, end_logit=0.0))
+ _NbestPrediction(text="", start_logit=0.0, end_logit=0.0))
assert len(nbest) >= 1
@@ -339,30 +327,28 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
all_nbest_json[example.qas_id] = nbest_json
return all_predictions
-
def create_int_feature(values):
feature = tf.train.Feature(
int64_list=tf.train.Int64List(value=list(values)))
return feature
-
class InputFeatures(object):
"""A single set of features of data."""
def __init__(self,
- unique_id,
- example_index,
- doc_span_index,
- tokens,
- token_to_orig_map,
- token_is_max_context,
- input_ids,
- input_mask,
- segment_ids,
- start_position=None,
- end_position=None,
- is_impossible=None):
+ unique_id,
+ example_index,
+ doc_span_index,
+ tokens,
+ token_to_orig_map,
+ token_is_max_context,
+ input_ids,
+ input_mask,
+ segment_ids,
+ start_position=None,
+ end_position=None,
+ is_impossible=None):
self.unique_id = unique_id
self.example_index = example_index
self.doc_span_index = doc_span_index
@@ -413,7 +399,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
return cur_span_index == best_span_index
def convert_examples_to_features(examples, tokenizer, max_seq_length,
- doc_stride, max_query_length, is_training):
+ doc_stride, max_query_length, is_training):
"""Loads a data file into a list of `InputBatch`s."""
unique_id = 1000000000
@@ -487,7 +473,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
is_max_context = _check_is_max_context(doc_spans, doc_span_index,
- split_token_index)
+ split_token_index)
token_is_max_context[len(tokens)] = is_max_context
tokens.append(all_doc_tokens[split_token_index])
segment_ids.append(1)
@@ -518,8 +504,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
doc_start = doc_span.start
doc_end = doc_span.start + doc_span.length - 1
out_of_span = False
- if not (tok_start_position >= doc_start and
- tok_end_position <= doc_end):
+ if not (tok_start_position >= doc_start and tok_end_position <= doc_end):
out_of_span = True
if out_of_span:
start_position = 0
@@ -574,22 +559,21 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
is_impossible=example.is_impossible)
# Run callback
-
+
result.append(feature)
unique_id += 1
return result
class SquadExample(object):
-
def __init__(self,
- qas_id,
- question_text,
- doc_tokens,
- orig_answer_text=None,
- start_position=None,
- end_position=None,
- is_impossible=False):
+ qas_id,
+ question_text,
+ doc_tokens,
+ orig_answer_text=None,
+ start_position=None,
+ end_position=None,
+ is_impossible=False):
self.qas_id = qas_id
self.question_text = question_text
self.doc_tokens = doc_tokens
@@ -615,8 +599,6 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
s += ", is_impossible: %r" % (self.is_impossible)
return s
-
-
def read_squad_examples(input_file, is_training):
"""Read a SQuAD json file into a list of SquadExample."""
@@ -654,7 +636,6 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
is_impossible = False
if is_training:
-
if (len(qa["answers"]) != 1) and (not is_impossible):
raise ValueError(
"For training, each question should have exactly 1 answer.")
@@ -664,8 +645,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
answer_offset = answer["answer_start"]
answer_length = len(orig_answer_text)
start_position = char_to_word_offset[answer_offset]
- end_position = char_to_word_offset[answer_offset + answer_length -
- 1]
+ end_position = char_to_word_offset[answer_offset + answer_length - 1]
# Only add answers where the text can be exactly recovered from the
# document. If this CAN'T happen it's likely due to weird Unicode
# stuff so we will just skip the example.
@@ -678,7 +658,7 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
tokenization.whitespace_tokenize(orig_answer_text))
if actual_text.find(cleaned_answer_text) == -1:
tf.logging.warning("Could not find answer: '%s' vs. '%s'",
- actual_text, cleaned_answer_text)
+ actual_text, cleaned_answer_text)
continue
else:
start_position = -1
@@ -697,27 +677,24 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
return examples
+ def get_result(title, text, question, url):
- def get_result(title,text,question,url):
-
- data = make_json(title,text,question)
-
-
- examples = read_squad_examples(data,False)
+ data = make_json(title, text, question)
+ examples = read_squad_examples(data, False)
predict_files = convert_examples_to_features(
- examples=examples,
- tokenizer=tokenizer,
- max_seq_length=512,
- doc_stride=128,
- max_query_length=100,
- is_training=False,
+ examples=examples,
+ tokenizer=tokenizer,
+ max_seq_length=512,
+ doc_stride=128,
+ max_query_length=100,
+ is_training=False,
)
-
+
headers = {"content-type": "application/json"}
all_results = []
- for predict_file in predict_files:
+ for predict_file in predict_files:
features = {}
features["unique_ids"] = predict_file.unique_id
features["input_mask"] = predict_file.input_mask
@@ -725,22 +702,20 @@ def get_squad_feature_result(title,text,tokenizer,question, url):
features["input_ids"] = predict_file.input_ids
data_list = []
data_list.append(features)
-
+
data = json.dumps({"instances": data_list})
-
+
json_response = requests.post(url, data=data, headers=headers)
-
x = json.loads(json_response.text)
-
+
all_results.append(
RawResult(
unique_id=predict_file.unique_id,
start_logits=x['predictions'][0]['start_logits'],
end_logits=x['predictions'][0]['end_logits']))
-
- result = write_predictions(examples, predict_files, all_results,20, 64,True)
+
+ result = write_predictions(examples, predict_files, all_results, 20, 64, True)
return result
return get_result(title, text, question, url)
-
diff --git a/scripts/global_setting.py b/scripts/global_setting.py
index bb035f9..51dfec1 100644
--- a/scripts/global_setting.py
+++ b/scripts/global_setting.py
@@ -3,34 +3,29 @@ from __future__ import division
from __future__ import print_function
-import collections
-import math
-import modeling
-import optimization
+# import collections
+# import math
+# import modeling
+# import optimization
import tokenization
-import six
-import tensorflow as tf
-import os
+# import six
+# import tensorflow as tf
+# import os
-
-
-
-### Global variables
+# Global variables
# GPU number, default: -1, means not used
-CUDA_VISIBLE_DEVICES="2"
+CUDA_VISIBLE_DEVICES = "2"
# Questions to be trained/predicted
-questions = ['Communication Service Name','Max Number of UEs','Data Rate Downlink','Latency','Data Rate Uplink','Resource Sharing Level','Mobility','Area']
+questions = ['Communication Service Name', 'Max Number of UEs', 'Data Rate Downlink', 'Latency', 'Data Rate Uplink', 'Resource Sharing Level', 'Mobility', 'Area']
# Configuration file
-FLAGS_bert_config_file = '/home/run/chinese_L-12_H-768_A-12/bert_config.json'
-FLAGS_vocab_file = '/home/run/chinese_L-12_H-768_A-12/vocab.txt'
-FLAGS_init_checkpoint_squad = '/home/run/chinese_L-12_H-768_A-12/bert_model.ckpt'
+FLAGS_bert_config_file = '/home/run/uncased_L-12_H-768_A-12/bert_config.json'
+FLAGS_vocab_file = '/home/run/uncased_L-12_H-768_A-12/vocab.txt'
+FLAGS_init_checkpoint_squad = '/home/run/uncased_L-12_H-768_A-12/bert_model.ckpt'
max_seq_length = 512
tokenizer_ch = tokenization.FullTokenizer(vocab_file=FLAGS_vocab_file, do_lower_case=True)
-
-
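
As a companion to the offline-training routes touched in api_squad_offline.py above, here is a minimal sketch of submitting and polling a training task. The host and port are assumptions, and any request fields beyond 'task_id' are omitted because the hunks shown above do not include them.

    # Sketch only: BASE_URL is an assumed host/port for the offline training service.
    import requests

    BASE_URL = "http://localhost:33012"

    # Submit a training task; the handler rejects requests without a 'task_id' field
    # and answers with a JSON object carrying 'Des', 'Result' and 'Status'.
    submit = requests.post(BASE_URL + "/api/offline/train", json={"task_id": "demo-001"})
    print(submit.status_code, submit.text)

    # Poll the same task; /api/offline/status reads 'task_id' from the request body.
    status = requests.post(BASE_URL + "/api/offline/status", json={"task_id": "demo-001"})
    print(status.status_code, status.text)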