- 
                Notifications
    
You must be signed in to change notification settings  - Fork 21
 
Open
Description
model = FastFit.from_pretrained("fast-fit")
model
gives the following printed model structure:
FastFit(
  (encoder): MPNetModel(
    (embeddings): MPNetEmbeddings(
      (word_embeddings): Embedding(30527, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): MPNetEncoder(
      (layer): ModuleList(
        (0-11): 12 x MPNetLayer(
          (attention): MPNetAttention(
            (attn): MPNetSelfAttention(
              (q): Linear(in_features=768, out_features=768, bias=True)
              (k): Linear(in_features=768, out_features=768, bias=True)
              (v): Linear(in_features=768, out_features=768, bias=True)
              (o): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (intermediate): MPNetIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): MPNetOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (relative_attention_bias): Embedding(32, 12)
    )
    (pooler): MPNetPooler(
      (dense): Linear(in_features=768, out_features=768, bias=True)
      (activation): Tanh()
    )
  )
  (projection): Linear(in_features=768, out_features=128, bias=False)
  (clf): Linear(in_features=768, out_features=17999, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (batch_norm): BatchNorm1d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (clf_criterion): CrossEntropyLoss()
  (sim_criterion): SupConLoss()
  (all_docs): ParameterList(
      (0): Parameter containing: [torch.int64 of size 17999x10]
      (1): Parameter containing: [torch.int64 of size 17999x10]
  )
)
plison, alexandergrote and philmas
Metadata
Assignees
Labels
No labels